Example #1
def train_and_evaluate(args):
    show_lib_version()
    train_filename = args['train_filename']
    bucket_name = args['bucket_name']
    data_loc = os.path.join('gs://', bucket_name, 'data', train_filename)
    # data_loc = 'gs://ancient-snow-224803-ff/data/train.dense'
    print('data_loc:{}, train_filename:{}'.format(data_loc, train_filename))

    # gsutil outputs everything to stderr so we need to divert it to stdout.
    subprocess.check_call(['gsutil', 'cp', data_loc, train_filename],
                          stderr=sys.stdout)
    config = {"params": dict(n_estimators=50)}

    x, y = load_data(train_filename)
    clf = model.build_estimator(config)
    clf.fit(x, y)

    model_name = 'model.joblib'
    joblib.dump(clf, model_name, compress=3)

    print("Save model to {0}".format(model_name))
    upload_to_gs(model_name, bucket_name)

    try:
        # check_output captures stdout itself, so no stdout/stderr kwargs;
        # the command must also be a list of separate arguments.
        print(subprocess.check_output(['pip', 'freeze']))
    except subprocess.CalledProcessError:
        pass
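The `upload_to_gs` helper called above is not shown in this listing. A minimal sketch of what it might look like, mirroring the snippet's own `gsutil` download (the `gs://<bucket>/model/` destination path is an assumption):

def upload_to_gs(filename, bucket_name):
    # Hypothetical helper: push the local file back to the bucket with gsutil,
    # the inverse of the download performed above.
    destination = os.path.join('gs://', bucket_name, 'model', filename)
    subprocess.check_call(['gsutil', 'cp', filename, destination],
                          stderr=sys.stdout)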
Example #2
def run_experiment(hparams):
  """Run the training and evaluate using the high level API"""

  train_input = lambda: model.input_fn(
      filename=os.path.join(hparams.data_dir, 'train.tfrecords'),
      batch_size=hparams.train_batch_size
  )

  eval_input = lambda: model.input_fn(
      filename=os.path.join(hparams.data_dir, 'test.tfrecords'),
      batch_size=hparams.eval_batch_size
  )

  train_spec = tf.estimator.TrainSpec(train_input,
                                      max_steps=hparams.train_steps
                                      )

  exporter = tf.estimator.FinalExporter('cnn',
          model.serving_input_fn)
  eval_spec = tf.estimator.EvalSpec(eval_input,
                                    steps=hparams.eval_steps,
                                    exporters=[exporter],
                                    name='cnn-eval'
                                    )

  estimator = model.build_estimator(model_dir=hparams.job_dir)

  tf.estimator.train_and_evaluate(estimator,
                                  train_spec,
                                  eval_spec)
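The `model.serving_input_fn` passed to `FinalExporter` is defined elsewhere. A TF 1.x serving input function generally returns a `ServingInputReceiver`; a sketch with an illustrative feature name and shape (not taken from the sample):

def serving_input_fn():
    # Placeholder fed at serving time; the name and shape are illustrative.
    inputs = {'image': tf.placeholder(tf.float32, shape=[None, 28, 28, 1])}
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)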
Example #3
def run_experiment(hparams):
    """Run the training and evaluate using the high level API"""

    train_input = lambda: model.input_fn(hparams.train_files,
                                         num_epochs=hparams.num_epochs,
                                         batch_size=hparams.train_batch_size)

    # Don't shuffle evaluation data
    eval_input = lambda: model.input_fn(
        hparams.eval_files, batch_size=hparams.eval_batch_size, shuffle=False)

    train_spec = tf.estimator.TrainSpec(train_input,
                                        max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter(
        'jimini', model.SERVING_FUNCTIONS[hparams.export_format])
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=hparams.eval_steps,
                                      exporters=[exporter],
                                      name='jimini-eval')

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=hparams.job_dir)
    print('model dir {}'.format(run_config.model_dir))
    estimator = model.build_estimator(
        embedding_size=hparams.embedding_size,
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(2, int(hparams.first_layer_size * hparams.scale_factor**i))
            for i in range(hparams.num_layers)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
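The `hidden_units` comprehension shrinks each layer by `scale_factor`; for instance, with illustrative values first_layer_size=100, scale_factor=0.5, num_layers=4:

hidden_units = [max(2, int(100 * 0.5**i)) for i in range(4)]
# -> [100, 50, 25, 12]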
Example #4
def run(args):
  """Runs tensorflow model training.

  Args:
    args: Arguments parsed at program executions.
  """

  estimator = model.build_estimator(
      output_dir=args.output_dir,
      first_layer_size=args.first_layer_size,
      num_layers=args.num_layers,
      dropout=args.dropout,
      learning_rate=args.learning_rate,
      save_checkpoints_steps=args.save_checkpoints_steps)

  train_input_fn = input_fn_utils.read_dataset(
      input_dir=args.input_dir,
      mode=tf.contrib.learn.ModeKeys.TRAIN,
      batch_size=args.batch_size)

  eval_input_fn = input_fn_utils.read_dataset(
      input_dir=args.input_dir,
      mode=tf.contrib.learn.ModeKeys.EVAL,
      batch_size=args.batch_size)

  serving_input_fn = input_fn_utils.get_serving_input_fn(args.input_dir)

  train_spec = tf.estimator.TrainSpec(
      input_fn=train_input_fn, hooks=[], max_steps=args.max_steps)

  exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
  eval_spec = tf.estimator.EvalSpec(
      input_fn=eval_input_fn, hooks=[], exporters=exporter)

  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #5
def train_and_evaluate(args):
    """Run the training and evaluate using the high level API."""

    train_input = lambda: model.input_fn(args.train_files,
                                         num_epochs=args.num_epochs,
                                         batch_size=args.train_batch_size)

    # Don't shuffle evaluation data
    eval_input = lambda: model.input_fn(
        args.eval_files, batch_size=args.eval_batch_size, shuffle=False)

    train_spec = tf.estimator.TrainSpec(train_input,
                                        max_steps=args.train_steps)

    exporter = tf.estimator.FinalExporter(
        'census', model.SERVING_FUNCTIONS[args.export_format])
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=args.eval_steps,
                                      exporters=[exporter],
                                      name='census-eval')

    run_config = tf.estimator.RunConfig(
        session_config=_get_session_config_from_env_var())
    run_config = run_config.replace(model_dir=args.job_dir)
    print('Model dir %s' % run_config.model_dir)
    estimator = model.build_estimator(
        embedding_size=args.embedding_size,
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(2, int(args.first_layer_size * args.scale_factor**i))
            for i in range(args.num_layers)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
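Several examples here call a `_get_session_config_from_env_var` helper that is not reproduced in this listing. In Google's Cloud ML Engine census sample it derives device filters from the `TF_CONFIG` environment variable; a sketch along those lines (treat the details as an approximation):

import json
import os

def _get_session_config_from_env_var():
    """Returns a tf.ConfigProto based on the TF_CONFIG env variable, or None."""
    tf_config = json.loads(os.environ.get('TF_CONFIG', '{}'))
    if (tf_config and 'task' in tf_config and
            'type' in tf_config['task'] and 'index' in tf_config['task']):
        # The master should only communicate with itself and the PS.
        if tf_config['task']['type'] == 'master':
            return tf.ConfigProto(device_filters=['/job:ps', '/job:master'])
        # A worker should only communicate with itself and the PS.
        elif tf_config['task']['type'] == 'worker':
            return tf.ConfigProto(device_filters=[
                '/job:ps',
                '/job:worker/task:%d' % tf_config['task']['index']
            ])
    return None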
Example #6
def run_experiment(hparams):
    """Run the training and evaluate using the high level API"""

    train_input = lambda: model.input_fn(hparams.train_files,
                                         num_epochs=hparams.num_epochs,
                                         batch_size=hparams.train_batch_size)

    # Don't shuffle evaluation data
    eval_input = lambda: model.input_fn(
        hparams.eval_files, batch_size=hparams.eval_batch_size, shuffle=False)

    train_spec = tf.estimator.TrainSpec(train_input,
                                        max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter(
        'airline', model.SERVING_FUNCTIONS[hparams.export_format])
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=hparams.eval_steps,
                                      exporters=[exporter],
                                      name='airline-eval')

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=hparams.job_dir)
    print('model dir {}'.format(run_config.model_dir))
    estimator = model.build_estimator(model=hparams.model, config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #7
def train_and_evaluate(flags):
    """Runs model training and evaluation using TF Estimator API"""
    # Get TF transform metadata generated during preprocessing
    tf_transform_output = tft.TFTransformOutput(flags.input_dir)

    # Define training spec
    feature_spec = tf_transform_output.transformed_feature_spec()
    train_input_fn = functools.partial(input_util.input_fn,
                                       flags.input_dir,
                                       tf.estimator.ModeKeys.TRAIN,
                                       flags.train_batch_size,
                                       flags.num_epochs,
                                       label_name=metadata.LABEL_COLUMN,
                                       feature_spec=feature_spec)
    train_spec = tf.estimator.TrainSpec(train_input_fn,
                                        max_steps=flags.train_steps)

    # Define eval spec
    eval_input_fn = functools.partial(input_util.input_fn,
                                      flags.input_dir,
                                      tf.estimator.ModeKeys.EVAL,
                                      flags.eval_batch_size,
                                      num_epochs=1,
                                      label_name=metadata.LABEL_COLUMN,
                                      feature_spec=feature_spec)
    exporter = tf.estimator.FinalExporter(
        "export",
        functools.partial(input_util.tfrecord_serving_input_fn,
                          feature_spec,
                          label_name=metadata.LABEL_COLUMN))

    eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                      steps=flags.eval_steps,
                                      start_delay_secs=flags.eval_start_secs,
                                      exporters=[exporter],
                                      name='MRI-eval')
    # Per-run step counts (note: computed but not used below).
    steps_per_run_train = 7943 // (flags.train_batch_size * 4)
    steps_per_run_eval = 964 // (flags.eval_batch_size * 4)

    # Additional configs required for using TPUs
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        flags.tpu)
    tpu_config = tf.contrib.tpu.TPUConfig(
        num_shards=8,  # using Cloud TPU v2-8
        iterations_per_loop=200)
    # Define training config
    run_config = tf.contrib.tpu.RunConfig(cluster=tpu_cluster_resolver,
                                          model_dir=flags.job_dir,
                                          tpu_config=tpu_config,
                                          save_checkpoints_steps=200,
                                          save_summary_steps=100)
    # Build the estimator
    feature_columns = model.get_feature_columns(
        tf_transform_output, exclude_columns=metadata.NON_FEATURE_COLUMNS)

    estimator = model.build_estimator(run_config, flags, feature_columns)

    # Run training only; train_and_evaluate is left commented out here.
    # tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    estimator.train(train_input_fn)
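Since `estimator.train` is called without `max_steps`, training runs until the input function exhausts its `num_epochs`. To bound it by step count instead, one could write (a one-line sketch reusing the flags already defined above):

estimator.train(train_input_fn, max_steps=flags.train_steps)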
Example #8
def main(hparams):
  """Run the training and evaluation using the high level API."""
  with tf.gfile.GFile(hparams.train_file, "r") as f:
    train_df = pd.read_csv(f, compression='gzip')
  with tf.gfile.GFile(hparams.test_file, "r") as f:
    test_df = pd.read_csv(f, compression='gzip')


  tf.logging.info('Done fetching training and test datasets.')
  trn_input = tf.estimator.inputs.pandas_input_fn(
    x=train_df,
    y=train_df["polarity"],
    num_epochs=None,
    shuffle=True,
    batch_size=hparams.batch_size,
    num_threads=4,
    queue_capacity=hparams.batch_size * 5
  )
  train_spec = tf.estimator.TrainSpec(trn_input, max_steps=hparams.train_steps)

  eval_input = tf.estimator.inputs.pandas_input_fn(
    x=test_df,
    y=test_df["polarity"],
    num_epochs=1,
    shuffle=False,
    batch_size=hparams.batch_size,
    num_threads=4,
    queue_capacity=hparams.batch_size * 5
  )


  # Construct our JSON serving function for predictions via API.
  exporter = tf.estimator.FinalExporter('model', model.build_serving_fn())
  eval_spec = tf.estimator.EvalSpec(
      eval_input,
      throttle_secs=hparams.eval_secs,
      steps=hparams.eval_steps,
      exporters=[exporter],
      start_delay_secs=20
  )

  run_config = tf.estimator.RunConfig(model_dir=hparams.job_dir)

  # Construct layer sizes by halving the previous layer; integer division
  # keeps the unit counts integral.
  hidden_units = []
  for i in range(hparams.num_layers):
    units = max(1, hparams.first_layer_size // (2**i))
    hidden_units.append(units)

  estimator = model.build_estimator(
      config=run_config,
      hidden_units=hidden_units,
      learning_rate=hparams.learning_rate,
      dropout=hparams.dropout,
      optimizer=hparams.optimizer,
      hub_module=HUB_MODULES.get(hparams.hub_module),
      train_hub=hparams.train_hub_module
  )
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
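With integer division the halving loop always yields whole unit counts; for example, an illustrative first_layer_size=256 with num_layers=3 gives:

hidden_units = [max(1, 256 // (2**i)) for i in range(3)]
# -> [256, 128, 64]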
Example #9
def train_and_maybe_evaluate(hparams):
  """Run the training and evaluate using the high level API.

  Args:
    hparams: Holds hyperparameters used to train the model as name/value pairs.

  Returns:
    The estimator that was used for training (and maybe eval)
  """
  schema = taxi.read_schema(hparams.schema_file)
  tf_transform_output = tft.TFTransformOutput(hparams.tf_transform_dir)

  train_input = lambda: model.input_fn(
      hparams.train_files,
      tf_transform_output,
      batch_size=TRAIN_BATCH_SIZE
  )

  eval_input = lambda: model.input_fn(
      hparams.eval_files,
      tf_transform_output,
      batch_size=EVAL_BATCH_SIZE
  )

  train_spec = tf.estimator.TrainSpec(
      train_input, max_steps=hparams.train_steps)

  serving_receiver_fn = lambda: model.example_serving_receiver_fn(
      tf_transform_output, schema)

  exporter = tf.estimator.FinalExporter('chicago-taxi', serving_receiver_fn)
  eval_spec = tf.estimator.EvalSpec(
      eval_input,
      steps=hparams.eval_steps,
      exporters=[exporter],
      name='chicago-taxi-eval')

  run_config = tf.estimator.RunConfig(
      save_checkpoints_steps=999, keep_checkpoint_max=1)

  serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
  run_config = run_config.replace(model_dir=serving_model_dir)

  estimator = model.build_estimator(
      tf_transform_output,

      # Construct layer sizes with exponential decay
      hidden_units=[
          max(2, int(FIRST_DNN_LAYER_SIZE * DNN_DECAY_FACTOR**i))
          for i in range(NUM_DNN_LAYERS)
      ],
      config=run_config)

  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

  return estimator
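The `model.input_fn` used here lives in an accompanying module. A minimal TF 1.x sketch that parses transformed TFRecords with the feature spec from `TFTransformOutput` (the label key 'tips' is an assumption, not taken from the sample):

def input_fn(filenames, tf_transform_output, batch_size):
    # Parse batches of transformed examples using the post-transform schema.
    feature_spec = tf_transform_output.transformed_feature_spec()
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(
        lambda serialized: tf.parse_example(serialized, feature_spec))
    features = dataset.make_one_shot_iterator().get_next()
    labels = features.pop('tips')  # assumed label column name
    return features, labels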
Example #10
def train_and_maybe_evaluate(hparams):
  """Run the training and evaluate using the high level API.

  Args:
    hparams: Holds hyperparameters used to train the model as name/value pairs.

  Returns:
    The estimator that was used for training (and maybe eval)
  """
  schema = taxi.read_schema(hparams.schema_file)

  train_input = lambda: model.input_fn(
      hparams.train_files,
      hparams.tf_transform_dir,
      batch_size=TRAIN_BATCH_SIZE
  )

  eval_input = lambda: model.input_fn(
      hparams.eval_files,
      hparams.tf_transform_dir,
      batch_size=EVAL_BATCH_SIZE
  )

  train_spec = tf.estimator.TrainSpec(
      train_input, max_steps=hparams.train_steps)

  serving_receiver_fn = lambda: model.example_serving_receiver_fn(
      hparams.tf_transform_dir, schema)

  exporter = tf.estimator.FinalExporter('chicago-taxi', serving_receiver_fn)
  eval_spec = tf.estimator.EvalSpec(
      eval_input,
      steps=hparams.eval_steps,
      exporters=[exporter],
      name='chicago-taxi-eval')

  run_config = tf.estimator.RunConfig(
      save_checkpoints_steps=999, keep_checkpoint_max=1)

  serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
  run_config = run_config.replace(model_dir=serving_model_dir)

  estimator = model.build_estimator(
      hparams.tf_transform_dir,

      # Construct layer sizes with exponential decay
      hidden_units=[
          max(2, int(FIRST_DNN_LAYER_SIZE * DNN_DECAY_FACTOR**i))
          for i in range(NUM_DNN_LAYERS)
      ],
      config=run_config)

  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

  return estimator
Example #11
def train_and_evaluate(args):
    """Run the training and evaluate using the high level API."""
    def train_input():
        """Input function returning batches from the training
        data set from training.
        """
        return input_module.input_fn(
            args.train_files,
            num_epochs=args.num_epochs,
            batch_size=args.train_batch_size,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    def eval_input():
        """Input function returning the entire validation data
        set for evaluation. Shuffling is not required.
        """
        return input_module.input_fn(
            args.eval_files,
            batch_size=args.eval_batch_size,
            shuffle=False,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    train_spec = tf.estimator.TrainSpec(train_input,
                                        max_steps=args.train_steps)

    #exporter = tf.estimator.FinalExporter(
    #    'census', input_module.SERVING_FUNCTIONS[args.export_format])
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        steps=args.eval_steps,
        #exporters=[exporter],
        name='census-eval')

    run_config = tf.estimator.RunConfig(
        session_config=_get_session_config_from_env_var(args))
    #run_config = run_config.replace(model_dir=args.job_dir)
    #print('Model dir %s' % run_config.model_dir)
    estimator = model.build_estimator(
        embedding_size=args.embedding_size,
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(2, int(args.first_layer_size * args.scale_factor**i))
            for i in range(args.num_layers)
        ],
        config=run_config)

    start_time = time.time()
    estimator.train(train_input, max_steps=args.train_steps)
    end_time = time.time()

    print("--------------")
    print("--------------Runing time: ", end_time - start_time)
    print(end_time - start_time)
Example #12
def _experiment_fn(output_dir):
   return Experiment(
       model.build_estimator(output_dir),
       train_input_fn=model.get_input_fn(
           filename=os.path.join(data_dir, 'GOOG_series_train.csv'),
           batch_size=train_batch_size),
       eval_input_fn=model.get_input_fn(
           filename=os.path.join(data_dir, 'GOOG_series_validation.csv'),
           batch_size=eval_batch_size),
       train_steps=train_steps,
       eval_steps=eval_steps,
       **experiment_args
   )
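Snippets like this return an `_experiment_fn` for the legacy `tf.contrib.learn` Experiment API; it is typically handed to `learn_runner` (a sketch based on that old API, with `output_dir` supplied by the caller):

from tensorflow.contrib.learn import learn_runner

learn_runner.run(experiment_fn=_experiment_fn, output_dir=output_dir)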
Example #13
def train_and_evaluate(args):
    """Run the training and evaluate using the high level API."""

    def train_input():
        """Input function returning batches from the training
        data set from training.
        """
        return input_module.input_fn(
            args.train_files,
            num_epochs=args.num_epochs,
            batch_size=args.train_batch_size,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    def eval_input():
        """Input function returning the entire validation data
        set for evaluation. Shuffling is not required.
        """
        return input_module.input_fn(
            args.eval_files,
            batch_size=args.eval_batch_size,
            shuffle=False,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    train_spec = tf.estimator.TrainSpec(
        train_input, max_steps=args.train_steps)

    exporter = tf.estimator.FinalExporter(
        'census', input_module.SERVING_FUNCTIONS[args.export_format])
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        steps=args.eval_steps,
        exporters=[exporter],
        name='census-eval')

    run_config = tf.estimator.RunConfig(
        session_config=_get_session_config_from_env_var())
    run_config = run_config.replace(model_dir=args.job_dir)
    print('Model dir %s' % run_config.model_dir)
    estimator = model.build_estimator(
        embedding_size=args.embedding_size,
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(2, int(args.first_layer_size * args.scale_factor**i))
            for i in range(args.num_layers)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #14
def train_and_evaluate(flags):
    """Runs model training and evaluation using TF Estimator API."""

    # Get TF transform metadata generated during preprocessing
    tf_transform_output = tft.TFTransformOutput(flags.input_dir)

    feature_spec = tf_transform_output.transformed_feature_spec()
    train_input_fn = functools.partial(input_util.input_fn,
                                       input_dir=flags.input_dir,
                                       mode=tf.estimator.ModeKeys.TRAIN,
                                       batch_size=flags.train_batch_size,
                                       num_epochs=flags.num_epochs,
                                       label_name=metadata.LABEL_COLUMN,
                                       feature_spec=feature_spec)
    train_spec = tf.estimator.TrainSpec(train_input_fn,
                                        max_steps=flags.train_steps)

    eval_input_fn = functools.partial(input_util.input_fn,
                                      input_dir=flags.input_dir,
                                      mode=tf.estimator.ModeKeys.EVAL,
                                      batch_size=flags.eval_batch_size,
                                      num_epochs=1,
                                      label_name=metadata.LABEL_COLUMN,
                                      feature_spec=feature_spec)

    exporter = tf.estimator.FinalExporter(
        'export',
        functools.partial(input_util.tfrecord_serving_input_fn,
                          feature_spec=feature_spec,
                          label_name=metadata.LABEL_COLUMN))

    eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                      steps=flags.eval_steps,
                                      start_delay_secs=flags.eval_start_secs,
                                      exporters=[exporter],
                                      name='churn-eval')

    run_config = tf.estimator.RunConfig(
        save_checkpoints_steps=flags.checkpoint_steps,
        tf_random_seed=metadata.SEED,
        model_dir=flags.job_dir)

    feature_columns = model.get_feature_columns(
        tf_transform_output, exclude_columns=metadata.NON_FEATURE_COLUMNS)
    num_intervals = metadata.NUM_INTERVALS
    estimator = model.build_estimator(run_config, flags, feature_columns,
                                      num_intervals)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #15
def run_it(args):
    dp = model.data_pipeline(args.feature_file, args.label_file)

    train_input = lambda: dp.training()
    eval_input = lambda: dp.testing()

    train_spec = tf.estimator.TrainSpec(train_input, max_steps=10000)
    eval_spec = tf.estimator.EvalSpec(eval_input, steps=999)

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=args.model_dir)

    estimator = model.build_estimator(config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #16
def train_and_evaluate(args, l1, l2, lr):

	def train_input():
		return input_module.input_fn(
			args.training_file,
			num_epochs=args.num_epochs,
			shuffle=True,
			batch_size=args.train_batch_size)

	def eval_input():
		return input_module.input_fn(
			args.eval_file,
			num_epochs=100,
			shuffle=False,
			batch_size=1)

	def evaluate_model(estimator):
		eval_inpf = functools.partial(input_module.input_fn, args.eval_file, num_epochs=1, shuffle=False, batch_size=1)
		results = estimator.evaluate(eval_inpf)
		for key, value in sorted(results.items()):
			print('%s: %0.2f' % (key, value))

	def predictions_to_csv(estimator, predictions_on='training'):
		if predictions_on == 'training':
			features = functools.partial(input_module.input_fn, args.training_file, num_epochs=1, shuffle=False, batch_size=1, include_labels=False)
		predictions = []
		preds = estimator.predict(input_fn=features)
		for pred in preds:
			predictions.append(np.argmax(pred['probabilities']))
		predictions = np.asarray(predictions).reshape(len(predictions), 1)
		if predictions_on == 'training':
			features = pd.read_csv(args.training_file).values
		results = np.concatenate((features, predictions), axis=1)
		pd.DataFrame(results, columns=input_module.PREDICTIONS_COLUMNS).to_csv(
			os.getcwd() + '/data/%s-predictions.csv' % predictions_on, index=False)

	train_spec = tf.estimator.TrainSpec(train_input, max_steps=args.train_steps)
	exporter = tf.estimator.FinalExporter('model', input_module.SERVING_FUNCTIONS[args.export_format])
	eval_spec = tf.estimator.EvalSpec(eval_input, steps=100, exporters=[exporter], name='model_eval')

	run_config = tf.estimator.RunConfig(session_config=_get_session_config_from_env_var())
	run_config = run_config.replace(model_dir=args.job_dir)
	print('Model dir %s' % run_config.model_dir)

	estimator = model.build_estimator(model_dir=args.job_dir, l1=l1, l2=l2, lr=lr)
	tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
	evaluate_model(estimator)
	predictions_to_csv(estimator)
Example #17
def run_experiment(hparams):
  """Run the training and evaluate using the high level API"""

  train_input = lambda: model.input_fn(
      hparams.train_files,
      num_epochs=hparams.num_epochs,
      batch_size=hparams.train_batch_size
  )

  # Don't shuffle evaluation data
  eval_input = lambda: model.input_fn(
      hparams.eval_files,
      batch_size=hparams.eval_batch_size,
      shuffle=False
  )

  train_spec = tf.estimator.TrainSpec(train_input,
                                      max_steps=hparams.train_steps
                                      )

  exporter = tf.estimator.FinalExporter('census',
          model.SERVING_FUNCTIONS[hparams.export_format])
  eval_spec = tf.estimator.EvalSpec(eval_input,
                                    steps=hparams.eval_steps,
                                    exporters=[exporter],
                                    name='census-eval'
                                    )

  run_config = tf.estimator.RunConfig()
  run_config = run_config.replace(model_dir=hparams.job_dir)
  print('model dir {}'.format(run_config.model_dir))
  estimator = model.build_estimator(
      embedding_size=hparams.embedding_size,
      # Construct layer sizes with exponential decay
      hidden_units=[
          max(2, int(hparams.first_layer_size *
                     hparams.scale_factor**i))
          for i in range(hparams.num_layers)
      ],
      config=run_config
  )

  tf.estimator.train_and_evaluate(estimator,
                                  train_spec,
                                  eval_spec)
Example #18
def train_and_evaluate(args):
    """
    Run training and evaluation using the high-level API.
    """
    def train_input():
        """
        Input function returning batches from the training data set from training.
        """
        return input_module.input_fn(
            args.train_files,
            num_epochs=args.num_epochs,
            batch_size=args.train_batch_size,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    def eval_input():
        """
        Input function returning the entire validation data set for evaluation.
        Shuffling is not required.
        """
        return input_module.input_fn(
            args.eval_files,
            batch_size=args.eval_batch_size,
            shuffle=False,
            num_parallel_calls=args.num_parallel_calls,
            prefetch_buffer_size=args.prefetch_buffer_size)

    train_spec = tf.estimator.TrainSpec(train_input,
                                        max_steps=args.train_steps)

    exporter = tf.estimator.FinalExporter(
        'stores', input_module.SERVING_FUNCTIONS[args.export_format])
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=args.eval_steps,
                                      exporters=[exporter],
                                      name='stores-eval')

    run_config = tf.estimator.RunConfig(
        session_config=_get_session_config_from_env_var())
    run_config = run_config.replace(model_dir=args.job_dir)
    print('Model dir %s' % run_config.model_dir)
    estimator = model.build_estimator(config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #19
def _experiment_fn(output_dir):
     return Experiment(model.build_estimator(output_dir),
                       train_input_fn=model.get_input_fn(
                           filename=os.path.join(data_dir,
                                                 'train.tfrecords'),
                           batch_size=train_batch_size),
                       eval_input_fn=model.get_input_fn(
                           filename=os.path.join(data_dir,
                                                 'test.tfrecords'),
                           batch_size=eval_batch_size),
                       export_strategies=[
                           saved_model_export_utils.make_export_strategy(
                               model.serving_input_fn,
                               default_output_alternative_key=None,
                               exports_to_keep=1)
                       ],
                       train_steps=train_steps,
                       eval_steps=eval_steps,
                       **experiment_args)
Example #20
def _experiment_fn(output_dir):
     input_fn = model.generate_csv_input_fn
     train_input = input_fn(train_data_paths,
                            num_epochs=num_epochs,
                            batch_size=train_batch_size)
     eval_input = input_fn(eval_data_paths,
                           batch_size=eval_batch_size,
                           mode=tf.contrib.learn.ModeKeys.EVAL)
     return Experiment(
         model.build_estimator(output_dir, hidden_units=hidden_units),
         train_input_fn=train_input,
         eval_input_fn=eval_input,
         export_strategies=[
             saved_model_export_utils.make_export_strategy(
                 model.serving_input_fn,
                 default_output_alternative_key=None,
                 exports_to_keep=1)
         ],
         eval_metrics=model.get_eval_metrics(),
         #min_eval_frequency = 1000,  # change this to speed up training on large datasets
         **experiment_args)
Example #21
def main(hparams):
    """Run the training and evaluate using the high level API."""

    trn_input = lambda: model.input_fn(hparams.train_files,
                                       batch_size=hparams.train_batch_size)
    train_spec = tf.estimator.TrainSpec(trn_input,
                                        max_steps=hparams.train_steps)

    eval_input = lambda: model.input_fn(
        hparams.eval_files,
        batch_size=hparams.eval_batch_size,
    )

    # Construct our JSON serving function for Online Predictions using GCP.
    exporter = tf.estimator.FinalExporter('model', model.build_serving_fn())
    eval_spec = tf.estimator.EvalSpec(
        eval_input,
        throttle_secs=hparams.eval_secs,
        steps=hparams.eval_steps,
        exporters=[exporter],
    )

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=hparams.job_dir)
    # Construct layer sizes with exponential decay
    hidden_units = [
        max(2, int(hparams.first_layer_size * hparams.scale_factor**i))
        for i in range(hparams.num_layers)
    ]
    estimator = model.build_estimator(
        config=run_config,
        hidden_units=hidden_units,
        learning_rate=hparams.learning_rate,
        dropout=hparams.dropout,
        embedding_vocab_file=hparams.cpc_embedding_vocab_file,
        embedding_dim=hparams.cpc_embedding_dim,
    )
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #22
def execute(hypes, metadata, job_directory):
    data_directory = 'working_dir/data/%s' % (hypes['data_directory'])
    hypes['data'] = json.loads(
        storage.get('%s/config.json' % data_directory).decode('utf-8'))

    storage.write(json.dumps(hypes, indent=2, sort_keys=True),
                  "%s/hypes.json" % job_directory)

    estimator = model.build_estimator(hypes, metadata, job_directory)

    train_input_fn = model.get_input_fn(hypes, ModeKeys.TRAIN)
    train_steps = hypes['epochs'] * data.length(data_directory, ModeKeys.TRAIN)
    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                        max_steps=train_steps)

    eval_input_fn = model.get_input_fn(hypes, ModeKeys.EVAL)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=eval_input_fn,
        steps=hypes['eval_steps'],
        throttle_secs=hypes['eval_throttle_seconds'])

    # Run the training job
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #23
def train(net, input_fn, steps):
    net.train(input_fn=input_fn, steps=steps)


def predict(net, input_fn):
    return net.predict(input_fn=input_fn)


def evaluate(net, input_fn):
    return net.evaluate(input_fn=input_fn)


def hopt(net, input_fn, steps):
    pass


nn = M.build_estimator(config, **args.__dict__)

if 'train' == args.mode:
    train(nn, input_fn, args.num_steps)

elif 'eval' == args.mode:
    score = evaluate(nn, input_fn)
    print('score:', score)

elif 'predict' == args.mode:
    prediction = predict(nn, input_fn)
    for i, p in enumerate(prediction):
        print(p)
        if i > 15:
            break
Example #24
def train_and_maybe_evaluate(hparams):
    """Run the training and evaluate using the high level API.

    Args:
        hparams: Holds hyperparameters used to train the model as name/value pairs.

    Returns:
        The estimator that was used for training (and maybe eval)
    """
    tf_transform_output = tft.TFTransformOutput(hparams.tf_transform_dir)
    tag = hparams.tag

    def train_input():
        return model.input_fn(hparams.train_files,
                              tf_transform_output,
                              batch_size=TRAIN_BATCH_SIZE)

    def eval_input():
        return model.input_fn(hparams.eval_files,
                              tf_transform_output,
                              batch_size=EVAL_BATCH_SIZE)

    train_spec = tf.estimator.TrainSpec(train_input,
                                        max_steps=hparams.train_steps)

    def serving_receiver_fn():
        return model.example_serving_receiver_fn(tf_transform_output)

    exporter = tf.estimator.FinalExporter(tag, serving_receiver_fn)
    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=hparams.eval_steps,
                                      exporters=[exporter],
                                      name='{}-eval'.format(tag))

    run_config = tf.estimator.RunConfig(
        # save_checkpoints_steps=999,
        keep_checkpoint_max=1)

    # serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
    serving_model_dir = os.path.join(
        hparams.output_dir, '{}_serving'.format(hparams.first_dnn_layer_size))

    run_config = run_config.replace(model_dir=serving_model_dir)

    estimator = model.build_estimator(
        run_config,
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(
                2,
                int(hparams.first_dnn_layer_size *
                    hparams.dnn_decay_factor**i))
            for i in range(hparams.num_dnn_layers)
        ],
        wide=hparams.wide)

    estimator = tf.estimator.add_metrics(estimator, my_metric)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    def eval_input_receiver_fn():
        return model.eval_input_receiver_fn(tf_transform_output)

    eval_model_dir = os.path.join(
        hparams.output_dir, '{}_eval'.format(hparams.first_dnn_layer_size))
    tfma.export.export_eval_savedmodel(
        estimator=estimator,
        export_dir_base=eval_model_dir,
        eval_input_receiver_fn=eval_input_receiver_fn)

    return estimator
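Example #24 wraps the estimator with `tf.estimator.add_metrics(estimator, my_metric)` but never shows `my_metric`. A metric_fn receives (a subset of) features, labels, and predictions by name and returns a dict of metric ops; a hedged sketch, where the 'logistic' prediction key is an assumption:

def my_metric(labels, predictions):
    # Extra evaluation metric attached via tf.estimator.add_metrics.
    return {'auc_extra': tf.metrics.auc(labels, predictions['logistic'])}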
Example #25
def train_and_evaluate(hparams):
    """Run the training and evaluate using the high level API."""

    def train_input():
        """Input function returning batches from the training
        data set from training.
        """
        return input_module.make_training_input_fn(
            hparams.tft_output_dir,
            hparams.train_filebase,
            hparams.weight,
            num_epochs=hparams.num_epochs,
            batch_size=hparams.train_batch_size,
            buffer_size=hparams.buffer_size,
            prefetch_buffer_size=hparams.prefetch_buffer_size)

    def eval_input():
        """Input function returning the entire validation data
        set for evaluation. Shuffling is not required.
        """
        return input_module.make_training_input_fn(
            hparams.tft_output_dir,
            hparams.eval_filebase,
            hparams.weight,
            shuffle=False,
            batch_size=hparams.eval_batch_size,
            buffer_size=hparams.buffer_size,
            prefetch_buffer_size=hparams.prefetch_buffer_size)

    # make_training_input_fn returns an input_fn, so train_input() and
    # eval_input() are called here and their results handed to the specs.
    train_spec = tf.estimator.TrainSpec(
        train_input(), max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter(
        'model', input_module.make_serving_input_receiver_fn(
            hparams.tft_output_dir, hparams.schema_file))

    eval_spec = tf.estimator.EvalSpec(
        eval_input(),
        steps=hparams.eval_steps,
        exporters=[exporter],
        name='model-eval')

    run_config = tf.estimator.RunConfig(
        model_dir=os.path.join(hparams.job_dir, hparams.serving_model_dir),
        session_config=_get_session_config_from_env_var(),
        save_checkpoints_steps=999,
        keep_checkpoint_max=1)

    print('Model dir %s' % run_config.model_dir)

    estimator = model.build_estimator(
        tft_output_dir=hparams.tft_output_dir,
        embedding_size=hparams.embedding_size,
        weight=hparams.weight,
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(2, int(hparams.first_layer_size * hparams.scale_factor**i))
            for i in range(hparams.num_layers)
        ],
        config=run_config)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)