Example 1
def find_hyperparams(
        config_file: str,
        train_best_model: bool = typer.Argument(False),
):
    search_config = _load_config(config_file, "search")
    param_grid = search_config["grid"]
    n_jobs = search_config["jobs"]
    metric = _load_config(config_file, "metrics")[0]
    dummy_hyperparams = {name: {} for name in param_grid.keys()}
    estimator = model.build_estimator(dummy_hyperparams)
    scoring = metrics.get_scoring_function(metric["name"], **metric["params"])
    gs = GridSearchCV(
        estimator,
        _param_grid_to_sklearn_format(param_grid),
        n_jobs=n_jobs,
        scoring=scoring,
        verbose=3,
    )
    split = "train"
    X, y = _get_dataset(_load_config(config_file, "data"),
                        splits=[split])[split]
    gs.fit(X, y)
    hyperparams = _param_grid_to_custom_format(gs.best_params_)
    estimator = model.build_estimator(hyperparams)
    output_dir = _load_config(config_file, "export")["output_dir"]
    _save_versioned_estimator(estimator, hyperparams, output_dir)
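
This is a Typer command, so the natural entry point is a Typer app. A minimal sketch of that wiring follows; the app object, module name, and CLI invocation below are assumptions, not part of the original snippet:

import typer

app = typer.Typer()
# Register the command defined above on the app.
app.command()(find_hyperparams)

if __name__ == "__main__":
    # e.g. `python tune.py config.yml` (config.yml is a hypothetical path)
    app()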
Example 2
 def _experiment_fn(run_config, hparams):
     # num_epochs can control duration if train_steps isn't
     # passed to Experiment
     train_input = lambda: model.generate_input_fn(
         hparams.train_files,
         num_epochs=hparams.num_epochs,
         batch_size=hparams.train_batch_size,
     )
     # Don't shuffle evaluation data
     eval_input = lambda: model.generate_input_fn(hparams.eval_files,
                                                  batch_size=hparams.eval_batch_size,
                                                  shuffle=False)
     return tf.contrib.learn.Experiment(
         model.build_estimator(
             embedding_size=hparams.embedding_size,
             # Construct layer sizes with exponential decay
             hidden_units=[
                 max(
                     2,
                     int(hparams.first_layer_size *
                         hparams.scale_factor**i))
                 for i in range(hparams.num_layers)
             ],
             config=run_config),
         train_input_fn=train_input,
         eval_input_fn=eval_input,
         **experiment_args)
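
Experiment functions with this (run_config, hparams) signature are not called directly; they are typically handed to learn_runner.run, which builds and runs the Experiment. A minimal sketch of that glue, assuming args is a parsed argparse namespace carrying the attributes the lambdas above read (train_files, eval_files, and so on) plus a hypothetical job_dir, and that experiment_args is available in the enclosing scope:

import tensorflow as tf
from tensorflow.contrib.learn.python.learn import learn_runner

# Bundle the parsed flags into an HParams object for the experiment fn.
hparams = tf.contrib.training.HParams(**args.__dict__)
# The RunConfig pins the model directory; learn_runner passes it through.
run_config = tf.contrib.learn.RunConfig(model_dir=args.job_dir)

learn_runner.run(_experiment_fn, run_config=run_config, hparams=hparams)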
Example 3
 def _experiment_fn(run_config, hparams):
   # num_epochs can control duration if train_steps isn't
   # passed to Experiment
   train_input = lambda: model.generate_input_fn(
       hparams.train_files,
       num_epochs=hparams.num_epochs,
       batch_size=hparams.train_batch_size,
   )
   # Don't shuffle evaluation data
   eval_input = lambda: model.generate_input_fn(
       hparams.eval_files,
       batch_size=hparams.eval_batch_size,
       shuffle=False
   )
   return tf.contrib.learn.Experiment(
       model.build_estimator(
           embedding_size=hparams.embedding_size,
           # Construct layer sizes with exponential decay
           hidden_units=[
               max(2, int(hparams.first_layer_size *
                          hparams.scale_factor**i))
               for i in range(hparams.num_layers)
           ],
           config=run_config
       ),
       train_input_fn=train_input,
       eval_input_fn=eval_input,
       **experiment_args
   )
Example 4
 def _experiment_fn(output_dir):
     # num_epochs can control duration if train_steps isn't
     # passed to Experiment
     train_input = model.generate_input_fn(
         train_files,
         num_epochs=num_epochs,
         batch_size=train_batch_size,
     )
     # Don't shuffle evaluation data
     eval_input = model.generate_input_fn(eval_files,
                                          batch_size=eval_batch_size,
                                          shuffle=False)
     return tf.contrib.learn.Experiment(
         model.build_estimator(
             output_dir,
              # Construct layer sizes with exponential decay
             hidden_units=[
                 max(2, int(first_layer_size * scale_factor**i))
                 for i in range(num_layers)
             ]),
         train_input_fn=train_input,
         eval_input_fn=eval_input,
         # export strategies control the prediction graph structure
         # of exported binaries.
         export_strategies=[
             saved_model_export_utils.make_export_strategy(
                 model.serving_input_fn,
                 default_output_alternative_key=None,
                 exports_to_keep=1)
         ],
         **experiment_args)
Example 5
def train(config_file: str):
    hyperparams = _load_config(config_file, "hyperparams")
    split = "train"
    X, y = _get_dataset(_load_config(config_file, "data"), splits=[split])[split]
    estimator = model.build_estimator(hyperparams)
    estimator.fit(X, y)
    output_dir = _load_config(config_file, "export")["output_dir"]
    version = _save_versioned_estimator(estimator, hyperparams, output_dir)
    return version
Example 6
def train_and_maybe_evaluate(hparams):
  """Run the training and evaluate using the high level API.

  Args:
    hparams: Holds hyperparameters used to train the model as name/value pairs.

  Returns:
    The estimator that was used for training (and maybe eval)
  """
  schema = bookings.read_schema(hparams.schema_file)
  tf_transform_output = tft.TFTransformOutput(hparams.tf_transform_dir)

  train_input = lambda: model.input_fn(
      hparams.train_files,
      tf_transform_output,
      batch_size=TRAIN_BATCH_SIZE
  )

  eval_input = lambda: model.input_fn(
      hparams.eval_files,
      tf_transform_output,
      batch_size=EVAL_BATCH_SIZE
  )

  train_spec = tf.estimator.TrainSpec(
      train_input, max_steps=hparams.train_steps)

  serving_receiver_fn = lambda: model.example_serving_receiver_fn(
      tf_transform_output, schema)

  exporter = tf.estimator.FinalExporter('bookings', serving_receiver_fn)
  eval_spec = tf.estimator.EvalSpec(
      eval_input,
      steps=hparams.eval_steps,
      exporters=[exporter],
      name='bookings-eval')

  run_config = tf.estimator.RunConfig(
      save_checkpoints_steps=999, keep_checkpoint_max=1)

  serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
  run_config = run_config.replace(model_dir=serving_model_dir)

  estimator = model.build_estimator(
      tf_transform_output,

      # Construct layer sizes with exponential decay
      hidden_units=[
          max(2, int(FIRST_DNN_LAYER_SIZE * DNN_DECAY_FACTOR**i))
          for i in range(NUM_DNN_LAYERS)
      ],
      config=run_config)

  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

  return estimator
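
The hparams object here only needs attribute access, so tf.contrib.training.HParams works as a container. A hedged sketch of a call site, where every path and step count is a hypothetical placeholder and the field names simply mirror what the function body reads:

import tensorflow as tf

hparams = tf.contrib.training.HParams(
    schema_file='schema.pbtxt',        # hypothetical path
    tf_transform_dir='tft_output',     # output dir of a prior tf.Transform run
    train_files='train-*.tfrecord',    # hypothetical file pattern
    eval_files='eval-*.tfrecord',
    train_steps=10000,
    eval_steps=500,
    output_dir='output')

estimator = train_and_maybe_evaluate(hparams)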
Example 7
def train_and_maybe_evaluate(train_files, eval_files, hparams):
  """Run the training and evaluate using the high level API.

  Args:
    train_files: File paths or patterns for the training data.
    eval_files: File paths or patterns for the evaluation data.
    hparams: Holds hyperparameters used to train the model as name/value pairs.

  Returns:
    The estimator that was used for training (and maybe eval)
  """
  schema = taxi.read_schema('schema.pbtxt')

  train_input = lambda: model.input_fn(
      train_files,
      hparams.tf_transform_dir,
      batch_size=TRAIN_BATCH_SIZE
  )

  eval_input = lambda: model.input_fn(
      eval_files,
      hparams.tf_transform_dir,
      batch_size=EVAL_BATCH_SIZE
  )

  train_spec = tf.estimator.TrainSpec(
      train_input, max_steps=hparams.train_steps)

  serving_receiver_fn = lambda: model.example_serving_receiver_fn(
      hparams.tf_transform_dir, schema)

  exporter = tf.estimator.FinalExporter('chicago-taxi', serving_receiver_fn)
  eval_spec = tf.estimator.EvalSpec(
      eval_input,
      steps=hparams.eval_steps,
      exporters=[exporter],
      name='chicago-taxi-eval')

  run_config = tf.estimator.RunConfig(
      save_checkpoints_steps=999, keep_checkpoint_max=1)

  serving_model_dir = os.path.join(hparams.output_dir, SERVING_MODEL_DIR)
  run_config = run_config.replace(model_dir=serving_model_dir)

  estimator = model.build_estimator(
      hparams.tf_transform_dir,

      # Construct layer sizes with exponential decay
      hidden_units=[
          max(2, int(FIRST_DNN_LAYER_SIZE * DNN_DECAY_FACTOR**i))
          for i in range(NUM_DNN_LAYERS)
      ],
      config=run_config)

  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

  return estimator
Example 8
 def _experiment_fn(run_config, hparams):
     # num_epochs can control duration if train_steps isn't
     # passed to Experiment
     train_input = lambda: model.generate_input_fn()
     # Don't shuffle evaluation data
     eval_input = lambda: model.generate_input_fn(shuffle=False)
     return tf.contrib.learn.Experiment(
         model.build_estimator(config=run_config),
         train_input_fn=train_input,
         eval_input_fn=eval_input,
         **experiment_args)
Example 9
 def _experiment_fn(output_dir):
     return tf.contrib.learn.Experiment(
         model.build_estimator(output_dir, n_classes, params, config),
         train_input_fn=model.make_input_fn_stroke(
             files_pattern=os.path.join(data_dir, 'train-*'),
             batch_size=train_batch_size),
         eval_input_fn=model.make_input_fn_stroke(
             files_pattern=os.path.join(data_dir, 'eval-*'),
             batch_size=eval_batch_size),
         export_strategies=[
             tf.contrib.learn.utils.saved_model_export_utils.
             make_export_strategy(model.serving_input_fn, exports_to_keep=1)
         ],
         train_steps=train_steps,
         eval_steps=eval_steps,
         **experiment_args)
Example 10
def train_and_evaluate(args):
  """Run the training and evaluate using the high level API"""
  train_input = model._make_training_input_fn(
      args.tft_working_dir,
      args.train_filebase,
      num_epochs=args.num_epochs,
      batch_size=args.train_batch_size,
      buffer_size=args.train_buffer_size,
      prefetch_buffer_size=args.train_prefetch_buffer_size)

  # Don't shuffle evaluation data.
  eval_input = model._make_training_input_fn(
      args.tft_working_dir,
      args.eval_filebase,
      shuffle=False,
      batch_size=args.eval_batch_size,
      buffer_size=1,
      prefetch_buffer_size=args.eval_prefetch_buffer_size)

  train_spec = tf.estimator.TrainSpec(
      train_input, max_steps=args.train_steps)

  exporter = tf.estimator.FinalExporter(
      'tft_classifier', model._make_serving_input_fn(args.tft_working_dir))

  eval_spec = tf.estimator.EvalSpec(
      eval_input,
      steps=args.eval_steps,
      exporters=[exporter],
      name='tft_classifier-eval')

  run_config = tf.estimator.RunConfig()
  run_config = run_config.replace(model_dir=args.job_dir)

  print('model dir {}'.format(run_config.model_dir))
  estimator = model.build_estimator(
      config=run_config,
      tft_working_dir=args.tft_working_dir,
      embedding_size=args.embedding_size,
      # Construct layer sizes with exponential decay.
      hidden_units=[
          max(2, int(args.first_layer_size * args.scale_factor**i))
          for i in range(args.num_layers)
      ],
  )

  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example 11
def run_experiment(hparams):
    """Run the training and evaluate using the high level API"""
    train_input = model._make_training_input_fn(
        hparams.tft_working_dir,
        hparams.train_filebase,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.train_batch_size,
        buffer_size=hparams.train_buffer_size,
        prefetch_buffer_size=hparams.train_prefetch_buffer_size)

    # Don't shuffle evaluation data
    eval_input = model._make_training_input_fn(
        hparams.tft_working_dir,
        hparams.eval_filebase,
        shuffle=False,
        batch_size=hparams.eval_batch_size,
        buffer_size=1,
        prefetch_buffer_size=hparams.eval_prefetch_buffer_size)

    train_spec = tf.estimator.TrainSpec(train_input,
                                        max_steps=hparams.train_steps)

    exporter = tf.estimator.FinalExporter(
        'tft_classifier',
        model._make_serving_input_fn(hparams.tft_working_dir))

    eval_spec = tf.estimator.EvalSpec(eval_input,
                                      steps=hparams.eval_steps,
                                      exporters=[exporter],
                                      name='tft_classifier-eval')

    run_config = tf.estimator.RunConfig()
    run_config = run_config.replace(model_dir=hparams.job_dir)

    print('model dir {}'.format(run_config.model_dir))
    estimator = model.build_estimator(
        config=run_config,
        tft_working_dir=hparams.tft_working_dir,
        embedding_size=hparams.embedding_size,
        # Construct layer sizes with exponential decay
        hidden_units=[
            max(2, int(hparams.first_layer_size * hparams.scale_factor**i))
            for i in range(hparams.num_layers)
        ],
    )

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example 12
 def _experiment_fn(output_dir):
   return Experiment(
       model.build_estimator(output_dir),
       train_input_fn=model.get_input_fn(
           filename=os.path.join(data_dir, 'train.tfrecords'),
           batch_size=train_batch_size),
       eval_input_fn=model.get_input_fn(
           filename=os.path.join(data_dir, 'test.tfrecords'),
           batch_size=eval_batch_size),
       export_strategies=[saved_model_export_utils.make_export_strategy(
           model.serving_input_fn,
           default_output_alternative_key=None,
           exports_to_keep=1)],
       train_steps=train_steps,
       eval_steps=eval_steps,
       **experiment_args
   )
Example 13
def run_experiment(hparams):
  """Run the training and evaluate using the high level API"""

  train_input = lambda: model.input_fn(
      hparams.train_files,
      num_epochs=hparams.num_epochs,
      batch_size=hparams.train_batch_size
  )

  # Don't shuffle evaluation data
  eval_input = lambda: model.input_fn(
      hparams.eval_files,
      batch_size=hparams.eval_batch_size,
      shuffle=False
  )

  train_spec = tf.estimator.TrainSpec(train_input,
                                      max_steps=hparams.train_steps
                                      )

  exporter = tf.estimator.FinalExporter(
      'census', model.SERVING_FUNCTIONS[hparams.export_format])
  eval_spec = tf.estimator.EvalSpec(eval_input,
                                    steps=hparams.eval_steps,
                                    exporters=[exporter],
                                    name='census-eval'
                                    )

  run_config = tf.estimator.RunConfig()
  run_config = run_config.replace(model_dir=hparams.job_dir)
  print('model dir {}'.format(run_config.model_dir))
  estimator = model.build_estimator(
      embedding_size=hparams.embedding_size,
      # Construct layer sizes with exponential decay
      hidden_units=[
          max(2, int(hparams.first_layer_size *
                     hparams.scale_factor**i))
          for i in range(hparams.num_layers)
      ],
      config=run_config
  )

  tf.estimator.train_and_evaluate(estimator,
                                  train_spec,
                                  eval_spec)
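
In samples of this shape, the hparams object is usually built straight from the parsed command-line flags. A small sketch of that step, assuming args is an argparse namespace whose attributes cover everything run_experiment reads (train_files, eval_files, job_dir, export_format, the layer-sizing flags, and so on):

from tensorflow.contrib.training.python.training import hparam

# Convert the argparse namespace into an HParams object and run.
hparams = hparam.HParams(**args.__dict__)
run_experiment(hparams)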
Example 14
 def _experiment_fn(output_dir):
   return tf.contrib.learn.Experiment(
       model.build_estimator(output_dir, n_classes, params, config),
       train_input_fn=model.make_input_fn(
         files_pattern=os.path.join(data_dir, 'train-*'),
         batch_size=train_batch_size),
       eval_input_fn=model.make_input_fn(
         files_pattern=os.path.join(data_dir, 'eval-*'),
         batch_size=eval_batch_size),
       export_strategies=[
           tf.contrib.learn.utils.saved_model_export_utils.make_export_strategy(
               model.serving_input_fn,
               exports_to_keep=1)
       ],
       train_steps=train_steps,
       eval_steps=eval_steps,
       **experiment_args
       )
Example 15
 def _experiment_fn(output_dir):
     train_input = model.generate_input_fn(train_file,
                                           num_epochs=num_epochs,
                                           batch_size=train_batch_size)
     eval_input = model.generate_input_fn(eval_file,
                                          batch_size=eval_batch_size)
     return Experiment(model.build_estimator(job_dir,
                                             embedding_size=embedding_size,
                                             hidden_units=hidden_units),
                       train_input_fn=train_input,
                       eval_input_fn=eval_input,
                       export_strategies=[
                           saved_model_export_utils.make_export_strategy(
                               model.serving_input_fn,
                               default_output_alternative_key=None,
                               exports_to_keep=1)
                       ],
                       **experiment_args)
Example 16
 def _experiment_fn(run_config, hparams):
     """Definicion de experimento"""
     # Funcion de entrada de entrenamiento
     train_input = lambda: model.generate_input_fn(
         hparams.train_files,
         num_epochs=hparams.num_epochs,
         batch_size=hparams.train_batch_size,
     )
     # Evaluation input function
     eval_input = lambda: model.generate_input_fn(
         hparams.eval_files,
         batch_size=hparams.eval_batch_size
     )
     # Experiment
     return tf.contrib.learn.Experiment(
         model.build_estimator(hparams.job_dir),
         train_input_fn=train_input,
         eval_input_fn=eval_input,
         **experiment_args
     )
Example 17
File: task.py Project: gachet/GCP
 def _experiment_fn(output_dir):
     input_fn = model.generate_csv_input_fn
     train_input = input_fn(train_data_paths,
                            num_epochs=num_epochs,
                            batch_size=train_batch_size)
     eval_input = input_fn(eval_data_paths,
                           batch_size=eval_batch_size,
                           mode=tf.contrib.learn.ModeKeys.EVAL)
     return Experiment(model.build_estimator(output_dir,
                                             hidden_units=hidden_units),
                       train_input_fn=train_input,
                       eval_input_fn=eval_input,
                       export_strategies=[
                           saved_model_export_utils.make_export_strategy(
                               model.serving_input_fn,
                               default_output_alternative_key=None,
                               exports_to_keep=1)
                       ],
                       eval_metrics=model.get_eval_metrics(),
                       **experiment_args)
Example 18
 def _experiment_fn(output_dir):
   input_fn = model.generate_csv_input_fn
   train_input = input_fn(
       train_data_paths, num_epochs=num_epochs, batch_size=train_batch_size)
   eval_input = input_fn(
       eval_data_paths, batch_size=eval_batch_size, mode=tf.contrib.learn.ModeKeys.EVAL)
   return Experiment(
       model.build_estimator(
           output_dir,
           hidden_units=hidden_units
       ),
       train_input_fn=train_input,
       eval_input_fn=eval_input,
       export_strategies=[saved_model_export_utils.make_export_strategy(
           model.serving_input_fn,
           default_output_alternative_key=None,
           exports_to_keep=1
       )],
       eval_metrics=model.get_eval_metrics(),
       #min_eval_frequency = 1000,  # change this to speed up training on large datasets
       **experiment_args
   )
Example 19
 def _experiment_fn(output_dir):
   input_fn = (model.generate_csv_input_fn if format == 'csv' 
                else model.generate_tfrecord_input_fn)
   train_input = input_fn(
       train_data_paths, num_epochs=num_epochs, batch_size=train_batch_size)
   eval_input = input_fn(
       eval_data_paths, batch_size=eval_batch_size, mode=tf.contrib.learn.ModeKeys.EVAL)
   return Experiment(
       model.build_estimator(
           output_dir,
           nbuckets=nbuckets,
           hidden_units=parse_to_int(hidden_units)
       ),
       train_input_fn=train_input,
       eval_input_fn=eval_input,
       export_strategies=[saved_model_export_utils.make_export_strategy(
           model.serving_input_fn,
           default_output_alternative_key=None,
           exports_to_keep=1
       )],
       eval_metrics=model.get_eval_metrics(),
       #min_eval_frequency = 1000,  # change this to speed up training on large datasets
       **experiment_args
   )
Example 20
 def _experiment_fn(output_dir):
     train_input = model.generate_input_fn(train_file,
                                           num_epochs=num_epochs,
                                           batch_size=train_batch_size)
     eval_input = model.generate_input_fn(eval_file,
                                          batch_size=eval_batch_size)
     return learn.Experiment(
         model.build_estimator(job_dir,
                               embedding_size=embedding_size,
                               hidden_units=hidden_units),
         train_input_fn=train_input,
         eval_input_fn=eval_input,
         eval_metrics={
             'training/hptuning/metric':
             learn.MetricSpec(metric_fn=metrics.streaming_accuracy,
                              prediction_key='logits')
         },
         export_strategies=[
             saved_model_export_utils.make_export_strategy(
                 model.serving_input_fn,
                 default_output_alternative_key=None,
                 exports_to_keep=1)
         ],
         **experiment_args)
Example 21
    parser.add_argument(
        '--job-dir',
        help='GCS location to write checkpoints and export models',
        required=True)

    # Argument to turn on all logging
    parser.add_argument(
        '--verbosity',
        choices=['DEBUG', 'ERROR', 'FATAL', 'INFO', 'WARN'],
        default='INFO',
    )

    args = parser.parse_args()

    run_config = run_config.RunConfig(model_dir=args.job_dir)
    estimator = model.build_estimator(run_config)

    def prediction_input_fn():
        feature_placeholders = {
            'wvec': tf.placeholder(tf.float32, [1, 2, 3]),
            'dvec': tf.placeholder(tf.float32, [1, 2, 3]),
        }
        features = {
            key: tf.expand_dims(tensor, -1)
            for key, tensor in feature_placeholders.items()
        }

        return tf.contrib.learn.InputFnOps(features, None,
                                           feature_placeholders)

    predictor = from_contrib_estimator(estimator=estimator,
Example 22
# df['signup_date'] = df['signup_date'].apply(lambda x: start + timedelta(days=x))
predict_df['last_service_use_date'] = predict_df[
    'last_service_use_date'].apply(lambda x: start + timedelta(days=x))

# df.rename(columns={'Unnamed: 0': 'user_id'}, inplace=True)

# Get user's recency
predict_df['recency'] = predict_df['last_service_use_date'].apply(
    lambda x: (predict_df.last_service_use_date.max() - x).days)

# Convert True False to 0 & 1
predict_df.loc[predict_df['business_service'] == True,
               'business_service'] = '1'
predict_df.loc[predict_df['business_service'] == False,
               'business_service'] = '0'

predict_df['is_retained'] = 0
# df.loc[df['last_service_use_date'].dt.month.isin([6,7]), 'is_retained'] = 1

predict_df.business_service = predict_df.business_service.astype(str)
predict_df.dropna(inplace=True)

m = build_estimator('model_dir')
predicted_values = list(m.predict(input_fn=lambda: input_fn(predict_df)))
probs = list(m.predict_proba(input_fn=lambda: input_fn(predict_df)))

predict_df['predicted_values'] = predicted_values
predict_df['probs'] = probs

predict_df.to_csv('predictions.csv')
Example 23
def train_and_evaluate_model(config, hparams):
    """Runs the local training job given provided command line arguments.

  Args:
    config: RunConfig object
    hparams: dictionary of hyperparameters parsed from command-line arguments

  """

    with open(hparams['train_file']) as f:
        if hparams['trainer_type'] == 'spam':
            contents, labels, _ = train_ml_helpers.spam_from_file(f)
        else:
            contents, labels = train_ml_helpers.component_from_file(f)

    logger.info('Training data received. Len: %d' % len(contents))

    # Generate features and targets from extracted contents and labels.
    if hparams['trainer_type'] == 'spam':
        features, targets = train_ml_helpers \
          .transform_spam_csv_to_features(contents, labels)
    else:
        top_list = top_words.make_top_words_list(contents, hparams['job_dir'])
        features, targets, index_to_component = train_ml_helpers \
          .transform_component_csv_to_features(contents, labels, top_list)

    # Split training and testing set.
    logger.info('Features generated')
    features_train, features_test, targets_train, targets_test = train_test_split(
        features, targets, test_size=0.2, random_state=42)

    # Generate TrainSpec and EvalSpec for train and evaluate.
    estimator = model.build_estimator(config=config,
                                      job_dir=hparams['job_dir'],
                                      trainer_type=hparams['trainer_type'],
                                      class_count=len(set(labels)))
    exporter = tf.estimator.LatestExporter(
        name='saved_model',
        serving_input_receiver_fn=generate_json_input_fn(
            hparams['trainer_type']))

    train_spec = tf.estimator.TrainSpec(input_fn=make_input_fn(
        hparams['trainer_type'],
        features_train,
        targets_train,
        num_epochs=hparams['num_epochs'],
        batch_size=hparams['train_batch_size']),
                                        max_steps=hparams['train_steps'])
    eval_spec = tf.estimator.EvalSpec(input_fn=make_input_fn(
        hparams['trainer_type'],
        features_test,
        targets_test,
        shuffle=False,
        batch_size=hparams['eval_batch_size']),
                                      exporters=exporter,
                                      steps=hparams['eval_steps'])

    if hparams['trainer_type'] == 'component':
        store_component_conversion(hparams['job_dir'], index_to_component)

    result = tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    logging.info(result)
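
Unlike the earlier examples, this variant takes a plain dict for hparams. A hypothetical invocation, with placeholder values for every key the function looks up:

import tensorflow as tf

config = tf.estimator.RunConfig()  # default settings; customize as needed
hparams = {
    'train_file': 'training_data.csv',  # hypothetical path
    'trainer_type': 'spam',             # or 'component'
    'job_dir': 'gs://my-bucket/job',    # hypothetical GCS location
    'num_epochs': 5,
    'train_batch_size': 128,
    'eval_batch_size': 128,
    'train_steps': 10000,
    'eval_steps': 100,
}
train_and_evaluate_model(config, hparams)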