def test_read_write(self):
  saved_opts = training_utils.TrainOptions(
      model_class=self.model_class,
      model_params=self.model_params)
  saved_opts.dump(self.model_dir)

  loaded_opt = training_utils.TrainOptions.load(model_dir=self.model_dir)

  self.assertEqual(saved_opts.model_params, loaded_opt.model_params)
  self.assertEqual(saved_opts.model_class, loaded_opt.model_class)

def test_read_write(self):
  saved_opts = training_utils.TrainOptions(
      hparams=self.hparams,
      model_class=self.model_class,
      source_vocab_path=self.source_vocab_file,
      target_vocab_path=self.target_vocab_file)
  saved_opts.dump(self.model_dir)

  loaded_opt = training_utils.TrainOptions.load(model_dir=self.model_dir)

  self.assertEqual(saved_opts.hparams, loaded_opt.hparams)
  self.assertEqual(saved_opts.model_class, loaded_opt.model_class)
  self.assertEqual(saved_opts.source_vocab_path, loaded_opt.source_vocab_path)
  self.assertEqual(saved_opts.target_vocab_path, loaded_opt.target_vocab_path)

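# For reference, a minimal sketch of the fixture these two test_read_write
# variants run against. This setUp is an assumption, not the project's actual
# test harness: the attribute values (model class name, params, hparams, vocab
# paths) are illustrative placeholders chosen to satisfy both TrainOptions
# APIs. TrainOptions only stores the vocab paths, so the files need not exist.
import os
import tempfile

import tensorflow as tf


class TrainOptionsTest(tf.test.TestCase):
  """Hypothetical fixture for the TrainOptions round-trip tests above."""

  def setUp(self):
    super(TrainOptionsTest, self).setUp()
    # Throwaway directory for dump()/load() to round-trip through
    self.model_dir = tempfile.mkdtemp()
    # Used by the first test variant (model_class/model_params API)
    self.model_class = "AttentionSeq2Seq"
    self.model_params = {"num_layers": 4}
    # Used by the second test variant (hparams/vocab-path API)
    self.hparams = {"embedding.dim": 128}
    self.source_vocab_file = os.path.join(self.model_dir, "vocab_source")
    self.target_vocab_file = os.path.join(self.model_dir, "vocab_target")
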
def create_experiment(output_dir): """ Creates a new Experiment instance. Args: output_dir: Output directory for model checkpoints and summaries. """ config = run_config.RunConfig( tf_random_seed=FLAGS.tf_random_seed, save_checkpoints_secs=FLAGS.save_checkpoints_secs, save_checkpoints_steps=FLAGS.save_checkpoints_steps, keep_checkpoint_max=FLAGS.keep_checkpoint_max, keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours, gpu_memory_fraction=FLAGS.gpu_memory_fraction) config.tf_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth config.tf_config.log_device_placement = FLAGS.log_device_placement train_options = training_utils.TrainOptions( model_class=FLAGS.model, model_params=FLAGS.model_params) # On the main worker, save training options if config.is_chief: gfile.MakeDirs(output_dir) train_options.dump(output_dir) bucket_boundaries = None if FLAGS.buckets: bucket_boundaries = list(map(int, FLAGS.buckets.split(","))) # Training data input pipeline train_input_pipeline = input_pipeline.make_input_pipeline_from_def( def_dict=FLAGS.input_pipeline_train, mode=tf.contrib.learn.ModeKeys.TRAIN) # Create training input function train_input_fn = training_utils.create_input_fn( pipeline=train_input_pipeline, batch_size=FLAGS.batch_size, bucket_boundaries=bucket_boundaries, mode=tf.contrib.learn.ModeKeys.TRAIN) # Development data input pipeline dev_input_pipeline = input_pipeline.make_input_pipeline_from_def( def_dict=FLAGS.input_pipeline_dev, mode=tf.contrib.learn.ModeKeys.EVAL, shuffle=False, num_epochs=1) # Create eval input function eval_input_fn = training_utils.create_input_fn( pipeline=dev_input_pipeline, batch_size=FLAGS.batch_size, allow_smaller_final_batch=True, mode=tf.contrib.learn.ModeKeys.EVAL) def model_fn(features, labels, params, mode): """Builds the model graph""" model = _create_from_dict( { "class": train_options.model_class, "params": train_options.model_params }, models, mode=mode) return model(features, labels, params) estimator = tf.contrib.learn.Estimator(model_fn=model_fn, model_dir=output_dir, config=config, params=FLAGS.model_params) # Create hooks train_hooks = [] for dict_ in FLAGS.hooks: hook = _create_from_dict(dict_, hooks, model_dir=estimator.model_dir, run_config=config) train_hooks.append(hook) # Create metrics eval_metrics = {} for dict_ in FLAGS.metrics: metric = _create_from_dict(dict_, metric_specs) eval_metrics[metric.name] = metric experiment = PatchedExperiment(estimator=estimator, train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, min_eval_frequency=FLAGS.eval_every_n_steps, train_steps=FLAGS.train_steps, eval_steps=None, eval_metrics=eval_metrics, train_monitors=train_hooks) return experiment
def create_experiment(output_dir): """ Creates a new Experiment instance. Args: output_dir: Output directory for model checkpoints and summaries. """ config = run_config.RunConfig( tf_random_seed=FLAGS.tf_random_seed, save_checkpoints_secs=FLAGS.save_checkpoints_secs, save_checkpoints_steps=FLAGS.save_checkpoints_steps, keep_checkpoint_max=FLAGS.keep_checkpoint_max, keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours) # Load vocabulary info source_vocab_info = vocab.get_vocab_info(FLAGS.vocab_source) target_vocab_info = vocab.get_vocab_info(FLAGS.vocab_target) # Find model class model_class = getattr(models, FLAGS.model) # Parse parameter and merge with defaults hparams = model_class.default_params() if FLAGS.hparams is not None and isinstance(FLAGS.hparams, str): hparams = HParamsParser(hparams).parse(FLAGS.hparams) elif isinstance(FLAGS.hparams, dict): hparams.update(FLAGS.hparams) # Print hparams training_utils.print_hparams(hparams) # One the main worker, save training options and vocabulary if config.is_chief: # Copy vocabulary to output directory gfile.MakeDirs(output_dir) source_vocab_path = os.path.join(output_dir, "vocab_source") gfile.Copy(FLAGS.vocab_source, source_vocab_path, overwrite=True) target_vocab_path = os.path.join(output_dir, "vocab_target") gfile.Copy(FLAGS.vocab_target, target_vocab_path, overwrite=True) # Save train options train_options = training_utils.TrainOptions( hparams=hparams, model_class=FLAGS.model, source_vocab_path=source_vocab_path, target_vocab_path=target_vocab_path) train_options.dump(output_dir) # Create model model = model_class(source_vocab_info=source_vocab_info, target_vocab_info=target_vocab_info, params=hparams) bucket_boundaries = None if FLAGS.buckets: bucket_boundaries = list(map(int, FLAGS.buckets.split(","))) # Create training input function train_input_fn = training_utils.create_input_fn( data_provider_fn=functools.partial( data_utils.make_parallel_data_provider, data_sources_source=FLAGS.train_source, data_sources_target=FLAGS.train_target, shuffle=True, num_epochs=FLAGS.train_epochs, delimiter=FLAGS.delimiter), batch_size=FLAGS.batch_size, bucket_boundaries=bucket_boundaries) # Create eval input function eval_input_fn = training_utils.create_input_fn( data_provider_fn=functools.partial( data_utils.make_parallel_data_provider, data_sources_source=FLAGS.dev_source, data_sources_target=FLAGS.dev_target, shuffle=False, num_epochs=1, delimiter=FLAGS.delimiter), batch_size=FLAGS.batch_size) def model_fn(features, labels, params, mode): """Builds the model graph""" return model(features, labels, params, mode) estimator = tf.contrib.learn.estimator.Estimator(model_fn=model_fn, model_dir=output_dir, config=config) train_hooks = training_utils.create_default_training_hooks( estimator=estimator, sample_frequency=FLAGS.sample_every_n_steps, delimiter=FLAGS.delimiter) eval_metrics = { "log_perplexity": metrics.streaming_log_perplexity(), "bleu": metrics.make_bleu_metric_spec(), } experiment = tf.contrib.learn.experiment.Experiment( estimator=estimator, train_input_fn=train_input_fn, eval_input_fn=eval_input_fn, min_eval_frequency=FLAGS.eval_every_n_steps, train_steps=FLAGS.train_steps, eval_steps=None, eval_metrics=eval_metrics, train_monitors=train_hooks) return experiment
def create_estimator_and_specs(output_dir):
  """Creates the Estimator plus the TrainSpec/EvalSpec pair for tf.estimator."""
  session_config = tf.ConfigProto(
      log_device_placement=True,
      allow_soft_placement=True)
  session_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth
  session_config.gpu_options.per_process_gpu_memory_fraction = \
      FLAGS.gpu_memory_fraction

  config = tf.estimator.RunConfig(
      tf_random_seed=FLAGS.tf_random_seed,
      save_checkpoints_secs=FLAGS.save_checkpoints_secs,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      session_config=session_config,
      keep_checkpoint_max=FLAGS.keep_checkpoint_max,
      keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours)

  train_options = training_utils.TrainOptions(
      model_class=FLAGS.model,
      model_params=FLAGS.model_params)
  # On the main worker, save training options
  if config.is_chief:
    gfile.MakeDirs(output_dir)
    train_options.dump(output_dir)

  bucket_boundaries = None
  if FLAGS.buckets:
    bucket_boundaries = list(map(int, FLAGS.buckets.split(",")))

  # Training data input pipeline
  train_input_pipeline = input_pipeline.make_input_pipeline_from_def(
      def_dict=FLAGS.input_pipeline_train,
      mode=tf.contrib.learn.ModeKeys.TRAIN)

  # Create training input function
  train_input_fn = training_utils.create_input_fn(
      pipeline=train_input_pipeline,
      batch_size=FLAGS.batch_size,
      bucket_boundaries=bucket_boundaries,
      scope="train_input_fn")

  # Development data input pipeline
  dev_input_pipeline = input_pipeline.make_input_pipeline_from_def(
      def_dict=FLAGS.input_pipeline_dev,
      mode=tf.contrib.learn.ModeKeys.EVAL,
      shuffle=False,
      num_epochs=1)

  # Create eval input function
  eval_input_fn = training_utils.create_input_fn(
      pipeline=dev_input_pipeline,
      batch_size=FLAGS.batch_size,
      allow_smaller_final_batch=True,
      scope="dev_input_fn")

  def model_fn(features, labels, params, mode):
    """Builds the model graph"""
    model = _create_from_dict({
        "class": train_options.model_class,
        "params": train_options.model_params
    }, models, mode=mode)
    (predictions, loss, train_op) = model(features, labels, params)

    # Create metrics
    eval_metrics = {}
    for dict_ in FLAGS.metrics:
      metric = _create_from_dict(dict_, metric_specs)
      eval_metrics[metric.name] = metric(features, labels, predictions)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metrics)

  estimator = tf.estimator.Estimator(
      model_fn=model_fn,
      model_dir=output_dir,
      config=config,
      params=FLAGS.model_params)

  # Create hooks
  train_hooks = []
  for dict_ in FLAGS.hooks:
    hook = _create_from_dict(
        dict_, hooks,
        model_dir=estimator.model_dir,
        run_config=config)
    train_hooks.append(hook)

  train_spec = tf.estimator.TrainSpec(
      input_fn=train_input_fn,
      max_steps=FLAGS.train_steps,
      hooks=train_hooks)
  eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)

  return (estimator, train_spec, eval_spec)
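# Usage sketch: the tf.estimator port returns an (estimator, train_spec,
# eval_spec) triple in place of a contrib Experiment. A minimal, assumed
# entry point consuming it with tf.estimator.train_and_evaluate follows;
# FLAGS.output_dir is again presumed to be defined elsewhere.
import tensorflow as tf


def main(_argv):
  """Runs the train/eval loop for the tf.estimator variant."""
  estimator, train_spec, eval_spec = create_estimator_and_specs(
      output_dir=FLAGS.output_dir)
  # train_and_evaluate alternates training and evaluation per the specs,
  # replacing the Experiment-based loop used by the contrib variants above.
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)


if __name__ == "__main__":
  tf.app.run()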