Example #1
 def test_config_benchmark_file_logger(self):
   # Set benchmark_log_dir first, since benchmark_logger_type needs that value
   # to be set when it runs its validation.
   with flagsaver.flagsaver(benchmark_log_dir="/tmp"):
     with flagsaver.flagsaver(benchmark_logger_type="BenchmarkFileLogger"):
       logger.config_benchmark_logger()
       self.assertIsInstance(logger.get_benchmark_logger(),
                             logger.BenchmarkFileLogger)
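As a side note, the file-logger and base-logger checks in this listing share the same shape; a hedged sketch of how they could be folded together with absl.testing.parameterized is below, assuming the test class extends parameterized.TestCase and that `from absl.testing import parameterized` is imported at module level. The flag values and logger classes are taken from the examples themselves.

  # Sketch only: assumes a parameterized.TestCase base class.
  @parameterized.named_parameters(
      ("base", "BaseBenchmarkLogger", logger.BaseBenchmarkLogger),
      ("file", "BenchmarkFileLogger", logger.BenchmarkFileLogger))
  def test_config_benchmark_logger_types(self, logger_type, expected_class):
    with flagsaver.flagsaver(benchmark_log_dir="/tmp",
                             benchmark_logger_type=logger_type):
      logger.config_benchmark_logger()
      self.assertIsInstance(logger.get_benchmark_logger(), expected_class)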
Example #2
def train(flags_obj, model_function, dataset_name):
    """Construct an Estimator from `model_function` and run the training loop."""
    run_config = tf.estimator.RunConfig(save_checkpoints_steps=100000,
                                        keep_checkpoint_max=1000)

    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        params={
            'num_classes': flags_obj.num_classes,
            'vocab_size': flags_obj.vocab_size,
            'embedding_dim': flags_obj.embedding_dim,
            'mlp_dim': flags_obj.mlp_dim,
            'kmer': flags_obj.kmer,
            'max_len': flags_obj.max_len,
            'lr': flags_obj.lr,
            'lr_decay': flags_obj.lr_decay,
            'cnn_num_filters': flags_obj.cnn_num_filters,
            'cnn_filter_sizes': flags_obj.cnn_filter_sizes,
            'lstm_dim': flags_obj.lstm_dim,
            'pooling_type': flags_obj.pooling_type,
            'row': flags_obj.row,
            'da': flags_obj.da,
            'keep_prob': flags_obj.keep_prob
        })

    run_params = {
        'batch_size': flags_obj.batch_size,
        'train_epochs': flags_obj.train_epochs,
    }
    benchmark_logger = logger.config_benchmark_logger(flags_obj)
    benchmark_logger.log_run_info('model', dataset_name, run_params)

    train_hooks = hooks_helper.get_train_hooks(flags_obj.hooks,
                                               batch_size=flags_obj.batch_size)

    def input_fn_train():
        if flags_obj.encode_method == 'kmer':
            input_fn = input_function_train_kmer(flags_obj.input_tfrec,
                                                 flags_obj.train_epochs,
                                                 flags_obj.batch_size,
                                                 flags_obj.cpus)
            if flags_obj.model_name in [
                    'embed_pool', 'embed_cnn', 'embed_lstm',
                    'embed_cnn_no_pool'
            ]:
                input_fn = input_function_train_kmer_pad_to_fixed_len(
                    flags_obj.input_tfrec, flags_obj.train_epochs,
                    flags_obj.batch_size, flags_obj.cpus, flags_obj.max_len,
                    flags_obj.kmer)
        else:
            input_fn = input_function_train_one_hot(flags_obj.input_tfrec,
                                                    flags_obj.train_epochs,
                                                    flags_obj.batch_size,
                                                    flags_obj.cpus,
                                                    flags_obj.max_len)

        return input_fn

    classifier.train(input_fn=input_fn_train, hooks=train_hooks)
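The input_function_train_* helpers referenced above are not part of this listing; purely as a sketch of the general shape such a TF 1.x tf.data pipeline can take (the TFRecord feature names and parsing spec below are placeholders, not the project's actual schema):

def input_function_train_sketch(input_tfrec, train_epochs, batch_size, cpus,
                                max_len):
    """Hypothetical TFRecord input pipeline in the style referenced above."""

    def parse_fn(serialized):
        # Placeholder feature spec -- the real schema is defined by the
        # project's TFRecord writer, which is not shown in this listing.
        features = tf.parse_single_example(
            serialized,
            features={
                'tokens': tf.FixedLenFeature([max_len], tf.int64),
                'label': tf.FixedLenFeature([], tf.int64),
            })
        return features['tokens'], features['label']

    dataset = tf.data.TFRecordDataset(input_tfrec)
    dataset = dataset.map(parse_fn, num_parallel_calls=cpus)
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.repeat(train_epochs)
    dataset = dataset.batch(batch_size)
    return dataset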
Example #3
 def test_config_benchmark_bigquery_logger(self, mock_bigquery_client):
     with flagsaver.flagsaver(
             benchmark_logger_type="BenchmarkBigQueryLogger"):
         logger.config_benchmark_logger()
         self.assertIsInstance(logger.get_benchmark_logger(),
                               logger.BenchmarkBigQueryLogger)
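The extra mock_bigquery_client argument above implies a mock.patch decorator that did not survive in this listing; a sketch of the usual pattern is below, with a placeholder patch target rather than the project's real module path (assumes `from unittest import mock`):

 # Placeholder target: the real test patches whichever BigQuery client class
 # the logger module instantiates, so no network call is made and the mock is
 # injected as `mock_bigquery_client`.
 @mock.patch("benchmark_uploader.BigQueryUploader")
 def test_config_benchmark_bigquery_logger(self, mock_bigquery_client):
     ...  # body as in the example above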
Example #4
 def test_config_base_benchmark_logger(self):
     with flagsaver.flagsaver(benchmark_logger_type="BaseBenchmarkLogger"):
         logger.config_benchmark_logger()
         self.assertIsInstance(logger.get_benchmark_logger(),
                               logger.BaseBenchmarkLogger)
Example #5
def run_wide_deep(flags_obj):
    """Run Wide-Deep training and eval loop.

  Args:
    flags_obj: An object containing parsed flag values.
  """

    # Clean up the model directory if present
    shutil.rmtree(flags_obj.model_dir, ignore_errors=True)
    model = build_estimator(flags_obj.model_dir, flags_obj.model_type)

    train_file = os.path.join(flags_obj.data_dir, 'adult.data')
    test_file = os.path.join(flags_obj.data_dir, 'adult.test')

    # Train and evaluate the model every `flags.epochs_between_evals` epochs.
    def train_input_fn():
        return input_fn(train_file, flags_obj.epochs_between_evals, True,
                        flags_obj.batch_size)

    def eval_input_fn():
        return input_fn(test_file, 1, False, flags_obj.batch_size)

    run_params = {
        'batch_size': flags_obj.batch_size,
        'train_epochs': flags_obj.train_epochs,
        'model_type': flags_obj.model_type,
    }

    benchmark_logger = logger.config_benchmark_logger(flags_obj)
    benchmark_logger.log_run_info('wide_deep', 'Census Income', run_params)

    loss_prefix = LOSS_PREFIX.get(flags_obj.model_type, '')
    train_hooks = hooks_helper.get_train_hooks(
        flags_obj.hooks,
        batch_size=flags_obj.batch_size,
        tensors_to_log={
            'average_loss': loss_prefix + 'head/truediv',
            'loss': loss_prefix + 'head/weighted_loss/Sum'
        })

    # Train and evaluate the model every `flags.epochs_between_evals` epochs.
    for n in range(flags_obj.train_epochs // flags_obj.epochs_between_evals):
        model.train(input_fn=train_input_fn, hooks=train_hooks)
        results = model.evaluate(input_fn=eval_input_fn)

        # Display evaluation metrics
        tf.logging.info('Results at epoch %d / %d',
                        (n + 1) * flags_obj.epochs_between_evals,
                        flags_obj.train_epochs)
        tf.logging.info('-' * 60)

        for key in sorted(results):
            tf.logging.info('%s: %s' % (key, results[key]))

        benchmark_logger.log_evaluation_result(results)

        if model_helpers.past_stop_threshold(flags_obj.stop_threshold,
                                             results['accuracy']):
            break

    # Export the model
    if flags_obj.export_dir is not None:
        export_model(model, flags_obj.model_type, flags_obj.export_dir)
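export_model is not included in this listing; a plausible sketch of such a helper for a feature-column based estimator follows. The build_model_columns helper (returning wide and deep column lists) is an assumption; the parsing serving-input receiver is standard TF 1.x estimator export API:

def export_model_sketch(model, model_type, export_dir):
    """Hypothetical export helper matching the call at the end of run_wide_deep."""
    # build_model_columns is assumed to return (wide_columns, deep_columns);
    # swap in whatever column-building helper the project actually uses.
    wide_columns, deep_columns = build_model_columns()
    if model_type == 'wide':
        columns = wide_columns
    elif model_type == 'deep':
        columns = deep_columns
    else:
        columns = wide_columns + deep_columns

    # Serve tf.Example protos: parse them with the same feature columns used
    # for training, then export a SavedModel to `export_dir`.
    feature_spec = tf.feature_column.make_parse_example_spec(columns)
    serving_input_fn = (
        tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec))
    model.export_savedmodel(export_dir, serving_input_fn)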
Example #6
def resnet_main(flags, model_function, input_function, shape=None):
    """Shared main loop for ResNet Models.

  Args:
    flags: FLAGS object that contains the params for running. See
      ResnetArgParser for created flags.
    model_function: the function that instantiates the Model and builds the
      ops for train/eval. This will be passed directly into the estimator.
    input_function: the function that processes the dataset and returns a
      dataset that the estimator can train on. This will be wrapped with
      all the relevant flags for running and passed to estimator.
    shape: list of ints representing the shape of the images used for training.
      This is only used if flags.export_dir is passed.
  """

    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)

        # There are two steps required if using multi-GPU: (1) wrap the model_fn,
        # and (2) wrap the optimizer. The first happens here, and (2) happens
        # in the model_fn itself when the optimizer is defined.
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    # Create session config based on values of inter_op_parallelism_threads and
    # intra_op_parallelism_threads. Note that we default to having
    # allow_soft_placement = True, which is required for multi-GPU and not
    # harmful for other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # Set up a RunConfig to save checkpoint and set session config.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)
    classifier = tf.estimator.Estimator(model_fn=model_function,
                                        model_dir=flags.model_dir,
                                        config=run_config,
                                        params={
                                            'resnet_size': flags.resnet_size,
                                            'data_format': flags.data_format,
                                            'batch_size': flags.batch_size,
                                            'multi_gpu': flags.multi_gpu,
                                            'version': flags.version,
                                            'loss_scale': flags.loss_scale,
                                            'dtype': flags.dtype
                                        })

    benchmark_logger = logger.config_benchmark_logger(flags.benchmark_log_dir)
    benchmark_logger.log_run_info('resnet')

    for _ in range(flags.train_epochs // flags.epochs_between_evals):
        train_hooks = hooks_helper.get_train_hooks(
            flags.hooks,
            batch_size=flags.batch_size,
            benchmark_log_dir=flags.benchmark_log_dir)

        print('Starting a training cycle.')

        def input_fn_train():
            return input_function(True, flags.data_dir, flags.batch_size,
                                  flags.epochs_between_evals,
                                  flags.num_parallel_calls, flags.multi_gpu)

        classifier.train(input_fn=input_fn_train,
                         hooks=train_hooks,
                         max_steps=flags.max_train_steps)

        print('Starting to evaluate.')

        # Evaluate the model and print results
        def input_fn_eval():
            return input_function(False, flags.data_dir, flags.batch_size, 1,
                                  flags.num_parallel_calls, flags.multi_gpu)

        # flags.max_train_steps is generally associated with testing and profiling.
        # As a result it is frequently called with synthetic data, which will
        # iterate forever. Passing steps=flags.max_train_steps allows the eval
        # (which is generally unimportant in those circumstances) to terminate.
        # Note that eval will run for max_train_steps each loop, regardless of the
        # global_step count.
        eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                           steps=flags.max_train_steps)

        benchmark_logger.log_evaluation_result(eval_results)

        if model_helpers.past_stop_threshold(flags.stop_threshold,
                                             eval_results['accuracy']):
            break

    if flags.export_dir is not None:
        warn_on_multi_gpu_export(flags.multi_gpu)

        # Exports a saved model for the given classifier.
        input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
            shape, batch_size=flags.batch_size)
        classifier.export_savedmodel(flags.export_dir, input_receiver_fn)
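The multi-GPU comment in resnet_main mentions a second step that happens inside the model function itself: wrapping the optimizer. A schematic sketch of that step follows; the tiny placeholder network and hyperparameters are illustrative only, while params['multi_gpu'] mirrors the params dict passed to the Estimator above:

def model_fn_sketch(features, labels, mode, params):
    """Schematic model_fn showing step (2) of the multi-GPU setup."""
    # Placeholder network -- the real model_fn builds a ResNet here.
    logits = tf.layers.dense(tf.layers.flatten(features), 10)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    optimizer = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9)
    if params['multi_gpu']:
        # Step (2): wrap the optimizer so gradients are aggregated across the
        # towers created by tf.contrib.estimator.replicate_model_fn.
        optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)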