def test_config_benchmark_file_logger(self):
    """Check that the "BenchmarkFileLogger" flag value configures that logger."""
    # The benchmark_log_dir override must be entered first: validating
    # benchmark_logger_type needs the log directory to be set already.
    with flagsaver.flagsaver(benchmark_log_dir="/tmp"), \
            flagsaver.flagsaver(benchmark_logger_type="BenchmarkFileLogger"):
        logger.config_benchmark_logger()
        active_logger = logger.get_benchmark_logger()
        self.assertIsInstance(active_logger, logger.BenchmarkFileLogger)
def train(flags_obj, model_function, dataset_name):
    """Build an Estimator from the parsed flags and train it.

    Args:
      flags_obj: Object holding the parsed flag values read below (model
        hyper-parameters, input path, batch size, hooks, ...).
      model_function: Passed to `tf.estimator.Estimator` as `model_fn`.
      dataset_name: Name handed to the benchmark logger's run info.
    """
    run_config = tf.estimator.RunConfig(save_checkpoints_steps=100000,
                                        keep_checkpoint_max=1000)

    # Every hyper-parameter is forwarded to the model function under the
    # same name as the flag it is read from.
    hyper_param_flags = (
        'num_classes', 'vocab_size', 'embedding_dim', 'mlp_dim', 'kmer',
        'max_len', 'lr', 'lr_decay', 'cnn_num_filters', 'cnn_filter_sizes',
        'lstm_dim', 'pooling_type', 'row', 'da', 'keep_prob',
    )
    model_params = {
        flag_name: getattr(flags_obj, flag_name)
        for flag_name in hyper_param_flags
    }

    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        params=model_params)

    run_params = dict(batch_size=flags_obj.batch_size,
                      train_epochs=flags_obj.train_epochs)

    benchmark_logger = logger.config_benchmark_logger(flags_obj)
    benchmark_logger.log_run_info('model', dataset_name, run_params)

    train_hooks = hooks_helper.get_train_hooks(
        flags_obj.hooks, batch_size=flags_obj.batch_size)

    def input_fn_train():
        """Return the training input for the configured encoding and model."""
        # NOTE(review): the original whitespace was lost; this assumes the
        # `else` (one-hot) branch pairs with the `encode_method == 'kmer'`
        # check -- confirm against the upstream file.
        if flags_obj.encode_method != 'kmer':
            return input_function_train_one_hot(
                flags_obj.input_tfrec, flags_obj.train_epochs,
                flags_obj.batch_size, flags_obj.cpus, flags_obj.max_len)

        # The plain k-mer input is always built first, exactly as before,
        # and replaced below for the listed model names.
        input_fn = input_function_train_kmer(
            flags_obj.input_tfrec, flags_obj.train_epochs,
            flags_obj.batch_size, flags_obj.cpus)
        fixed_len_models = [
            'embed_pool', 'embed_cnn', 'embed_lstm', 'embed_cnn_no_pool'
        ]
        if flags_obj.model_name in fixed_len_models:
            input_fn = input_function_train_kmer_pad_to_fixed_len(
                flags_obj.input_tfrec, flags_obj.train_epochs,
                flags_obj.batch_size, flags_obj.cpus, flags_obj.max_len,
                flags_obj.kmer)
        return input_fn

    classifier.train(input_fn=input_fn_train, hooks=train_hooks)
def test_config_benchmark_bigquery_logger(self, mock_bigquery_client):
    """Check that "BenchmarkBigQueryLogger" configures the BigQuery logger.

    Args:
      mock_bigquery_client: Not used in the body; presumably injected by a
        patch decorator outside this view -- TODO confirm.
    """
    bigquery_override = flagsaver.flagsaver(
        benchmark_logger_type="BenchmarkBigQueryLogger")
    with bigquery_override:
        logger.config_benchmark_logger()
        active_logger = logger.get_benchmark_logger()
        self.assertIsInstance(active_logger, logger.BenchmarkBigQueryLogger)
def test_config_base_benchmark_logger(self):
    """Check that "BaseBenchmarkLogger" configures the base logger."""
    base_override = flagsaver.flagsaver(
        benchmark_logger_type="BaseBenchmarkLogger")
    with base_override:
        logger.config_benchmark_logger()
        active_logger = logger.get_benchmark_logger()
        self.assertIsInstance(active_logger, logger.BaseBenchmarkLogger)
def run_wide_deep(flags_obj):
    """Run Wide-Deep training and eval loop.

    Trains for `flags_obj.epochs_between_evals` epochs, evaluates, logs the
    metrics, and repeats for `train_epochs // epochs_between_evals` cycles
    or until the accuracy stop threshold is passed. The model is exported
    afterwards when `flags_obj.export_dir` is set.

    Args:
      flags_obj: An object containing parsed flag values.
    """
    # Start from an empty model directory; removal errors are ignored.
    shutil.rmtree(flags_obj.model_dir, ignore_errors=True)
    model = build_estimator(flags_obj.model_dir, flags_obj.model_type)

    data_dir = flags_obj.data_dir
    train_file = os.path.join(data_dir, 'adult.data')
    test_file = os.path.join(data_dir, 'adult.test')

    def _train_inputs():
        # One call feeds a whole train/eval cycle worth of epochs.
        return input_fn(train_file, flags_obj.epochs_between_evals, True,
                        flags_obj.batch_size)

    def _eval_inputs():
        # Evaluation reads the test file once.
        return input_fn(test_file, 1, False, flags_obj.batch_size)

    run_params = dict(
        batch_size=flags_obj.batch_size,
        train_epochs=flags_obj.train_epochs,
        model_type=flags_obj.model_type,
    )

    benchmark_logger = logger.config_benchmark_logger(flags_obj)
    benchmark_logger.log_run_info('wide_deep', 'Census Income', run_params)

    # Tensor names to log are prefixed according to the model type.
    loss_prefix = LOSS_PREFIX.get(flags_obj.model_type, '')
    tensors_to_log = {
        'average_loss': loss_prefix + 'head/truediv',
        'loss': loss_prefix + 'head/weighted_loss/Sum'
    }
    train_hooks = hooks_helper.get_train_hooks(
        flags_obj.hooks,
        batch_size=flags_obj.batch_size,
        tensors_to_log=tensors_to_log)

    # Train and evaluate the model every `flags.epochs_between_evals` epochs.
    num_cycles = flags_obj.train_epochs // flags_obj.epochs_between_evals
    for cycle in range(1, num_cycles + 1):
        model.train(input_fn=_train_inputs, hooks=train_hooks)
        results = model.evaluate(input_fn=_eval_inputs)

        # Display evaluation metrics
        epochs_done = cycle * flags_obj.epochs_between_evals
        tf.logging.info('Results at epoch %d / %d',
                        epochs_done, flags_obj.train_epochs)
        tf.logging.info('-' * 60)
        for key in sorted(results):
            value = results[key]
            tf.logging.info('%s: %s' % (key, value))

        benchmark_logger.log_evaluation_result(results)

        stop_now = model_helpers.past_stop_threshold(
            flags_obj.stop_threshold, results['accuracy'])
        if stop_now:
            break

    # Export the model
    if flags_obj.export_dir is None:
        return
    export_model(model, flags_obj.model_type, flags_obj.export_dir)
def resnet_main(flags, model_function, input_function, shape=None):
    """Shared main loop for ResNet Models.

    Runs `flags.train_epochs // flags.epochs_between_evals` train/evaluate
    cycles, stopping early once the accuracy stop threshold is passed, and
    exports a SavedModel afterwards when `flags.export_dir` is set.

    Args:
      flags: FLAGS object that contains the params for running. See
        ResnetArgParser for created flags.
      model_function: the function that instantiates the Model and builds
        the ops for train/eval. It is handed to the estimator (wrapped for
        replication first when `flags.multi_gpu` is set).
      input_function: the function that processes the dataset and returns a
        dataset that the estimator can train on. It is wrapped below with
        the relevant flags before being passed to the estimator.
      shape: list of ints representing the shape of the images used for
        training. Only used when `flags.export_dir` is set.
    """
    # Using the Winograd non-fused algorithms provides a small performance
    # boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)

        # Multi-GPU needs two things: (1) a wrapped model_fn and (2) a
        # wrapped optimizer. Only (1) is done here; (2) happens inside the
        # model_fn where the optimizer is defined.
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function, loss_reduction=tf.losses.Reduction.MEAN)

    # The session config carries the inter/intra op thread counts.
    # allow_soft_placement defaults to True: required for multi-GPU and not
    # harmful for other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # Set up a RunConfig to save checkpoint and set session config.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_secs=1e9, session_config=session_config)

    estimator_params = dict(
        resnet_size=flags.resnet_size,
        data_format=flags.data_format,
        batch_size=flags.batch_size,
        multi_gpu=flags.multi_gpu,
        version=flags.version,
        loss_scale=flags.loss_scale,
        dtype=flags.dtype,
    )
    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params=estimator_params)

    benchmark_logger = logger.config_benchmark_logger(flags.benchmark_log_dir)
    benchmark_logger.log_run_info('resnet')

    # Both closures read only `flags` and `input_function`, neither of which
    # is rebound in the loop, so they are defined once up front.
    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_between_evals,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_eval():
        return input_function(False, flags.data_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    num_cycles = flags.train_epochs // flags.epochs_between_evals
    for _ in range(num_cycles):
        # A new set of hooks is requested for every cycle.
        train_hooks = hooks_helper.get_train_hooks(
            flags.hooks,
            batch_size=flags.batch_size,
            benchmark_log_dir=flags.benchmark_log_dir)

        print('Starting a training cycle.')
        classifier.train(input_fn=input_fn_train, hooks=train_hooks,
                         max_steps=flags.max_train_steps)

        print('Starting to evaluate.')
        # flags.max_train_steps is generally associated with testing and
        # profiling, so it is frequently used with synthetic data, which
        # iterates forever. Passing steps=flags.max_train_steps lets the
        # eval (generally unimportant in those circumstances) terminate.
        # Note that eval will run for max_train_steps each loop, regardless
        # of the global_step count.
        eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                           steps=flags.max_train_steps)
        benchmark_logger.log_evaluation_result(eval_results)

        reached_threshold = model_helpers.past_stop_threshold(
            flags.stop_threshold, eval_results['accuracy'])
        if reached_threshold:
            break

    if flags.export_dir is None:
        return

    warn_on_multi_gpu_export(flags.multi_gpu)

    # Exports a saved model for the given classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags.batch_size)
    classifier.export_savedmodel(flags.export_dir, input_receiver_fn)