def translate_list(vocab, model_dir, params, contentList):
  """Translate every entry of `contentList` with a saved transformer model.

  The subtokenizer, the estimator and its predictor are built once and then
  reused for all entries.

  Args:
    vocab: vocabulary argument handed to `tokenizer.Subtokenizer`.
    model_dir: passed to the estimator as `model_dir`.
    params: passed to the estimator as `params`.
    contentList: iterable of inputs to translate, processed in order.

  Returns:
    List of decoded translations in input order. An entry whose translation
    raises is reported and skipped, so the result can be shorter than
    `contentList` and positions are not guaranteed to line up with it.
  """
  subtokenizer = tokenizer.Subtokenizer(vocab)
  estimator = tf.estimator.Estimator(
      model_fn=transformer_main.model_fn, model_dir=model_dir, params=params)
  estimator_predictor = tf.contrib.predictor.from_estimator(
      estimator,
      export.build_tensor_serving_input_receiver_fn(
          shape=[None], dtype=tf.int32, batch_size=None))

  translation_results = []
  for content in contentList:
    try:
      tokens = _encode_and_add_eos(content, subtokenizer)
      # The predictor is fed a batch containing this single token sequence.
      predictions = estimator_predictor(
          {"input": np.array([tokens], dtype=np.int32)})
      translation = _trim_and_decode(predictions["outputs"][0], subtokenizer)
    except Exception as err:  # pylint: disable=broad-except
      # Best effort: one bad entry must not abort the whole batch. Only
      # `Exception` is caught so Ctrl-C / SystemExit still stop the run.
      print("error in translation: {!r}".format(err))
      continue
    translation_results.append(translation)
  return translation_results
def export_pb(flags_core, flags_obj, shape, classifier, ir_eval=False):
  """Export `classifier` for byte and tensor inputs; score the JPEG export.

  Two SavedModels are written below
  `<flags_obj.export_dir>/<flags_obj.data_format>/`: one in `binary_input`
  (fed through `image_bytes_serving_input_fn`) and one in
  `preprocessed_input` (fed with tensors of `shape`). When
  `flags_obj.export_decoder_type` is 'jpeg', the binary-input export is
  evaluated with `export_test` and the resulting message is logged and
  written to `model_performance.txt` inside the exported directory.

  Args:
    flags_core: object providing `get_tf_dtype(flags_obj)`.
    flags_obj: parsed flags; reads `data_format`, `export_dir`,
      `export_decoder_type` and `preprocessing_type`.
    shape: input shape forwarded to both serving input receiver builders.
    classifier: object exposing `export_savedmodel(path, receiver_fn)`.
    ir_eval: forwarded unchanged to `export_test`.

  Raises:
    ValueError: if `flags_obj.data_format` is empty.
  """
  dtype = flags_core.get_tf_dtype(flags_obj)
  if not flags_obj.data_format:
    raise ValueError(
        'The `data_format` must be specified: channels_first or channels_last '
    )

  export_root = os.path.join(flags_obj.export_dir, flags_obj.data_format)

  bytes_receiver_fn = functools.partial(
      image_bytes_serving_input_fn,
      shape,
      flags_obj.export_decoder_type,
      dtype=dtype,
      pptype=flags_obj.preprocessing_type)
  tensor_receiver_fn = export.build_tensor_serving_input_receiver_fn(
      shape, batch_size=None, dtype=dtype)

  exported_bin_dir = classifier.export_savedmodel(
      os.path.join(export_root, 'binary_input'), bytes_receiver_fn)
  classifier.export_savedmodel(
      os.path.join(export_root, 'preprocessed_input'), tensor_receiver_fn)

  # Only the JPEG decoder variant is evaluated after export.
  if flags_obj.export_decoder_type != 'jpeg':
    return

  metric = export_test(exported_bin_dir, flags_obj, ir_eval)
  msg = 'IMPOTANT! Evaluation metric of exported saved_model.pb is {}'.format(
      metric)
  tf.logging.info(msg)

  # The export path is decoded from bytes before building the report path.
  report_path = exported_bin_dir.decode("utf-8") + '/model_performance.txt'
  with tf.gfile.Open(report_path, 'w') as fp:
    fp.write(msg)
def test_build_tensor_serving_input_receiver_fn(self):
  """Checks the receiver built with only `shape=[4, 5]` supplied.

  Expects float32 features of shape [1, 4, 5] and a dict of receiver tensors
  whose first value has that same shape.
  """
  expected_shape = tf.TensorShape([1, 4, 5])
  build_receiver = export.build_tensor_serving_input_receiver_fn(shape=[4, 5])

  with tf.Graph().as_default():
    receiver = build_receiver()
    features = receiver.features
    receiver_tensors = receiver.receiver_tensors

    self.assertIsInstance(
        receiver, tf.estimator.export.TensorServingInputReceiver)

    self.assertIsInstance(features, tf.Tensor)
    self.assertEqual(features.shape, expected_shape)
    self.assertEqual(features.dtype, tf.float32)

    self.assertIsInstance(receiver_tensors, dict)
    # Python 3 dict views are not indexable, hence the list() conversion.
    first_receiver_tensor = list(receiver_tensors.values())[0]
    self.assertEqual(first_receiver_tensor.shape, expected_shape)
def export_pb(flags_core, flags_obj, shape, classifier):
  """Export `classifier` once for image bytes and once for tensor input.

  Both SavedModels are written below
  `<flags_obj.export_dir>/<flags_obj.data_format>/`, in the `binary_input`
  and `preprocessed_input` sub-directories respectively.

  Args:
    flags_core: object providing `get_tf_dtype(flags_obj)`.
    flags_obj: parsed flags; reads `data_format` and `export_dir`.
    shape: input shape forwarded to both serving input receiver builders.
    classifier: object exposing `export_savedmodel(path, receiver_fn)`.

  Raises:
    ValueError: if `flags_obj.data_format` is empty.
  """
  dtype = flags_core.get_tf_dtype(flags_obj)
  if not flags_obj.data_format:
    raise ValueError(
        'The `data_format` must be specified: channels_first or channels_last '
    )

  export_root = os.path.join(flags_obj.export_dir, flags_obj.data_format)

  # (sub-directory, serving input receiver fn), exported in this order.
  serving_variants = [
      ('binary_input',
       functools.partial(image_bytes_serving_input_fn, shape, dtype=dtype)),
      ('preprocessed_input',
       export.build_tensor_serving_input_receiver_fn(
           shape, batch_size=None, dtype=dtype)),
  ]
  for sub_dir, receiver_fn in serving_variants:
    classifier.export_savedmodel(
        os.path.join(export_root, sub_dir), receiver_fn)
def _zip_directory(dir_path):
  """Zip `dir_path` into `<dir_path>.zip` and return the archive path.

  Entries are stored relative to the parent of `dir_path`, so the archive
  holds the directory itself as its top-level folder. The process working
  directory is left untouched, which also keeps relative paths valid.
  """
  zip_path = dir_path + '.zip'
  source_dir = dir_path.rstrip('/')
  parent_dir = os.path.dirname(source_dir) or os.curdir
  # The context manager closes the archive even if a write fails.
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
    for root, _, file_names in os.walk(source_dir):
      for file_name in file_names:
        file_path = os.path.join(root, file_name)
        archive.write(file_path, os.path.relpath(file_path, parent_dir))
  return zip_path


def resnet_main(flags_obj, model_function, input_function, dataset_name,
                shape=None):
  """Shared main loop for ResNet Models.

  Trains and evaluates in cycles of `epochs_between_evals` epochs, then, if
  `flags_obj.export_dir` is set, exports a SavedModel and zips it.

  Args:
    flags_obj: An object containing parsed flags. See define_resnet_flags()
      for details.
    model_function: the function that instantiates the Model and builds the
      ops for train/eval. This will be passed directly into the estimator.
    input_function: the function that processes the dataset and returns a
      dataset that the estimator can train on. This will be wrapped with
      all the relevant flags for running and passed to estimator.
    dataset_name: the name of the dataset for training and evaluation. This is
      used for logging purpose.
    shape: list of ints representing the shape of the images used for training.
      This is only used if flags_obj.export_dir is passed.

  Returns:
    Path of the zip archive holding the exported SavedModel when
    `flags_obj.export_dir` is set, otherwise None.
  """
  model_helpers.apply_clean(flags.FLAGS)

  # Using the Winograd non-fused algorithms provides a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  # Create session config based on values of inter_op_parallelism_threads and
  # intra_op_parallelism_threads. Note that we default to having
  # allow_soft_placement = True, which is required for multi-GPU and not
  # harmful for other modes.
  session_config = tf.ConfigProto(
      inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
      allow_soft_placement=True)

  distribution_strategy = distribution_utils.get_distribution_strategy(
      flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)

  run_config = tf.estimator.RunConfig(
      train_distribute=distribution_strategy, session_config=session_config)

  classifier = tf.estimator.Estimator(
      model_fn=model_function,
      model_dir=flags_obj.model_dir,
      config=run_config,
      params={
          'resnet_size': int(flags_obj.resnet_size),
          'data_format': flags_obj.data_format,
          'batch_size': flags_obj.batch_size,
          'resnet_version': int(flags_obj.resnet_version),
          'loss_scale': flags_core.get_loss_scale(flags_obj),
          'dtype': flags_core.get_tf_dtype(flags_obj)
      })

  run_params = {
      'batch_size': flags_obj.batch_size,
      'dtype': flags_core.get_tf_dtype(flags_obj),
      'resnet_size': flags_obj.resnet_size,
      'resnet_version': flags_obj.resnet_version,
      'synthetic_data': flags_obj.use_synthetic_data,
      'train_epochs': flags_obj.train_epochs,
  }
  if flags_obj.use_synthetic_data:
    dataset_name = dataset_name + '-synthetic'

  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info('resnet', dataset_name, run_params,
                                test_id=flags_obj.benchmark_test_id)

  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      batch_size=flags_obj.batch_size)

  def input_fn_train():
    return input_function(
        is_training=True,
        data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=flags_obj.epochs_between_evals,
        num_gpus=flags_core.get_num_gpus(flags_obj))

  def input_fn_eval():
    return input_function(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=1)

  total_training_cycle = (flags_obj.train_epochs //
                          flags_obj.epochs_between_evals)
  for cycle_index in range(total_training_cycle):
    tf.logging.info('Starting a training cycle: %d/%d',
                    cycle_index, total_training_cycle)

    classifier.train(input_fn=input_fn_train, hooks=train_hooks,
                     max_steps=flags_obj.max_train_steps)

    tf.logging.info('Starting to evaluate.')

    # flags_obj.max_train_steps is generally associated with testing and
    # profiling. As a result it is frequently called with synthetic data, which
    # will iterate forever. Passing steps=flags_obj.max_train_steps allows the
    # eval (which is generally unimportant in those circumstances) to terminate.
    # Note that eval will run for max_train_steps each loop, regardless of the
    # global_step count.
    eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                       steps=flags_obj.max_train_steps)

    tf.logging.info(
        "Convert types of eval_results (np.xx types) to std python (needed for json.dump():"
    )
    # Values are replaced in place; keys are untouched, so iterating the
    # dict while assigning is safe.
    for key, value in eval_results.items():
      eval_results[key] = value.item()

    benchmark_logger.log_evaluation_result(eval_results)

    if model_helpers.past_stop_threshold(
        flags_obj.stop_threshold, eval_results['accuracy']):
      break

  if flags_obj.export_dir is not None:
    # Exports a saved model for the given classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags_obj.batch_size)
    savedmodel_raw_path = classifier.export_savedmodel(
        flags_obj.export_dir, input_receiver_fn)
    # convert a byte string to a normal string
    savedmodel_raw_path = savedmodel_raw_path.decode()

    # Default to the directory as exported, so the zip step below always has
    # a valid path even when no benchmark log files are available.
    savedmodel_path = savedmodel_raw_path

    if (flags_obj.benchmark_logger_type == "BenchmarkFileLogger"
        and hasattr(flags_obj, "benchmark_log_dir")):
      log_dir = flags_obj.benchmark_log_dir
      benchmark_log_path = os.path.join(
          log_dir, logger.BENCHMARK_RUN_LOG_FILE_NAME)

      # attempt to identify a number of GPUs or was it on CPU
      with open(benchmark_log_path, "r") as json_file:
        log = json.load(json_file)
      num_gpus = int(log["machine_config"]["gpu_info"]["count"])

      gpu_info = ""
      if num_gpus == 0:
        gpu_info = "_cpu"
      elif num_gpus > 0:
        gpu_info = "_" + str(num_gpus) + "gpu"

      # add "_cpu" or "_gpu" to just saved graph directory
      savedmodel_path = savedmodel_raw_path + gpu_info
      if savedmodel_path != savedmodel_raw_path:
        # Moving a directory onto itself fails, so skip the no-suffix case.
        shutil.move(savedmodel_raw_path, savedmodel_path)

      # move benchmark_run.log and metric.log to the just saved graph
      shutil.move(benchmark_log_path, savedmodel_path)
      shutil.move(os.path.join(log_dir, logger.METRIC_LOG_FILE_NAME),
                  savedmodel_path)

    tf.logging.info("[INFO]: SavedModel path: %s", savedmodel_path)

    # zip the trained graph, aka savedmodel
    graph_zip_path = _zip_directory(savedmodel_path)
    return graph_zip_path
def resnet_main(
    flags_obj, model_function, input_function, dataset_name, shape=None):
  """Run the train/evaluate cycles shared by the ResNet models.

  Args:
    flags_obj: parsed flags object. See define_resnet_flags() for details.
    model_function: builds the model and its train/eval ops; handed to the
      estimator unchanged.
    input_function: produces the dataset for the estimator. It is wrapped
      here so that the relevant flag values are supplied on each call.
    dataset_name: dataset label, used only for benchmark logging.
    shape: list of ints giving the training image shape. Only consulted when
      `flags_obj.export_dir` is set.

  Returns:
    Dict of results of the run.
  """
  model_helpers.apply_clean(flags.FLAGS)

  # Flag overrides for the GPU thread pool happen only on explicit request.
  if flags_obj.tf_gpu_thread_mode:
    override_flags_and_set_envars_for_gpu_thread_pool(flags_obj)

  # allow_soft_placement=True is required for multi-GPU and is harmless in
  # every other mode.
  session_config = tf.ConfigProto(
      inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
      allow_soft_placement=True)

  distribution_strategy = distribution_utils.get_distribution_strategy(
      flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)

  # A 24 hour checkpoint interval means that, in practice, checkpoints are
  # governed only by `epochs_between_evals`.
  one_day_secs = 60*60*24
  run_config = tf.estimator.RunConfig(
      train_distribute=distribution_strategy,
      session_config=session_config,
      save_checkpoints_secs=one_day_secs)

  # Warm start from a pretrained ResNet, leaving out the dense layer.
  warm_start_settings = None
  if flags_obj.pretrained_model_checkpoint_path is not None:
    warm_start_settings = tf.estimator.WarmStartSettings(
        flags_obj.pretrained_model_checkpoint_path,
        vars_to_warm_start='^(?!.*dense)')

  model_params = {
      'resnet_size': int(flags_obj.resnet_size),
      'data_format': flags_obj.data_format,
      'batch_size': flags_obj.batch_size,
      'resnet_version': int(flags_obj.resnet_version),
      'loss_scale': flags_core.get_loss_scale(flags_obj),
      'dtype': flags_core.get_tf_dtype(flags_obj),
      'fine_tune': flags_obj.fine_tune
  }
  classifier = tf.estimator.Estimator(
      model_fn=model_function,
      model_dir=flags_obj.model_dir,
      config=run_config,
      warm_start_from=warm_start_settings,
      params=model_params)

  run_params = {
      'batch_size': flags_obj.batch_size,
      'dtype': flags_core.get_tf_dtype(flags_obj),
      'resnet_size': flags_obj.resnet_size,
      'resnet_version': flags_obj.resnet_version,
      'synthetic_data': flags_obj.use_synthetic_data,
      'train_epochs': flags_obj.train_epochs,
  }
  if flags_obj.use_synthetic_data:
    dataset_name += '-synthetic'

  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info(
      'resnet', dataset_name, run_params, test_id=flags_obj.benchmark_test_id)

  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      batch_size=flags_obj.batch_size)

  def input_fn_train(num_epochs):
    return input_function(
        is_training=True,
        data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=num_epochs,
        dtype=flags_core.get_tf_dtype(flags_obj),
        datasets_num_private_threads=flags_obj.datasets_num_private_threads,
        num_parallel_batches=flags_obj.datasets_num_parallel_batches)

  def input_fn_eval():
    return input_function(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=1,
        dtype=flags_core.get_tf_dtype(flags_obj))

  if not flags_obj.eval_only and flags_obj.train_epochs:
    # Work out how many train/eval passes are needed. Every pass but the last
    # trains for `epochs_between_evals` epochs; the last one trains for
    # whatever is left to reach `train_epochs`. Example: with
    #   train_epochs = 25 and epochs_between_evals = 10
    # the schedule is [10, 10, 5]:
    #   train 10 epochs, evaluate;
    #   train 10 more epochs, evaluate;
    #   train the final 5 epochs (25 in total), evaluate.
    n_loops = math.ceil(flags_obj.train_epochs / flags_obj.epochs_between_evals)
    schedule = [flags_obj.epochs_between_evals] * int(n_loops)
    # Trim the last entry so the total does not overshoot `train_epochs`.
    schedule[-1] = flags_obj.train_epochs - sum(schedule[:-1])
  else:
    # --eval_only (or no train epochs): one pass with zero training epochs.
    schedule, n_loops = [0], 1

  for cycle_index, num_train_epochs in enumerate(schedule):
    tf.logging.info('Starting cycle: %d/%d', cycle_index, int(n_loops))

    if num_train_epochs:
      classifier.train(input_fn=lambda: input_fn_train(num_train_epochs),
                       hooks=train_hooks,
                       max_steps=flags_obj.max_train_steps)

    tf.logging.info('Starting to evaluate.')

    # max_train_steps is mostly used for testing and profiling, often with
    # synthetic data that never runs out. Passing it as `steps` lets the
    # (then unimportant) eval terminate. Eval therefore runs for
    # max_train_steps on every loop, whatever the global_step is.
    eval_results = classifier.evaluate(
        input_fn=input_fn_eval, steps=flags_obj.max_train_steps)

    benchmark_logger.log_evaluation_result(eval_results)

    stop_now = model_helpers.past_stop_threshold(
        flags_obj.stop_threshold, eval_results['accuracy'])
    if stop_now:
      break

  if flags_obj.export_dir is not None:
    # Export a SavedModel for the trained classifier.
    export_dtype = flags_core.get_tf_dtype(flags_obj)
    if flags_obj.image_bytes_as_serving_input:
      serving_receiver_fn = functools.partial(
          image_bytes_serving_input_fn, shape, dtype=export_dtype)
    else:
      serving_receiver_fn = export.build_tensor_serving_input_receiver_fn(
          shape, batch_size=flags_obj.batch_size, dtype=export_dtype)
    classifier.export_savedmodel(
        flags_obj.export_dir, serving_receiver_fn, strip_default_attrs=True)

  return eval_results
def resnet_main(flags, model_function, input_function, shape=None):
  """Run the train/evaluate cycles shared by the ResNet models.

  Args:
    flags: FLAGS object holding the run parameters. See ResnetArgParser for
      the flags that are created.
    model_function: builds the model and its train/eval ops; handed to the
      estimator (after multi-GPU wrapping, if enabled).
    input_function: produces the dataset for the estimator. It is wrapped
      here so that the relevant flag values are supplied on each call.
    shape: list of ints giving the training image shape. Only consulted when
      `flags.export_dir` is set.
  """
  # Using the Winograd non-fused algorithms provides a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  if flags.multi_gpu:
    validate_batch_size_for_multi_gpu(flags.batch_size)

    # Multi-GPU needs two wrappers: (1) around the model_fn, done here, and
    # (2) around the optimizer, done inside the model_fn where the optimizer
    # is created.
    model_function = tf.contrib.estimator.replicate_model_fn(
        model_function, loss_reduction=tf.losses.Reduction.MEAN)

  # Session config driven by the inter/intra op parallelism flags.
  # allow_soft_placement=True is required for multi-GPU and harmless
  # elsewhere.
  session_config = tf.ConfigProto(
      inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
      allow_soft_placement=True)

  # RunConfig carrying the checkpoint interval and the session config.
  run_config = tf.estimator.RunConfig().replace(
      save_checkpoints_secs=1e9, session_config=session_config)

  model_params = {
      'resnet_size': flags.resnet_size,
      'data_format': flags.data_format,
      'batch_size': flags.batch_size,
      'multi_gpu': flags.multi_gpu,
      'version': flags.version,
  }
  classifier = tf.estimator.Estimator(
      model_fn=model_function, model_dir=flags.model_dir, config=run_config,
      params=model_params)

  benchmark_logger = None
  if flags.benchmark_log_dir is not None:
    benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
    benchmark_logger.log_run_info("resnet")

  # The input functions only read flag values at call time, so they are
  # defined once up front.
  def input_fn_train():
    return input_function(
        True, flags.data_dir, flags.batch_size, flags.epochs_between_evals,
        flags.num_parallel_calls, flags.multi_gpu)

  def input_fn_eval():
    return input_function(
        False, flags.data_dir, flags.batch_size, 1, flags.num_parallel_calls,
        flags.multi_gpu)

  num_cycles = flags.train_epochs // flags.epochs_between_evals
  for _ in range(num_cycles):
    # A fresh set of hooks is requested for every cycle.
    train_hooks = hooks_helper.get_train_hooks(
        flags.hooks,
        batch_size=flags.batch_size,
        benchmark_log_dir=flags.benchmark_log_dir)

    print('Starting a training cycle.')
    classifier.train(input_fn=input_fn_train, hooks=train_hooks,
                     max_steps=flags.max_train_steps)

    print('Starting to evaluate.')
    # max_train_steps is mostly used for testing and profiling, often with
    # synthetic data that never runs out. Passing it as `steps` lets the
    # (then unimportant) eval terminate. Eval therefore runs for
    # max_train_steps on every loop, whatever the global_step is.
    eval_results = classifier.evaluate(
        input_fn=input_fn_eval, steps=flags.max_train_steps)
    print(eval_results)

    if benchmark_logger:
      benchmark_logger.log_estimator_evaluation_result(eval_results)

  if flags.export_dir is not None:
    warn_on_multi_gpu_export(flags.multi_gpu)

    # Export a SavedModel for the trained classifier.
    serving_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags.batch_size)
    classifier.export_savedmodel(flags.export_dir, serving_receiver_fn)
def vgg_main(
    flags_obj, model_function, input_function, dataset_name, shape=None):
  """Shared main loop for VGG Models.

  Runs in one of two modes:

  * `flags_obj.lth_generate_predictions` set: predicts on the eval input
    with the latest checkpoint, attaches a label to each prediction, pickles
    the list to `<lth_prediction_result_dir>/predictions` and returns.
  * otherwise: trains in chunks of at most 10000 steps until
    `flags_obj.max_train_steps` is reached, evaluating after every chunk,
    and finally exports a SavedModel if `flags_obj.export_dir` is set.

  Args:
    flags_obj: An object containing parsed flags. See define_vgg_flags()
      for details.
    model_function: the function that instantiates the Model and builds the
      ops for train/eval. This will be passed directly into the estimator.
    input_function: the function that processes the dataset and returns a
      dataset that the estimator can train on. This will be wrapped with
      all the relevant flags for running and passed to estimator.
    dataset_name: the name of the dataset for training and evaluation. This is
      used for logging purpose.
    shape: list of ints representing the shape of the images used for training.
      This is only used if flags_obj.export_dir is passed.

  Returns:
    None in both modes.

  Raises:
    ValueError: if predictions are requested but
      `flags_obj.lth_prediction_result_dir` is not set.
  """
  model_helpers.apply_clean(flags.FLAGS)

  # Ensures flag override logic is only executed if explicitly triggered.
  if flags_obj.tf_gpu_thread_mode:
    override_flags_and_set_envars_for_gpu_thread_pool(flags_obj)

  # Creates session config. allow_soft_placement = True, is required for
  # multi-GPU and is not harmful for other modes.
  session_config = tf.ConfigProto(
      inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
      allow_soft_placement=True)

  distribution_strategy = distribution_utils.get_distribution_strategy(
      flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)

  # Creates a `RunConfig` with a 24 hour time-based checkpoint interval.
  run_config = tf.estimator.RunConfig(
      train_distribute=distribution_strategy,
      session_config=session_config,
      save_checkpoints_secs=60*60*24)

  # Initializes model with all but the dense layer from pretrained VGG.
  if flags_obj.pretrained_model_checkpoint_path is not None:
    warm_start_settings = tf.estimator.WarmStartSettings(
        flags_obj.pretrained_model_checkpoint_path,
        vars_to_warm_start='^(?!.*dense)')
  else:
    warm_start_settings = None

  classifier = tf.estimator.Estimator(
      model_fn=model_function,
      model_dir=flags_obj.model_dir,
      config=run_config,
      warm_start_from=warm_start_settings,
      params={
          'vgg_size': flags_obj.vgg_size,
          'data_format': flags_obj.data_format,
          'batch_size': flags_obj.batch_size,
          'loss_scale': flags_core.get_loss_scale(flags_obj),
          'dtype': flags_core.get_tf_dtype(flags_obj),
          'fine_tune': flags_obj.fine_tune
      })

  run_params = {
      'batch_size': flags_obj.batch_size,
      'dtype': flags_core.get_tf_dtype(flags_obj),
      'vgg_size': flags_obj.vgg_size,
      'synthetic_data': flags_obj.use_synthetic_data,
      'train_epochs': flags_obj.train_epochs,
  }
  if flags_obj.use_synthetic_data:
    dataset_name = dataset_name + '-synthetic'

  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info('vgg', dataset_name, run_params,
                                test_id=flags_obj.benchmark_test_id)

  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      batch_size=flags_obj.batch_size)
  train_hooks = (list(train_hooks) +
                 lottery.hooks_from_flags(flags_obj.flag_values_dict()))

  def input_fn_train(num_epochs):
    return input_function(
        is_training=True,
        data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=num_epochs,
        dtype=flags_core.get_tf_dtype(flags_obj),
        datasets_num_private_threads=flags_obj.datasets_num_private_threads,
        num_parallel_batches=flags_obj.datasets_num_parallel_batches)

  def input_fn_eval():
    return input_function(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=1,
        dtype=flags_core.get_tf_dtype(flags_obj))

  if flags_obj.lth_generate_predictions:
    # Validate up front with a real exception: `assert` is stripped under
    # `python -O` and previously ran only after prediction had been set up.
    if not flags_obj.lth_prediction_result_dir:
      raise ValueError(
          '`lth_prediction_result_dir` must be set when '
          '`lth_generate_predictions` is enabled.')

    ckpt = tf.train.latest_checkpoint(flags_obj.model_dir)

    if flags_obj.lth_no_pruning:
      m_hooks = []
    else:
      m_hooks = lottery.hooks_from_flags(flags_obj.flag_values_dict())

    eval_results = classifier.predict(
        input_fn=input_fn_eval,
        checkpoint_path=ckpt,
        hooks=m_hooks,
    )

    # Labels are taken as every (32*32*3+1)-th byte of the file.
    # NOTE(review): this matches a record layout of one label byte followed
    # by a 32x32x3 image (CIFAR-10 binary format, presumably) - confirm.
    with tf.gfile.Open(os.path.join(flags_obj.data_dir, 'test_batch.bin'),
                       'rb') as f:
      labels = list(f.read()[::32*32*3+1])

    eval_results = list(eval_results)

    if not tf.gfile.Exists(flags_obj.lth_prediction_result_dir):
      tf.gfile.MakeDirs(flags_obj.lth_prediction_result_dir)

    prediction_path = os.path.join(
        flags_obj.lth_prediction_result_dir, 'predictions')
    with tf.gfile.Open(prediction_path, 'wb') as f:
      # zip() stops at the shorter sequence; surplus entries stay unlabeled.
      for label, res in zip(labels, eval_results):
        res['label'] = label
      pickle.dump(eval_results, f)
    return

  # Resume from the latest checkpoint's global step when one can be read;
  # otherwise start from zero. Only `Exception` is caught so that Ctrl-C and
  # SystemExit are no longer swallowed here.
  try:
    cpr = tf.train.NewCheckpointReader(
        tf.train.latest_checkpoint(flags_obj.model_dir))
    current_step = cpr.get_tensor('global_step')
  except Exception:  # pylint: disable=broad-except
    tf.logging.info('No readable checkpoint in %s; starting from step 0.',
                    flags_obj.model_dir)
    current_step = 0

  while current_step < flags_obj.max_train_steps:
    # Train up to the next 10000-step boundary, capped at max_train_steps.
    next_checkpoint = min(current_step + 10000, flags_obj.max_train_steps)
    classifier.train(input_fn=lambda: input_fn_train(1000),
                     hooks=train_hooks,
                     max_steps=next_checkpoint)
    current_step = next_checkpoint

    tf.logging.info('Starting to evaluate.')
    eval_results = classifier.evaluate(input_fn=input_fn_eval)
    benchmark_logger.log_evaluation_result(eval_results)

  if flags_obj.export_dir is not None:
    # Exports a saved model for the given classifier.
    export_dtype = flags_core.get_tf_dtype(flags_obj)
    if flags_obj.image_bytes_as_serving_input:
      input_receiver_fn = functools.partial(
          image_bytes_serving_input_fn, shape, dtype=export_dtype)
    else:
      input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
          shape, batch_size=flags_obj.batch_size, dtype=export_dtype)
    classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn,
                                 strip_default_attrs=True)
def run_transformer(flags_obj):
  """Build a tf.Estimator for the transformer, then train and evaluate it.

  Args:
    flags_obj: Object containing parsed flag values.

  Returns:
    Dict of results of the run.  Contains the keys `eval_results`,
    `train_hooks`, `bleu_cased`, and `bleu_uncased`. `train_hooks` is a list
    the instances of hooks used during training.
  """
  num_gpus = flags_core.get_num_gpus(flags_obj)
  use_tpu = bool(flags_obj.tpu)  # was a tpu specified.

  # Pick the parameter set, switching to the multi-GPU variant of "big" or
  # "base" when more than one GPU is used.
  params = PARAMS_MAP[flags_obj.param_set]
  if num_gpus > 1 and flags_obj.param_set == "big":
    params = model_params.BIG_MULTI_GPU_PARAMS
  elif num_gpus > 1 and flags_obj.param_set == "base":
    params = model_params.BASE_MULTI_GPU_PARAMS

  # Copy flag-defined values into the params object.
  params["data_dir"] = flags_obj.data_dir
  params["model_dir"] = flags_obj.model_dir
  params["num_parallel_calls"] = flags_obj.num_parallel_calls

  params["tpu"] = flags_obj.tpu
  params["use_tpu"] = use_tpu
  params["static_batch"] = flags_obj.static_batch or use_tpu
  params["allow_ffn_pad"] = not use_tpu

  params["max_length"] = flags_obj.max_length or params["max_length"]

  params["use_synthetic_data"] = flags_obj.use_synthetic_data

  # Batch size: the flag wins; otherwise fall back to the TPU or regular
  # default. Off TPU it is then reduced to the per-replica size.
  total_batch_size = flags_obj.batch_size
  if not total_batch_size:
    default_key = "default_batch_size_tpu" if use_tpu else "default_batch_size"
    total_batch_size = params[default_key]
  params["batch_size"] = total_batch_size

  if not use_tpu:
    params["batch_size"] = distribution_utils.per_replica_batch_size(
        total_batch_size, num_gpus)

  schedule_manager = schedule.Manager(
      train_steps=flags_obj.train_steps,
      steps_between_evals=flags_obj.steps_between_evals,
      train_epochs=flags_obj.train_epochs,
      epochs_between_evals=flags_obj.epochs_between_evals,
      default_train_epochs=DEFAULT_TRAIN_EPOCHS,
      batch_size=params["batch_size"],
      max_length=params["max_length"],
      use_tpu=use_tpu,
      num_tpu_shards=flags_obj.num_tpu_shards)

  params["repeat_dataset"] = schedule_manager.repeat_dataset

  model_helpers.apply_clean(flags.FLAGS)

  # Hooks that log training information and metric values.
  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      tensors_to_log=TENSORS_TO_LOG,  # used for logging hooks
      batch_size=total_batch_size,  # for ExamplesPerSecondHook
      use_tpu=use_tpu  # Not all hooks can run with TPUs
  )

  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info(
      model_name="transformer",
      dataset_name="wmt_translate_ende",
      run_params=params,
      test_id=flags_obj.benchmark_test_id)

  # Train and evaluate the transformer model.
  estimator = construct_estimator(flags_obj, params, schedule_manager)
  stats = run_loop(
      estimator=estimator,
      # Training arguments
      schedule_manager=schedule_manager,
      train_hooks=train_hooks,
      benchmark_logger=benchmark_logger,
      # BLEU calculation arguments
      bleu_source=flags_obj.bleu_source,
      bleu_ref=flags_obj.bleu_ref,
      bleu_threshold=flags_obj.stop_threshold,
      vocab_file=flags_obj.vocab_file)

  should_export = flags_obj.export_dir and not use_tpu
  if should_export:
    serving_input_fn = export.build_tensor_serving_input_receiver_fn(
        shape=[None], dtype=tf.int64, batch_size=None)
    # Export the saved model together with the vocab file. The vocab file is
    # kept so that inputs can be encoded and outputs decoded consistently
    # (see the "Export trained model" section of the README for an example).
    # The model itself never reads the vocab file, so it goes in as an extra
    # asset rather than a core asset.
    estimator.export_savedmodel(
        flags_obj.export_dir,
        serving_input_fn,
        assets_extra={"vocab.txt": flags_obj.vocab_file},
        strip_default_attrs=True)

  return stats
def run_transformer(flags_obj):
  """Build a tf.Estimator for the transformer, then train and evaluate it.

  Args:
    flags_obj: Object containing parsed flag values.
  """
  num_gpus = flags_core.get_num_gpus(flags_obj)
  use_tpu = bool(flags_obj.tpu)  # was a tpu specified.

  # Pick the parameter set, switching to the multi-GPU variant of "big" or
  # "base" when more than one GPU is used.
  params = PARAMS_MAP[flags_obj.param_set]
  if num_gpus > 1 and flags_obj.param_set == "big":
    params = model_params.BIG_MULTI_GPU_PARAMS
  elif num_gpus > 1 and flags_obj.param_set == "base":
    params = model_params.BASE_MULTI_GPU_PARAMS

  # Copy flag-defined values into the params object.
  params["data_dir"] = flags_obj.data_dir
  params["model_dir"] = flags_obj.model_dir
  params["num_parallel_calls"] = flags_obj.num_parallel_calls

  params["tpu"] = flags_obj.tpu
  params["use_tpu"] = use_tpu
  params["static_batch"] = flags_obj.static_batch or use_tpu
  params["allow_ffn_pad"] = not use_tpu

  params["use_synthetic_data"] = flags_obj.use_synthetic_data

  # Batch size: the flag wins; otherwise fall back to the TPU or regular
  # default. Off TPU it is then reduced to the per-device size.
  requested_batch_size = flags_obj.batch_size
  if not requested_batch_size:
    default_key = "default_batch_size_tpu" if use_tpu else "default_batch_size"
    requested_batch_size = params[default_key]
  params["batch_size"] = requested_batch_size

  if not use_tpu:
    params["batch_size"] = distribution_utils.per_device_batch_size(
        requested_batch_size, num_gpus)

  schedule_manager = schedule.Manager(
      train_steps=flags_obj.train_steps,
      steps_between_evals=flags_obj.steps_between_evals,
      train_epochs=flags_obj.train_epochs,
      epochs_between_evals=flags_obj.epochs_between_evals,
      default_train_epochs=DEFAULT_TRAIN_EPOCHS,
      batch_size=params["batch_size"],
      max_length=params["max_length"],
      use_tpu=use_tpu,
      num_tpu_shards=flags_obj.num_tpu_shards)

  params["repeat_dataset"] = schedule_manager.repeat_dataset

  model_helpers.apply_clean(flags.FLAGS)

  # Hooks that log training information and metric values.
  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      tensors_to_log=TENSORS_TO_LOG,  # used for logging hooks
      batch_size=schedule_manager.batch_size,  # for ExamplesPerSecondHook
      use_tpu=use_tpu  # Not all hooks can run with TPUs
  )

  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info(
      model_name="transformer",
      dataset_name="wmt_translate_ende",
      run_params=params,
      test_id=flags_obj.benchmark_test_id)

  # Train and evaluate the transformer model.
  estimator = construct_estimator(flags_obj, params, schedule_manager)
  run_loop(
      estimator=estimator,
      # Training arguments
      schedule_manager=schedule_manager,
      train_hooks=train_hooks,
      benchmark_logger=benchmark_logger,
      # BLEU calculation arguments
      bleu_source=flags_obj.bleu_source,
      bleu_ref=flags_obj.bleu_ref,
      bleu_threshold=flags_obj.stop_threshold,
      vocab_file=flags_obj.vocab_file)

  should_export = flags_obj.export_dir and not use_tpu
  if should_export:
    serving_input_fn = export.build_tensor_serving_input_receiver_fn(
        shape=[None], dtype=tf.int64, batch_size=None)
    # Export the saved model together with the vocab file. The vocab file is
    # kept so that inputs can be encoded and outputs decoded consistently
    # (see the "Export trained model" section of the README for an example).
    # The model itself never reads the vocab file, so it goes in as an extra
    # asset rather than a core asset.
    estimator.export_savedmodel(
        flags_obj.export_dir,
        serving_input_fn,
        assets_extra={"vocab.txt": flags_obj.vocab_file},
        strip_default_attrs=True)
def resnet_main(
        flags_obj, model_function, input_function, dataset_name, shape=None):
    """Shared main loop for ResNet Models.

    Args:
      flags_obj: An object containing parsed flags. See define_resnet_flags()
        for details.
      model_function: the function that instantiates the Model and builds the
        ops for train/eval. This will be passed directly into the estimator.
      input_function: the function that processes the dataset and returns a
        dataset that the estimator can train on. This will be wrapped with
        all the relevant flags for running and passed to estimator.
      dataset_name: the name of the dataset for training and evaluation. This
        is used for logging purpose.
      shape: list of ints representing the shape of the images used for
        training. This is only used if flags_obj.export_dir is passed.
    """
    # The Winograd non-fused algorithms give a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # allow_soft_placement=True is required for multi-GPU and is not harmful
    # for the other modes, so it is always enabled.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # One device (CPU for 0 GPUs, GPU for exactly 1) or a mirrored strategy.
    gpu_count = flags_core.get_num_gpus(flags_obj)
    if gpu_count in (0, 1):
        device = 'device:GPU:0' if gpu_count == 1 else 'device:CPU:0'
        distribution = tf.contrib.distribute.OneDeviceStrategy(device)
    else:
        distribution = tf.contrib.distribute.MirroredStrategy(
            num_gpus=flags_core.get_num_gpus(flags_obj))

    run_config = tf.estimator.RunConfig(
        train_distribute=distribution, session_config=session_config)

    model_params = {
        'resnet_size': int(flags_obj.resnet_size),
        'data_format': flags_obj.data_format,
        'batch_size': flags_obj.batch_size,
        'resnet_version': int(flags_obj.resnet_version),
        'loss_scale': flags_core.get_loss_scale(flags_obj),
        'dtype': flags_core.get_tf_dtype(flags_obj),
    }
    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        params=model_params)

    # Run description handed to the benchmark logger.
    run_params = {
        'batch_size': flags_obj.batch_size,
        'dtype': flags_core.get_tf_dtype(flags_obj),
        'resnet_size': flags_obj.resnet_size,
        'resnet_version': flags_obj.resnet_version,
        'synthetic_data': flags_obj.use_synthetic_data,
        'train_epochs': flags_obj.train_epochs,
    }
    benchmark_logger = logger.config_benchmark_logger(flags_obj)
    benchmark_logger.log_run_info('resnet', dataset_name, run_params)

    train_hooks = hooks_helper.get_train_hooks(
        flags_obj.hooks, batch_size=flags_obj.batch_size)

    def _make_input_fn(is_training):
        """Return a zero-argument input fn for training or evaluation."""
        def _input_fn():
            # Training covers the epochs between two evaluations; evaluation
            # makes a single pass.
            epochs = flags_obj.epochs_between_evals if is_training else 1
            return input_function(
                is_training=is_training,
                data_dir=flags_obj.data_dir,
                batch_size=per_device_batch_size(
                    flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
                num_epochs=epochs)
        return _input_fn

    train_input_fn = _make_input_fn(True)
    eval_input_fn = _make_input_fn(False)

    total_training_cycle = (
        flags_obj.train_epochs // flags_obj.epochs_between_evals)
    for cycle_index in range(total_training_cycle):
        tf.logging.info('Starting a training cycle: %d/%d',
                        cycle_index, total_training_cycle)
        classifier.train(
            input_fn=train_input_fn,
            hooks=train_hooks,
            max_steps=flags_obj.max_train_steps)

        tf.logging.info('Starting to evaluate.')
        # max_train_steps is generally associated with testing and profiling,
        # frequently with synthetic data that iterates forever. Passing it as
        # `steps` lets the evaluation terminate in those circumstances. Note
        # that eval then runs for max_train_steps each loop, regardless of the
        # global_step count.
        eval_results = classifier.evaluate(
            input_fn=eval_input_fn, steps=flags_obj.max_train_steps)
        benchmark_logger.log_evaluation_result(eval_results)

        reached_threshold = model_helpers.past_stop_threshold(
            flags_obj.stop_threshold, eval_results['accuracy'])
        if reached_threshold:
            break

    if flags_obj.export_dir is None:
        return

    # Export a saved model for the trained classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags_obj.batch_size)
    classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn)
def resnet_main(
        flags_obj, model_function, input_function, dataset_name, shape=None):
    """Build the ResNet estimator and export it as a SavedModel.

    This variant performs no training or evaluation: it only constructs the
    estimator (optionally warm-started from a pretrained checkpoint) and, when
    flags_obj.export_dir is set, exports it.

    Args:
      flags_obj: An object containing parsed flags. See define_resnet_flags()
        for details.
      model_function: the function that instantiates the Model and builds the
        ops for train/eval. This will be passed directly into the estimator.
      input_function: unused by this variant; kept so the signature matches
        the other main loops.
      dataset_name: unused by this variant; kept so the signature matches the
        other main loops.
      shape: list of ints representing the shape of the images used for
        training. This is only used if flags_obj.export_dir is passed.
    """
    print("RESNET MAIN")

    model_helpers.apply_clean(flags.FLAGS)

    # Flag override logic is only executed if explicitly triggered.
    if flags_obj.tf_gpu_thread_mode:
        override_flags_and_set_envars_for_gpu_thread_pool(flags_obj)

    # allow_soft_placement=True is required for multi-GPU and is not harmful
    # for other modes.
    session_config = tf.ConfigProto(allow_soft_placement=True)
    run_config = tf.estimator.RunConfig(
        session_config=session_config,
        save_checkpoints_secs=60 * 60 * 24)

    # Warm-start every variable whose name does not contain "dense" from the
    # pretrained checkpoint, when one is given.
    if flags_obj.pretrained_model_checkpoint_path is not None:
        warm_start_settings = tf.estimator.WarmStartSettings(
            flags_obj.pretrained_model_checkpoint_path,
            vars_to_warm_start='^(?!.*dense)')
    else:
        warm_start_settings = None

    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        warm_start_from=warm_start_settings,
        params={
            'resnet_size': int(flags_obj.resnet_size),
            'data_format': flags_obj.data_format,
            'batch_size': flags_obj.batch_size,
            'resnet_version': int(flags_obj.resnet_version),
            'loss_scale': flags_core.get_loss_scale(flags_obj),
            'dtype': flags_core.get_tf_dtype(flags_obj),
            'fine_tune': flags_obj.fine_tune,
        })

    if flags_obj.export_dir is not None:
        # Export a saved model for the classifier. The serving input is either
        # raw image bytes or an already-decoded tensor of the given shape.
        export_dtype = flags_core.get_tf_dtype(flags_obj)
        if flags_obj.image_bytes_as_serving_input:
            input_receiver_fn = functools.partial(
                image_bytes_serving_input_fn, shape, dtype=export_dtype)
        else:
            input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
                shape, batch_size=flags_obj.batch_size, dtype=export_dtype)
        classifier.export_savedmodel(
            flags_obj.export_dir, input_receiver_fn,
            strip_default_attrs=True)
def net_main(flags_obj, model_function, input_function, net_data_configs,
             shape=None):
    """Shared main loop for training, evaluating and exporting a model.

    Runs a short warm-up training call to report peak memory use, then
    alternates training and evaluation according to the epoch schedule,
    appending one line of metrics per cycle to ``log_metric.txt`` in the model
    directory, and finally exports a SavedModel if requested.

    Args:
      flags_obj: An object containing parsed flags.
      model_function: the function that instantiates the Model and builds the
        ops for train/eval. This will be passed directly into the estimator.
      input_function: the function that processes the dataset and returns a
        dataset that the estimator can train on. This will be wrapped with
        all the relevant flags for running and passed to estimator.
      net_data_configs: mapping that is also forwarded to the model through
        the estimator params. This function reads its 'dataset_name',
        'sg_settings' and 'dset_metas' entries.
      shape: list of ints representing the shape of the inputs used for
        training. This is only used if flags_obj.export_dir is passed.
    """
    model_helpers.apply_clean(flags.FLAGS)

    metric_logfn = os.path.join(flags_obj.model_dir, 'log_metric.txt')

    from tensorflow.contrib.memory_stats.ops import gen_memory_stats_ops
    max_memory_usage = gen_memory_stats_ops.max_bytes_in_use()

    # Using the Winograd non-fused algorithms provides a small performance
    # boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # allow_soft_placement=True is required for multi-GPU and is not harmful
    # for other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
        allow_soft_placement=True)

    distribution_strategy = distribution_utils.get_distribution_strategy(
        flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)

    run_config = tf.estimator.RunConfig(
        train_distribute=distribution_strategy,
        session_config=session_config)

    # Warm-start every variable whose name does not contain "dense" from the
    # pretrained checkpoint, when one is given.
    if flags_obj.pretrained_model_checkpoint_path is not None:
        warm_start_settings = tf.estimator.WarmStartSettings(
            flags_obj.pretrained_model_checkpoint_path,
            vars_to_warm_start='^(?!.*dense)')
    else:
        warm_start_settings = None

    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        warm_start_from=warm_start_settings,
        params={
            'data_format': flags_obj.data_format,
            'batch_size': flags_obj.batch_size,
            'loss_scale': flags_core.get_loss_scale(flags_obj),
            'weight_decay': flags_obj.weight_decay,
            'dtype': flags_core.get_tf_dtype(flags_obj),
            'fine_tune': flags_obj.fine_tune,
            'examples_per_epoch': flags_obj.examples_per_epoch,
            'net_data_configs': net_data_configs,
        })

    run_params = {
        'batch_size': flags_obj.batch_size,
        'dtype': flags_core.get_tf_dtype(flags_obj),
        'synthetic_data': flags_obj.use_synthetic_data,
        'train_epochs': flags_obj.train_epochs,
    }
    dataset_name = net_data_configs['dataset_name']
    if flags_obj.use_synthetic_data:
        dataset_name = dataset_name + '-synthetic'

    benchmark_logger = logger.get_benchmark_logger()
    benchmark_logger.log_run_info('meshnet', dataset_name, run_params,
                                  test_id=flags_obj.benchmark_test_id)

    train_hooks = hooks_helper.get_train_hooks(
        flags_obj.hooks,
        model_dir=flags_obj.model_dir,
        batch_size=flags_obj.batch_size)

    def input_fn_train(num_epochs):
        return input_function(
            is_training=True,
            data_dir=flags_obj.data_dir,
            batch_size=distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            num_epochs=num_epochs,
            num_gpus=flags_core.get_num_gpus(flags_obj),
            examples_per_epoch=flags_obj.examples_per_epoch,
            sg_settings=net_data_configs['sg_settings'])

    def input_fn_eval():
        return input_function(
            is_training=False,
            data_dir=flags_obj.data_dir,
            batch_size=distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            num_epochs=1,
            sg_settings=net_data_configs['sg_settings'])

    if flags_obj.eval_only or flags_obj.pred_ply or not flags_obj.train_epochs:
        # Evaluation / prediction only: a single loop with zero train epochs.
        schedule, n_loops = [0], 1
    else:
        # Compute the number of times to loop while training. All but the
        # last pass train for `epochs_between_evals` epochs; the last trains
        # for the number needed to reach `train_epochs`. For instance with
        #   train_epochs = 25 and epochs_between_evals = 10
        # the schedule is [10, 10, 5].
        n_loops = math.ceil(
            flags_obj.train_epochs / flags_obj.epochs_between_evals)
        schedule = [
            flags_obj.epochs_between_evals for _ in range(int(n_loops))
        ]
        # Trim the last entry so the total is exactly train_epochs.
        schedule[-1] = flags_obj.train_epochs - sum(schedule[:-1])

    # Short warm-up run (max_steps=10), after which the peak memory counter
    # is read and logged.
    classifier.train(input_fn=lambda: input_fn_train(1), hooks=train_hooks,
                     max_steps=10)
    with tf.Session() as sess:
        max_memory_usage_v = sess.run(max_memory_usage)
        tf.logging.info('\n\nmemory usage: %0.3f G\n\n' %
                        (max_memory_usage_v * 1.0 / 1e9))

    best_acc, best_acc_checkpoint = load_saved_best(flags_obj.model_dir)

    # The metric log is scoped to the train/eval loop so the handle is always
    # closed, including when the loop breaks early or an exception escapes.
    with open(metric_logfn, 'a') as metric_logf:
        for cycle_index, num_train_epochs in enumerate(schedule):
            tf.logging.info('Starting cycle: %d/%d', cycle_index,
                            int(n_loops))

            t0 = time.time()
            train_t = 0
            if num_train_epochs:
                classifier.train(
                    input_fn=lambda: input_fn_train(num_train_epochs),
                    hooks=train_hooks,
                    max_steps=flags_obj.max_train_steps)
                # Average wall-clock training time per epoch in this cycle.
                train_t = (time.time() - t0) / num_train_epochs

            tf.logging.info('Starting to evaluate.')
            # max_train_steps is generally associated with testing and
            # profiling, frequently with synthetic data that iterates forever.
            # Passing it as `steps` lets the evaluation terminate in those
            # circumstances. Note that eval then runs for max_train_steps
            # each loop, regardless of the global_step count.
            t0 = time.time()
            eval_results = classifier.evaluate(
                input_fn=input_fn_eval,
                steps=flags_obj.max_train_steps,
            )
            eval_t = time.time() - t0

            if flags_obj.pred_ply:
                pred_generator = classifier.predict(input_fn=input_fn_eval)
                num_classes = net_data_configs['dset_metas'].num_classes
                gen_pred_ply(eval_results, pred_generator,
                             flags_obj.model_dir, num_classes)

            benchmark_logger.log_evaluation_result(eval_results)

            if model_helpers.past_stop_threshold(flags_obj.stop_threshold,
                                                 eval_results['accuracy']):
                break

            # Only a cycle that actually trained can produce a new best model.
            cur_is_best = ''
            if num_train_epochs and eval_results['accuracy'] > best_acc:
                best_acc = eval_results['accuracy']
                save_cur_model_as_best_acc(flags_obj.model_dir, best_acc)
                cur_is_best = 'best'

            global_step = cur_global_step(flags_obj.model_dir)
            epoch = int(global_step / flags_obj.examples_per_epoch *
                        flags_obj.num_gpus)
            ious_str = get_ious_str(eval_results['cm'],
                                    net_data_configs['dset_metas'],
                                    eval_results['mean_iou'])
            metric_logf.write(
                '\n{} train t:{:.1f} eval t:{:.1f} \teval acc:{:.3f} \tmean_iou:{:.3f} {} {}\n'
                .format(epoch, train_t, eval_t, eval_results['accuracy'],
                        eval_results['mean_iou'], cur_is_best, ious_str))
            # Flush every cycle so the log is readable while training runs.
            metric_logf.flush()

    if flags_obj.export_dir is not None:
        # Export a saved model for the given classifier.
        input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
            shape, batch_size=flags_obj.batch_size)
        classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn)
def resnet_main(seed, flags, model_function, input_function, shape=None):
    """Shared main loop for ResNet Models.

    Args:
      seed: value passed to the estimator's RunConfig as tf_random_seed.
      flags: FLAGS object that contains the params for running. See
        ResnetArgParser for created flags.
      model_function: the function that instantiates the Model and builds the
        ops for train/eval. This will be passed directly into the estimator.
      input_function: the function that processes the dataset and returns a
        dataset that the estimator can train on. This will be wrapped with
        all the relevant flags for running and passed to estimator.
      shape: list of ints representing the shape of the images used for
        training. This is only used if flags.export_dir is passed.
    """
    # The Winograd non-fused algorithms give a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # allow_soft_placement=True is required for multi-GPU and is not harmful
    # for the other modes, so it is always enabled.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # One device (CPU for 0 GPUs, GPU for exactly 1) or a mirrored strategy.
    if flags.num_gpus in (0, 1):
        device = 'device:GPU:0' if flags.num_gpus == 1 else 'device:CPU:0'
        distribution = tf.contrib.distribute.OneDeviceStrategy(device)
    else:
        distribution = tf.contrib.distribute.MirroredStrategy(
            num_gpus=flags.num_gpus)

    run_config = tf.estimator.RunConfig(
        train_distribute=distribution,
        session_config=session_config,
        tf_random_seed=seed)

    model_params = {
        'resnet_size': flags.resnet_size,
        'data_format': flags.data_format,
        'batch_size': flags.batch_size,
        'version': flags.version,
        'loss_scale': flags.loss_scale,
        'dtype': flags.dtype,
    }
    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params=model_params)

    # Benchmark logging is optional and only active with a log directory.
    benchmark_logger = None
    if flags.benchmark_log_dir is not None:
        benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
        benchmark_logger.log_run_info('resnet')

    def _train_input():
        """Input fn covering the epochs between two evaluations."""
        return input_function(
            is_training=True,
            data_dir=flags.data_dir,
            batch_size=per_device_batch_size(
                flags.batch_size, flags.num_gpus),
            num_epochs=flags.epochs_between_evals,
        )

    def _eval_input():
        """Input fn making a single pass over the evaluation data."""
        return input_function(
            is_training=False,
            data_dir=flags.data_dir,
            batch_size=per_device_batch_size(
                flags.batch_size, flags.num_gpus),
            num_epochs=1,
        )

    cycle_count = flags.train_epochs // flags.epochs_between_evals
    for _ in range(cycle_count):
        # Hooks are rebuilt for every training cycle.
        train_hooks = hooks_helper.get_train_hooks(
            flags.hooks,
            batch_size=flags.batch_size,
            benchmark_log_dir=flags.benchmark_log_dir)

        print('Starting a training cycle.')
        classifier.train(
            input_fn=_train_input,
            hooks=train_hooks,
            max_steps=flags.max_train_steps)

        print('Starting to evaluate.')
        # max_train_steps is generally associated with testing and profiling,
        # frequently with synthetic data that iterates forever. Passing it as
        # `steps` lets the evaluation terminate in those circumstances. Note
        # that eval then runs for max_train_steps each loop, regardless of the
        # global_step count.
        eval_results = classifier.evaluate(
            input_fn=_eval_input, steps=flags.max_train_steps)
        print(eval_results)

        if benchmark_logger:
            benchmark_logger.log_estimator_evaluation_result(eval_results)

        reached_threshold = model_helpers.past_stop_threshold(
            flags.stop_threshold, eval_results['accuracy'])
        if reached_threshold:
            break

    if flags.export_dir is None:
        return

    # Export a saved model for the trained classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags.batch_size)
    classifier.export_savedmodel(flags.export_dir, input_receiver_fn)
def convinh_main(
        flags_obj, model_function, input_function, dataset_name, shape=None):
    """Shared main loop for convinh Models.

    Evaluates the untrained model once, then alternates training and
    evaluation, exporting a SavedModel after each cycle when an export
    directory is configured.

    Args:
      flags_obj: An object containing parsed flags. See define_convinh_flags()
        for details.
      model_function: the function that instantiates the Model and builds the
        ops for train/eval. This will be passed directly into the estimator.
      input_function: the function that processes the dataset and returns a
        dataset that the estimator can train on. This will be wrapped with
        all the relevant flags for running and passed to estimator.
      dataset_name: the name of the dataset for training and evaluation. This
        is used for logging purpose.
      shape: list of ints representing the shape of the images used for
        training. This is only used if flags_obj.export_dir is passed.
    """
    model_helpers.apply_clean(flags.FLAGS)

    # The Winograd non-fused algorithms give a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # allow_soft_placement=True is required for multi-GPU and is not harmful
    # for the other modes, so it is always enabled.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
        allow_soft_placement=True)

    distribution_strategy = distribution_utils.get_distribution_strategy(
        flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)

    run_config = tf.estimator.RunConfig(
        tf_random_seed=flags_obj.seed,
        train_distribute=distribution_strategy,
        session_config=session_config,
        keep_checkpoint_max=flags_obj.num_ckpt)

    def _as_ints(values):
        """Convert every element of a flag value to int."""
        return [int(value) for value in values]

    network_params = {
        'data_format': flags_obj.data_format,
        'filters': _as_ints(flags_obj.filters),
        'ratio_PV': flags_obj.ratio_PV,
        'ratio_SST': flags_obj.ratio_SST,
        'conv_kernel_size': _as_ints(flags_obj.conv_kernel_size),
        'conv_kernel_size_inh': _as_ints(flags_obj.conv_kernel_size_inh),
        'conv_strides': _as_ints(flags_obj.conv_strides),
        'pool_size': _as_ints(flags_obj.pool_size),
        'pool_strides': _as_ints(flags_obj.pool_strides),
        'num_ff_layers': flags_obj.num_ff_layers,
        'num_rnn_layers': flags_obj.num_rnn_layers,
        'connection': flags_obj.connection,
        'n_time': flags_obj.n_time,
        'cell_fn': flags_obj.cell_fn,
        'act_fn': flags_obj.act_fn,
        'pvsst_circuit': flags_obj.pvsst_circuit,
        'gating': flags_obj.gating,
        'normalize': flags_obj.normalize,
        'num_classes': flags_obj.num_classes,
    }
    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        params={
            'model_params': network_params,
            'batch_size': flags_obj.batch_size,
            'weight_decay': flags_obj.weight_decay,
            'loss_scale': flags_core.get_loss_scale(flags_obj),
            'dtype': flags_core.get_tf_dtype(flags_obj),
        })

    # Run description handed to the benchmark logger.
    run_params = {
        'batch_size': flags_obj.batch_size,
        'dtype': flags_core.get_tf_dtype(flags_obj),
        'convinh_size': flags_obj.convinh_size,  # deprecated
        'convinh_version': flags_obj.convinh_version,  # deprecated
        'synthetic_data': flags_obj.use_synthetic_data,  # deprecated
        'train_epochs': flags_obj.train_epochs,
    }
    if flags_obj.use_synthetic_data:
        dataset_name = dataset_name + '-synthetic'

    benchmark_logger = logger.get_benchmark_logger()
    benchmark_logger.log_run_info(
        'convinh', dataset_name, run_params,
        test_id=flags_obj.benchmark_test_id)

    train_hooks = hooks_helper.get_train_hooks(
        flags_obj.hooks,
        model_dir=flags_obj.model_dir,
        batch_size=flags_obj.batch_size)

    def _train_input_for(num_epochs):
        """Return a zero-argument training input fn bound to num_epochs."""
        def _input_fn():
            return input_function(
                is_training=True,
                data_dir=flags_obj.data_dir,
                batch_size=distribution_utils.per_device_batch_size(
                    flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
                num_epochs=num_epochs,
                num_gpus=flags_core.get_num_gpus(flags_obj))
        return _input_fn

    def _eval_input():
        """Input fn making a single pass over the evaluation data."""
        return input_function(
            is_training=False,
            data_dir=flags_obj.data_dir,
            batch_size=distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            num_epochs=1)

    def _evaluate_and_log():
        """Evaluate the classifier and log the metrics it returns."""
        # max_train_steps is generally associated with testing and profiling,
        # frequently with synthetic data that iterates forever. Passing it as
        # `steps` lets the evaluation terminate in those circumstances. Note
        # that eval then runs for max_train_steps each time, regardless of the
        # global_step count.
        results = classifier.evaluate(
            input_fn=_eval_input, steps=flags_obj.max_train_steps)
        benchmark_logger.log_evaluation_result(results)
        return results

    tf.logging.info('Evaluate the intial model.')
    eval_results = _evaluate_and_log()

    # One extra cycle: the first cycle trains for a single epoch, every later
    # one for epochs_between_evals epochs.
    total_training_cycle = (
        flags_obj.train_epochs // flags_obj.epochs_between_evals) + 1
    for cycle_index in range(total_training_cycle):
        if cycle_index:
            cur_train_epochs = flags_obj.epochs_between_evals
        else:
            cur_train_epochs = 1
        tf.logging.info(
            'Starting a training cycle: %d/%d, with %d epochs',
            cycle_index, total_training_cycle, cur_train_epochs)

        classifier.train(
            input_fn=_train_input_for(cur_train_epochs),
            hooks=train_hooks,
            max_steps=flags_obj.max_train_steps)

        tf.logging.info('Starting to evaluate.')
        eval_results = _evaluate_and_log()

        reached_threshold = model_helpers.past_stop_threshold(
            flags_obj.stop_threshold, eval_results['accuracy'])
        if reached_threshold:
            break

        # NOTE(review): the original indentation was lost; the export is
        # assumed to run once per cycle inside this loop (it tests
        # cycle_index) - confirm against the upstream file.
        if flags_obj.export_dir is None:
            continue

        # Export a saved model for the classifier with a batch size of 1.
        input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
            shape, batch_size=1)
        if cycle_index == 0:
            # On the first cycle also export the step-0 checkpoint.
            classifier.export_savedmodel(
                flags_obj.export_dir, input_receiver_fn,
                checkpoint_path='{}/model.ckpt-0'.format(flags_obj.model_dir))
        classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn)
def resnet_main(flags_obj, model_function, input_function, dataset_name,
                shape=None):
    """Shared main loop for ResNet Models.

    Args:
      flags_obj: An object containing parsed flags. See define_resnet_flags()
        for details.
      model_function: the function that instantiates the Model and builds the
        ops for train/eval. This will be passed directly into the estimator.
      input_function: the function that processes the dataset and returns a
        dataset that the estimator can train on. This will be wrapped with
        all the relevant flags for running and passed to estimator.
      dataset_name: the name of the dataset for training and evaluation. This
        is used for logging purpose.
      shape: list of ints representing the shape of the images used for
        training. This is only used if flags_obj.export_dir is passed.
    """
    model_helpers.apply_clean(flags.FLAGS)

    # Flag override logic is only executed if explicitly triggered.
    if flags_obj.tf_gpu_thread_mode:
        override_flags_and_set_envars_for_gpu_thread_pool(flags_obj)

    # allow_soft_placement=True is required for multi-GPU and is not harmful
    # for other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
        allow_soft_placement=True)

    distribution_strategy = distribution_utils.get_distribution_strategy(
        flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)

    # Time-based checkpointing only every 24 hours, which essentially leaves
    # checkpoints determined by `epochs_between_evals`.
    run_config = tf.estimator.RunConfig(
        train_distribute=distribution_strategy,
        session_config=session_config,
        save_checkpoints_secs=60 * 60 * 24)

    # Warm-start every variable whose name does not contain "dense" from the
    # pretrained checkpoint, when one is given.
    if flags_obj.pretrained_model_checkpoint_path is not None:
        warm_start_settings = tf.estimator.WarmStartSettings(
            flags_obj.pretrained_model_checkpoint_path,
            vars_to_warm_start='^(?!.*dense)')
    else:
        warm_start_settings = None

    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        warm_start_from=warm_start_settings,
        params={
            'resnet_size': int(flags_obj.resnet_size),
            'data_format': flags_obj.data_format,
            'batch_size': flags_obj.batch_size,
            'resnet_version': int(flags_obj.resnet_version),
            'loss_scale': flags_core.get_loss_scale(flags_obj),
            'dtype': flags_core.get_tf_dtype(flags_obj),
            'fine_tune': flags_obj.fine_tune,
        })

    run_params = {
        'batch_size': flags_obj.batch_size,
        'dtype': flags_core.get_tf_dtype(flags_obj),
        'resnet_size': flags_obj.resnet_size,
        'resnet_version': flags_obj.resnet_version,
        'synthetic_data': flags_obj.use_synthetic_data,
        'train_epochs': flags_obj.train_epochs,
    }
    if flags_obj.use_synthetic_data:
        dataset_name = dataset_name + '-synthetic'

    benchmark_logger = logger.get_benchmark_logger()
    benchmark_logger.log_run_info('resnet', dataset_name, run_params,
                                  test_id=flags_obj.benchmark_test_id)

    train_hooks = hooks_helper.get_train_hooks(
        flags_obj.hooks,
        model_dir=flags_obj.model_dir,
        batch_size=flags_obj.batch_size)

    def input_fn_train(num_epochs):
        return input_function(
            is_training=True,
            data_dir=flags_obj.data_dir,
            batch_size=distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            num_epochs=num_epochs,
            dtype=flags_core.get_tf_dtype(flags_obj),
            datasets_num_private_threads=(
                flags_obj.datasets_num_private_threads),
            num_parallel_batches=flags_obj.datasets_num_parallel_batches)

    def input_fn_eval():
        return input_function(
            is_training=False,
            data_dir=flags_obj.data_dir,
            batch_size=distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            num_epochs=1,
            dtype=flags_core.get_tf_dtype(flags_obj))

    if flags_obj.eval_only or not flags_obj.train_epochs:
        # If --eval_only is set, perform a single loop with zero train epochs.
        schedule, n_loops = [0], 1
    else:
        # Compute the number of times to loop while training. All but the
        # last pass train for `epochs_between_evals` epochs; the last trains
        # for the number needed to reach `train_epochs`. For instance with
        #   train_epochs = 25 and epochs_between_evals = 10
        # the schedule is [10, 10, 5].
        n_loops = math.ceil(
            flags_obj.train_epochs / flags_obj.epochs_between_evals)
        schedule = [
            flags_obj.epochs_between_evals for _ in range(int(n_loops))
        ]
        # Trim the last entry so the total is exactly train_epochs.
        schedule[-1] = flags_obj.train_epochs - sum(schedule[:-1])

    # Estimator.train() and Estimator.evaluate() manage their own sessions and
    # return the estimator itself / a dict of evaluated metrics, so their
    # results cannot be handed to Session.run() as fetches and a RunMetadata
    # passed that way is never filled in. They are therefore called directly.
    # Step timelines have to be collected by a session-run hook (for example
    # tf.train.ProfilerHook) supplied through the training hooks.
    for cycle_index, num_train_epochs in enumerate(schedule):
        tf.logging.info('Starting cycle: %d/%d', cycle_index, int(n_loops))

        if num_train_epochs:
            classifier.train(
                input_fn=lambda: input_fn_train(num_train_epochs),
                hooks=train_hooks,
                max_steps=flags_obj.max_train_steps)

        tf.logging.info('Starting to evaluate.')
        # max_train_steps is generally associated with testing and profiling,
        # frequently with synthetic data that iterates forever. Passing it as
        # `steps` lets the evaluation terminate in those circumstances. Note
        # that eval then runs for max_train_steps each loop, regardless of the
        # global_step count.
        eval_results = classifier.evaluate(
            input_fn=input_fn_eval, steps=flags_obj.max_train_steps)

        benchmark_logger.log_evaluation_result(eval_results)

        if model_helpers.past_stop_threshold(flags_obj.stop_threshold,
                                             eval_results['accuracy']):
            break

    if flags_obj.export_dir is not None:
        # Export a saved model for the classifier. The serving input is either
        # raw image bytes or an already-decoded tensor of the given shape.
        export_dtype = flags_core.get_tf_dtype(flags_obj)
        if flags_obj.image_bytes_as_serving_input:
            input_receiver_fn = functools.partial(
                image_bytes_serving_input_fn, shape, dtype=export_dtype)
        else:
            input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
                shape, batch_size=flags_obj.batch_size, dtype=export_dtype)
        classifier.export_savedmodel(
            flags_obj.export_dir, input_receiver_fn,
            strip_default_attrs=True)
def run_transformer(flags_obj):
  """Create tf.Estimator to train and evaluate transformer model.

  Builds the params object from the parsed flags, sets `vocab_size` from the
  number of lines in the vocab file, runs the train/eval loop and, when
  `flags_obj.export_dir` is set, exports a SavedModel with the vocab file
  attached as an extra asset.

  Note that the params object looked up from PARAMS_MAP / model_params is
  updated in place.

  Args:
    flags_obj: Object containing parsed flag values.
  """
  num_gpus = flags_core.get_num_gpus(flags_obj)

  # Add flag-defined parameters to params object
  params = PARAMS_MAP[flags_obj.param_set]
  if num_gpus > 1:
    if flags_obj.param_set == "big":
      params = model_params.BIG_MULTI_GPU_PARAMS
    elif flags_obj.param_set == "base":
      params = model_params.BASE_MULTI_GPU_PARAMS

  params["data_dir"] = flags_obj.data_dir
  params["model_dir"] = flags_obj.model_dir
  params["num_parallel_calls"] = flags_obj.num_parallel_calls

  params["tpu"] = flags_obj.tpu
  params["use_tpu"] = bool(flags_obj.tpu)  # was a tpu specified.
  params["static_batch"] = flags_obj.static_batch or params["use_tpu"]
  params["allow_ffn_pad"] = not params["use_tpu"]

  params["use_synthetic_data"] = flags_obj.use_synthetic_data

  # Set batch size parameter, which depends on the availability of
  # TPU and GPU, and distribution settings.
  params["batch_size"] = (
      flags_obj.batch_size or
      (params["default_batch_size_tpu"] if params["use_tpu"]
       else params["default_batch_size"]))

  # TC: set vocab_size as the number of tokens in vocab_file (one token per
  # line). The file is opened in a context manager so the handle is closed
  # deterministically instead of being leaked.
  with open(flags_obj.vocab_file) as vocab_fh:
    params["vocab_size"] = sum(1 for _ in vocab_fh)
  print('TC: vocab_size %d' % params["vocab_size"])

  if not params["use_tpu"]:
    params["batch_size"] = distribution_utils.per_device_batch_size(
        params["batch_size"], num_gpus)

  # Debug output of the effective schedule and batch-size settings.
  print('debug: train_steps', flags_obj.train_steps)
  print('debug: train_epochs', flags_obj.train_epochs)
  print('debug: epochs_between_evals', flags_obj.epochs_between_evals)
  print('debug: steps_between_evals', flags_obj.steps_between_evals)
  print('debug: flags_obj.batch_size', flags_obj.batch_size)
  print('debug: batch_size', params['batch_size'])

  schedule_manager = schedule.Manager(
      train_steps=flags_obj.train_steps,
      steps_between_evals=flags_obj.steps_between_evals,
      train_epochs=flags_obj.train_epochs,
      epochs_between_evals=flags_obj.epochs_between_evals,
      default_train_epochs=DEFAULT_TRAIN_EPOCHS,
      batch_size=params["batch_size"],
      max_length=params["max_length"],
      use_tpu=params["use_tpu"],
      num_tpu_shards=flags_obj.num_tpu_shards)

  params["repeat_dataset"] = schedule_manager.repeat_dataset

  model_helpers.apply_clean(flags.FLAGS)

  if not params["use_bow"]:
    # TENSORS_TO_LOG is module-level state; supplying a default keeps a second
    # call in the same process from raising KeyError once the key is gone.
    TENSORS_TO_LOG.pop("bow_loss", None)

  # Create hooks that log information about the training and metric values
  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      tensors_to_log=TENSORS_TO_LOG,  # used for logging hooks
      batch_size=schedule_manager.batch_size,  # for ExamplesPerSecondHook
      use_tpu=params["use_tpu"]  # Not all hooks can run with TPUs
  )

  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info(
      model_name="transformer",
      dataset_name="news_comments_generation",
      run_params=params,
      test_id=flags_obj.benchmark_test_id)

  # Train and evaluate transformer model
  estimator = construct_estimator(flags_obj, params, schedule_manager)
  run_loop(
      estimator=estimator,
      # Training arguments
      schedule_manager=schedule_manager,
      train_hooks=train_hooks,
      benchmark_logger=benchmark_logger,
      # BLEU calculation arguments
      bleu_source=flags_obj.bleu_source,
      bleu_ref=flags_obj.bleu_ref,
      bleu_threshold=flags_obj.stop_threshold,
      vocab_file=flags_obj.vocab_file)

  if flags_obj.export_dir:
    serving_input_fn = export.build_tensor_serving_input_receiver_fn(
        shape=[None], dtype=tf.int64, batch_size=None)
    # Export saved model, and save the vocab file as an extra asset. The vocab
    # file is saved to allow consistent input encoding and output decoding.
    # (See the "Export trained model" section in the README for an example of
    # how to use the vocab file.)
    # Since the model itself does not use the vocab file, this file is saved as
    # an extra asset rather than a core asset.
    estimator.export_savedmodel(
        flags_obj.export_dir, serving_input_fn,
        assets_extra={"vocab.txt": flags_obj.vocab_file})
def run_transformer(flags_obj):
  """Train and evaluate the transformer model, then optionally export it.

  The params object selected by `flags_obj.param_set` is filled in from the
  flags (in place), a schedule manager and training hooks are created, the
  train/eval loop is run, and a SavedModel is exported when
  `flags_obj.export_dir` is set.

  Args:
    flags_obj: Object containing parsed flag values.
  """
  gpu_count = flags_core.get_num_gpus(flags_obj)
  param_set = flags_obj.param_set

  # Choose the network size variant (base or big); with more than one GPU the
  # multi-GPU parameter sets replace the default lookup.
  params = PARAMS_MAP[param_set]
  if gpu_count > 1 and param_set == "big":
    params = model_params.BIG_MULTI_GPU_PARAMS
  elif gpu_count > 1 and param_set == "base":
    params = model_params.BASE_MULTI_GPU_PARAMS

  # Copy flag-defined settings onto the params object.
  params["data_dir"] = flags_obj.data_dir
  params["model_dir"] = flags_obj.model_dir
  params["num_parallel_calls"] = flags_obj.num_parallel_calls

  use_tpu = bool(flags_obj.tpu)  # True when a TPU was specified.
  params["tpu"] = flags_obj.tpu
  params["use_tpu"] = use_tpu
  params["static_batch"] = flags_obj.static_batch or use_tpu
  params["allow_ffn_pad"] = not use_tpu

  # TODO(review): clarify what "use synthetic data" means for this model.
  params["use_synthetic_data"] = flags_obj.use_synthetic_data

  # The batch size comes from the flag when given, otherwise from the
  # TPU/non-TPU default; off TPU it is then converted to a per-device size.
  batch_size = flags_obj.batch_size
  if not batch_size:
    default_key = (
        "default_batch_size_tpu" if use_tpu else "default_batch_size")
    batch_size = params[default_key]
  params["batch_size"] = batch_size
  if not use_tpu:
    params["batch_size"] = distribution_utils.per_device_batch_size(
        batch_size, gpu_count)

  # Manages training progress: total steps/epochs, evaluation interval, etc.
  schedule_manager = schedule.Manager(
      train_steps=flags_obj.train_steps,
      steps_between_evals=flags_obj.steps_between_evals,
      train_epochs=flags_obj.train_epochs,
      epochs_between_evals=flags_obj.epochs_between_evals,
      default_train_epochs=DEFAULT_TRAIN_EPOCHS,
      batch_size=params["batch_size"],
      max_length=params["max_length"],
      use_tpu=use_tpu,
      num_tpu_shards=flags_obj.num_tpu_shards)
  params["repeat_dataset"] = schedule_manager.repeat_dataset

  # Cleans up data and previously saved models; presumably only when cleaning
  # has been enabled through the flags -- verify against model_helpers.
  model_helpers.apply_clean(flags.FLAGS)

  # Hooks that log information about the training and metric values.
  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      tensors_to_log=TENSORS_TO_LOG,  # used for logging hooks
      batch_size=schedule_manager.batch_size,  # for ExamplesPerSecondHook
      use_tpu=use_tpu  # Not all hooks can run with TPUs
  )

  # Benchmark logger, also used for logging run information.
  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info(
      model_name="transformer",
      dataset_name="wmt_translate_ende",
      run_params=params,
      test_id=flags_obj.benchmark_test_id)

  # construct_estimator is expected to return the tf.estimator.Estimator used
  # for both training and evaluation.
  estimator = construct_estimator(flags_obj, params, schedule_manager)

  loop_kwargs = dict(
      estimator=estimator,
      # Training arguments
      schedule_manager=schedule_manager,
      train_hooks=train_hooks,
      benchmark_logger=benchmark_logger,
      # BLEU calculation arguments (source file, reference file, threshold)
      bleu_source=flags_obj.bleu_source,
      bleu_ref=flags_obj.bleu_ref,
      bleu_threshold=flags_obj.stop_threshold,
      # Vocabulary file
      vocab_file=flags_obj.vocab_file)
  run_loop(**loop_kwargs)

  if not flags_obj.export_dir:
    return

  serving_input_fn = export.build_tensor_serving_input_receiver_fn(
      shape=[None], dtype=tf.int64, batch_size=None)
  # The vocab file is exported next to the SavedModel so that input encoding
  # and output decoding stay consistent (see the "Export trained model"
  # section in the README). The model itself does not use the file, so it is
  # stored as an extra asset rather than a core asset.
  extra_assets = {"vocab.txt": flags_obj.vocab_file}
  estimator.export_savedmodel(
      flags_obj.export_dir, serving_input_fn, assets_extra=extra_assets)
def resnet_main(flags_obj, model_function, input_function, dataset_name,
                shape=None):
  """Shared main loop for ResNet Models.

  Trains and evaluates in cycles of `epochs_between_evals` epochs. After every
  evaluation the accuracy and the hyperparameter flags are appended to
  `learning_curve.csv` in the model directory. When `flags_obj.export_dir` is
  set a SavedModel is exported after the loop.

  Args:
    flags_obj: An object containing parsed flags. See define_resnet_flags()
      for details.
    model_function: the function that instantiates the Model and builds the
      ops for train/eval. This will be passed directly into the estimator.
    input_function: the function that processes the dataset and returns a
      dataset that the estimator can train on. This will be wrapped with
      all the relevant flags for running and passed to estimator.
    dataset_name: the name of the dataset for training and evaluation. This is
      used for logging purpose.
    shape: list of ints representing the shape of the images used for training.
      This is only used if flags_obj.export_dir is passed.

  Returns:
    The 'accuracy' entry of the last evaluation result, or None when no
    training cycle ran (train_epochs smaller than epochs_between_evals).
  """

  model_helpers.apply_clean(flags.FLAGS)

  # Using the Winograd non-fused algorithms provides a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  # Create session config based on values of inter_op_parallelism_threads and
  # intra_op_parallelism_threads. Note that we default to having
  # allow_soft_placement = True, which is required for multi-GPU and not
  # harmful for other modes.
  # Xinyi: additionally cap this process at 40% of GPU memory.
  gpu_memory_fraction = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
  session_config = tf.ConfigProto(
      gpu_options=gpu_memory_fraction,
      inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
      allow_soft_placement=True)

  # Xinyi: the GPU count is hard-coded to 1 here and in the input functions
  # because get_num_gpus() will occupy all GPUs.
  distribution_strategy = distribution_utils.get_distribution_strategy(
      1, flags_obj.all_reduce_alg)

  run_config = tf.estimator.RunConfig(train_distribute=distribution_strategy,
                                      session_config=session_config)

  classifier = tf.estimator.Estimator(
      model_fn=model_function, model_dir=flags_obj.model_dir,
      config=run_config,
      params={
          'resnet_size': int(flags_obj.resnet_size),
          'data_format': flags_obj.data_format,
          'batch_size': flags_obj.batch_size,
          'resnet_version': int(flags_obj.resnet_version),
          'loss_scale': flags_core.get_loss_scale(flags_obj),
          'dtype': flags_core.get_tf_dtype(flags_obj)
      })

  run_params = {
      'batch_size': flags_obj.batch_size,
      'dtype': flags_core.get_tf_dtype(flags_obj),
      'resnet_size': flags_obj.resnet_size,
      'resnet_version': flags_obj.resnet_version,
      'synthetic_data': flags_obj.use_synthetic_data,
      'train_epochs': flags_obj.train_epochs,
  }
  if flags_obj.use_synthetic_data:
    dataset_name = dataset_name + '-synthetic'

  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info('resnet', dataset_name, run_params,
                                test_id=flags_obj.benchmark_test_id)

  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      batch_size=flags_obj.batch_size)

  def input_fn_train():
    return input_function(
        is_training=True, data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, 1),
        num_epochs=flags_obj.epochs_between_evals,
        num_gpus=1)

  def input_fn_eval():
    return input_function(
        is_training=False, data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, 1),
        num_epochs=1)

  total_training_cycle = (flags_obj.train_epochs //
                          flags_obj.epochs_between_evals)

  # Stays None when the loop body never runs, so the final return cannot hit
  # an unbound name.
  eval_results = None
  for cycle_index in range(total_training_cycle):
    tf.logging.info('Starting a training cycle: %d/%d',
                    cycle_index, total_training_cycle)

    classifier.train(input_fn=input_fn_train, hooks=train_hooks,
                     max_steps=flags_obj.max_train_steps)

    tf.logging.info('Starting to evaluate.')

    # flags_obj.max_train_steps is generally associated with testing and
    # profiling. As a result it is frequently called with synthetic data, which
    # will iterate forever. Passing steps=flags_obj.max_train_steps allows the
    # eval (which is generally unimportant in those circumstances) to terminate.
    # Note that eval will run for max_train_steps each loop, regardless of the
    # global_step count.
    eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                       steps=flags_obj.max_train_steps)

    benchmark_logger.log_evaluation_result(eval_results)

    # Xinyi: record accuracy after every training cycle.
    _append_learning_curve_row(eval_results)

    if model_helpers.past_stop_threshold(
        flags_obj.stop_threshold, eval_results['accuracy']):
      # Leave the loop instead of returning so the export below still runs
      # for a model that reached the stop threshold.
      break

  if flags_obj.export_dir is not None:
    # Exports a saved model for the given classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags_obj.batch_size)
    classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn)

  if eval_results is None:
    return None
  return eval_results['accuracy']


def _append_learning_curve_row(eval_results):
  """Append eval accuracy and hyperparameter flags to learning_curve.csv.

  The file lives in `flags.FLAGS.model_dir`. The header row is written only
  when the file does not exist yet.

  Args:
    eval_results: evaluation result mapping; its 'accuracy' entry is logged.
  """
  filename = os.path.join(flags.FLAGS.model_dir, 'learning_curve.csv')
  file_exists = os.path.isfile(filename)

  fields = [
      'epochs', 'eval_accuracy', 'optimizer', 'learning_rate', 'decay_rate',
      'decay_steps', 'initializer', 'regularizer', 'weight_decay',
      'batch_size', 'model_id'
  ]
  csv_data = {
      'epochs': flags.FLAGS.epoch_index,
      'eval_accuracy': eval_results['accuracy'],
      'optimizer': flags.FLAGS.optimizer,
      'learning_rate': flags.FLAGS.learning_rate,
      'decay_rate': flags.FLAGS.decay_rate,
      'decay_steps': flags.FLAGS.decay_steps,
      'initializer': flags.FLAGS.initializer,
      'regularizer': flags.FLAGS.regularizer,
      'weight_decay': flags.FLAGS.weight_decay,
      'batch_size': flags.FLAGS.batch_size,
      'model_id': flags.FLAGS.model_id
  }

  # Optimizer-specific columns.
  if flags.FLAGS.optimizer in ('Momentum', 'RMSProp'):
    fields.append('momentum')
    csv_data['momentum'] = flags.FLAGS.momentum
  if flags.FLAGS.optimizer == 'RMSProp':
    fields.append('grad_decay')
    csv_data['grad_decay'] = flags.FLAGS.grad_decay

  with open(filename, 'a') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fields)
    if not file_exists:
      writer.writeheader()
    writer.writerow(csv_data)
def resnet_main(flags_obj, model_function, input_function, dataset_name,
                shape=None):
  """Shared main loop for ResNet Models, with optional warm starting.

  Builds an Estimator (warm-started from
  `flags_obj.pretrained_model_checkpoint_path` when that flag is set), then
  alternates training and evaluation according to a schedule derived from
  `train_epochs` and `epochs_between_evals`, and finally exports a SavedModel
  when `flags_obj.export_dir` is set.

  Args:
    flags_obj: An object containing parsed flags. See define_resnet_flags()
      for details.
    model_function: the function that instantiates the Model and builds the
      ops for train/eval. This will be passed directly into the estimator.
    input_function: the function that processes the dataset and returns a
      dataset that the estimator can train on. This will be wrapped with
      all the relevant flags for running and passed to estimator.
    dataset_name: the name of the dataset for training and evaluation. This is
      used for logging purpose.
    shape: list of ints representing the shape of the images used for training.
      This is only used if flags_obj.export_dir is passed.
  """
  model_helpers.apply_clean(flags.FLAGS)

  # The Winograd non-fused algorithms give a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  # Session config built from the inter/intra op parallelism flags.
  # allow_soft_placement=True is required for multi-GPU and harmless
  # otherwise.
  sess_config = tf.ConfigProto(
      inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
      allow_soft_placement=True)

  strategy = distribution_utils.get_distribution_strategy(
      flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)

  estimator_config = tf.estimator.RunConfig(
      train_distribute=strategy, session_config=sess_config)

  # When a pretrained checkpoint is given, initialize every variable whose
  # name does not contain "dense" from it.
  warm_start = None
  if flags_obj.pretrained_model_checkpoint_path is not None:
    warm_start = tf.estimator.WarmStartSettings(
        flags_obj.pretrained_model_checkpoint_path,
        vars_to_warm_start='^(?!.*dense)')

  estimator = tf.estimator.Estimator(
      model_fn=model_function,
      model_dir=flags_obj.model_dir,
      config=estimator_config,
      warm_start_from=warm_start,
      params={
          'resnet_size': int(flags_obj.resnet_size),
          'data_format': flags_obj.data_format,
          'batch_size': flags_obj.batch_size,
          'resnet_version': int(flags_obj.resnet_version),
          'loss_scale': flags_core.get_loss_scale(flags_obj),
          'dtype': flags_core.get_tf_dtype(flags_obj),
          'fine_tune': flags_obj.fine_tune
      })

  logged_params = {
      'batch_size': flags_obj.batch_size,
      'dtype': flags_core.get_tf_dtype(flags_obj),
      'resnet_size': flags_obj.resnet_size,
      'resnet_version': flags_obj.resnet_version,
      'synthetic_data': flags_obj.use_synthetic_data,
      'train_epochs': flags_obj.train_epochs,
  }
  if flags_obj.use_synthetic_data:
    dataset_name += '-synthetic'

  run_logger = logger.get_benchmark_logger()
  run_logger.log_run_info('resnet', dataset_name, logged_params,
                          test_id=flags_obj.benchmark_test_id)

  hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      batch_size=flags_obj.batch_size)

  def input_fn_train(num_epochs):
    """Input pipeline for `num_epochs` epochs of training data."""
    return input_function(
        is_training=True,
        data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=num_epochs,
        num_gpus=flags_core.get_num_gpus(flags_obj),
        dtype=flags_core.get_tf_dtype(flags_obj))

  def input_fn_eval():
    """Input pipeline for a single pass over the evaluation data."""
    return input_function(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=1,
        dtype=flags_core.get_tf_dtype(flags_obj))

  if flags_obj.eval_only or not flags_obj.train_epochs:
    # --eval_only (or zero train epochs): one loop that trains for 0 epochs.
    schedule, n_loops = [0], 1
  else:
    # Every pass but the last trains for `epochs_between_evals` epochs; the
    # last pass trains for whatever is left to reach `train_epochs`. E.g.
    # train_epochs = 25 and epochs_between_evals = 10 gives [10, 10, 5]:
    # train 10 and evaluate, train 10 and evaluate, train 5 and evaluate.
    n_loops = math.ceil(
        flags_obj.train_epochs / flags_obj.epochs_between_evals)
    schedule = [flags_obj.epochs_between_evals] * int(n_loops)
    # Correct the final entry so the total is not over-counted.
    schedule[-1] = flags_obj.train_epochs - sum(schedule[:-1])

  for cycle_index, num_train_epochs in enumerate(schedule):
    tf.logging.info('Starting cycle: %d/%d', cycle_index, int(n_loops))

    if num_train_epochs:
      estimator.train(
          input_fn=lambda: input_fn_train(num_train_epochs),
          hooks=hooks,
          max_steps=flags_obj.max_train_steps)

    tf.logging.info('Starting to evaluate.')

    # max_train_steps is mostly used for testing and profiling, often with
    # synthetic data that never ends. Passing it as `steps` lets the (then
    # unimportant) eval terminate. Eval runs for max_train_steps in every
    # loop, regardless of the global_step count.
    metrics = estimator.evaluate(
        input_fn=input_fn_eval, steps=flags_obj.max_train_steps)

    run_logger.log_evaluation_result(metrics)

    reached_threshold = model_helpers.past_stop_threshold(
        flags_obj.stop_threshold, metrics['accuracy'])
    if reached_threshold:
      break

  if flags_obj.export_dir is None:
    return

  # Export a SavedModel for the trained estimator.
  receiver_fn = export.build_tensor_serving_input_receiver_fn(
      shape, batch_size=flags_obj.batch_size)
  estimator.export_savedmodel(flags_obj.export_dir, receiver_fn)
def resnet_main(
    flags_obj, model_function, input_function, dataset_name, shape=None):
  """Shared main loop for ResNet Models (Horovod variant).

  Initializes Horovod, pins each process to GPU "0" or "1" by rank parity,
  builds an Estimator (only rank 0 is given a model_dir), then alternates
  training and evaluation. A Horovod broadcast hook for root rank 0 is added
  to the training hooks. A SavedModel is exported when
  `flags_obj.export_dir` is set.

  Args:
    flags_obj: An object containing parsed flags. See define_resnet_flags()
      for details.
    model_function: the function that instantiates the Model and builds the
      ops for train/eval. This will be passed directly into the estimator.
    input_function: the function that processes the dataset and returns a
      dataset that the estimator can train on. This will be wrapped with
      all the relevant flags for running and passed to estimator.
    dataset_name: the name of the dataset for training and evaluation. This is
      used for logging purpose.
    shape: list of ints representing the shape of the images used for training.
      This is only used if flags_obj.export_dir is passed.
  """
  print("I am in rensnet main")

  hvd.init()

  # Create session config. allow_soft_placement = True is required for
  # multi-GPU and not harmful for other modes.
  print("inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads",
        flags_obj.inter_op_parallelism_threads)
  print("intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads",
        flags_obj.intra_op_parallelism_threads)
  # NOTE(review): the flag values are printed above but not applied; `True`
  # is passed for both thread counts instead. Confirm this is intended.
  session_config = tf.ConfigProto(
      inter_op_parallelism_threads=True,
      intra_op_parallelism_threads=True,
      allow_soft_placement=True)
  print("hvd.size", hvd.size(), "hvd.local_rank", hvd.local_rank(),
        "hvd.rank", hvd.rank())

  # Even ranks see GPU "0", odd ranks see GPU "1".
  if hvd.rank() % 2 == 0:
    session_config.gpu_options.visible_device_list = "0"
  else:
    session_config.gpu_options.visible_device_list = "1"

  run_config = tf.estimator.RunConfig(session_config=session_config)

  # Only rank 0 gets a model directory; the other ranks pass None.
  model_dir = flags_obj.model_dir if hvd.rank() == 0 else None
  classifier = tf.estimator.Estimator(
      model_fn=model_function, model_dir=model_dir, config=run_config,
      params={
          'resnet_size': int(flags_obj.resnet_size),
          'data_format': flags_obj.data_format,
          'batch_size': flags_obj.batch_size,
          'resnet_version': int(flags_obj.resnet_version),
          'loss_scale': flags_core.get_loss_scale(flags_obj),
          'dtype': flags_core.get_tf_dtype(flags_obj)
      })

  run_params = {
      'batch_size': flags_obj.batch_size,
      'dtype': flags_core.get_tf_dtype(flags_obj),
      'resnet_size': flags_obj.resnet_size,
      'resnet_version': flags_obj.resnet_version,
      'synthetic_data': flags_obj.use_synthetic_data,
      'train_epochs': flags_obj.train_epochs,
  }
  if flags_obj.use_synthetic_data:
    dataset_name = dataset_name + '-synthetic'

  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info('resnet', dataset_name, run_params,
                                test_id=flags_obj.benchmark_test_id)

  bcast_hook = hvd.BroadcastGlobalVariablesHook(0)
  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks, batch_size=flags_obj.batch_size)
  print("******** train hooks******", train_hooks)
  # Add the broadcast hook exactly once, before the loop. list.append()
  # returns None, so its result must not be assigned back to train_hooks:
  # doing so dropped every hook on the first cycle and raised AttributeError
  # on the second.
  train_hooks.append(bcast_hook)

  def input_fn_train():
    # `indx` hands the Horovod rank of this process to the input function.
    return input_function(
        is_training=True, data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=flags_obj.epochs_between_evals,
        num_gpus=flags_core.get_num_gpus(flags_obj), indx=hvd.rank())

  def input_fn_eval():
    return input_function(
        is_training=False, data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=1)

  total_training_cycle = (flags_obj.train_epochs //
                          flags_obj.epochs_between_evals)
  for cycle_index in range(total_training_cycle):
    tf.logging.info('Starting a training cycle: %d/%d',
                    cycle_index, total_training_cycle)

    classifier.train(input_fn=input_fn_train, hooks=train_hooks,
                     max_steps=flags_obj.max_train_steps)

    tf.logging.info('Starting to evaluate.')

    # flags_obj.max_train_steps is generally associated with testing and
    # profiling. As a result it is frequently called with synthetic data, which
    # will iterate forever. Passing steps=flags_obj.max_train_steps allows the
    # eval (which is generally unimportant in those circumstances) to terminate.
    # Note that eval will run for max_train_steps each loop, regardless of the
    # global_step count.
    eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                       steps=flags_obj.max_train_steps)

    benchmark_logger.log_evaluation_result(eval_results)

    if model_helpers.past_stop_threshold(
        flags_obj.stop_threshold, eval_results['accuracy']):
      break

  if flags_obj.export_dir is not None:
    # Exports a saved model for the given classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags_obj.batch_size)
    classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn)
def resnet_main(flags, model_function, input_function, shape=None):
  """Shared main loop for ResNet Models.

  Builds an Estimator from the flags, then repeats train -> evaluate for
  `train_epochs // epochs_between_evals` cycles (stopping early once the
  accuracy passes `flags.stop_threshold`), and exports a SavedModel when
  `flags.export_dir` is set.

  Args:
    flags: FLAGS object that contains the params for running. See
      ResnetArgParser for created flags.
    model_function: the function that instantiates the Model and builds the
      ops for train/eval. This will be passed directly into the estimator.
    input_function: the function that processes the dataset and returns a
      dataset that the estimator can train on. This will be wrapped with
      all the relevant flags for running and passed to estimator.
    shape: list of ints representing the shape of the images used for training.
      This is only used if flags.export_dir is passed.
  """
  # The Winograd non-fused algorithms give a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  if flags.multi_gpu:
    validate_batch_size_for_multi_gpu(flags.batch_size)

    # Multi-GPU needs two wrappers: (1) the model_fn, done here, and (2) the
    # optimizer, done inside the model_fn where the optimizer is defined.
    model_function = tf.contrib.estimator.replicate_model_fn(
        model_function, loss_reduction=tf.losses.Reduction.MEAN)

  # Session config from the inter/intra op parallelism flags.
  # allow_soft_placement=True is required for multi-GPU and harmless
  # otherwise.
  sess_config = tf.ConfigProto(
      inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
      allow_soft_placement=True)

  # RunConfig carrying the checkpoint interval and the session config.
  estimator_config = tf.estimator.RunConfig().replace(
      save_checkpoints_secs=1e9, session_config=sess_config)

  estimator = tf.estimator.Estimator(
      model_fn=model_function,
      model_dir=flags.model_dir,
      config=estimator_config,
      params={
          'resnet_size': flags.resnet_size,
          'data_format': flags.data_format,
          'batch_size': flags.batch_size,
          'multi_gpu': flags.multi_gpu,
          'version': flags.version,
          'loss_scale': flags.loss_scale,
          'dtype': flags.dtype
      })

  # Benchmark logging is optional and only enabled when a log dir is given.
  benchmark_logger = None
  if flags.benchmark_log_dir is not None:
    benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
    benchmark_logger.log_run_info('resnet')

  def input_fn_train():
    """Training input: `epochs_between_evals` epochs of training data."""
    return input_function(True, flags.data_dir, flags.batch_size,
                          flags.epochs_between_evals,
                          flags.num_parallel_calls, flags.multi_gpu)

  def input_fn_eval():
    """Evaluation input: one epoch of evaluation data."""
    return input_function(False, flags.data_dir, flags.batch_size,
                          1, flags.num_parallel_calls, flags.multi_gpu)

  cycle_count = flags.train_epochs // flags.epochs_between_evals
  for _ in range(cycle_count):
    # Fresh hooks are created for every training cycle.
    cycle_hooks = hooks_helper.get_train_hooks(
        flags.hooks,
        batch_size=flags.batch_size,
        benchmark_log_dir=flags.benchmark_log_dir)

    print('Starting a training cycle.')
    estimator.train(input_fn=input_fn_train, hooks=cycle_hooks,
                    max_steps=flags.max_train_steps)

    print('Starting to evaluate.')
    # max_train_steps is mostly used for testing and profiling, often with
    # synthetic data that never ends. Passing it as `steps` lets the (then
    # unimportant) eval terminate. Eval runs for max_train_steps in every
    # loop, regardless of the global_step count.
    metrics = estimator.evaluate(input_fn=input_fn_eval,
                                 steps=flags.max_train_steps)
    print(metrics)

    if benchmark_logger:
      benchmark_logger.log_estimator_evaluation_result(metrics)

    if model_helpers.past_stop_threshold(
        flags.stop_threshold, metrics['accuracy']):
      break

  if flags.export_dir is None:
    return

  warn_on_multi_gpu_export(flags.multi_gpu)

  # Export a SavedModel for the trained estimator.
  receiver_fn = export.build_tensor_serving_input_receiver_fn(
      shape, batch_size=flags.batch_size)
  estimator.export_savedmodel(flags.export_dir, receiver_fn)
def resnet_main(flags_obj, model_function, input_function, dataset_name,
                shape=None):
  """Shared main loop for ResNet Models (profiling variant).

  Before training, dumps per-operation output sizes of a previously written
  graph to `tensors_sz.json` (skipped when the graph file is missing). Then
  alternates training, with a ProfilerHook writing to "profs", and
  evaluation. A SavedModel is exported when `flags_obj.export_dir` is set.

  Args:
    flags_obj: An object containing parsed flags. See define_resnet_flags()
      for details.
    model_function: the function that instantiates the Model and builds the
      ops for train/eval. This will be passed directly into the estimator.
    input_function: the function that processes the dataset and returns a
      dataset that the estimator can train on. This will be wrapped with
      all the relevant flags for running and passed to estimator.
    dataset_name: the name of the dataset for training and evaluation. This is
      used for logging purpose.
    shape: list of ints representing the shape of the images used for training.
      This is only used if flags_obj.export_dir is passed.
  """

  model_helpers.apply_clean(flags.FLAGS)

  # Using the Winograd non-fused algorithms provides a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  # Create session config based on values of inter_op_parallelism_threads and
  # intra_op_parallelism_threads. Note that we default to having
  # allow_soft_placement = True, which is required for multi-GPU and not
  # harmful for other modes.
  session_config = tf.ConfigProto(
      inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
      allow_soft_placement=True)

  distribution_strategy = distribution_utils.get_distribution_strategy(
      flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)

  run_config = tf.estimator.RunConfig(train_distribute=distribution_strategy,
                                      session_config=session_config)

  classifier = tf.estimator.Estimator(
      model_fn=model_function, model_dir=flags_obj.model_dir,
      config=run_config,
      params={
          'resnet_size': int(flags_obj.resnet_size),
          'data_format': flags_obj.data_format,
          'batch_size': flags_obj.batch_size,
          'resnet_version': int(flags_obj.resnet_version),
          'loss_scale': flags_core.get_loss_scale(flags_obj),
          'dtype': flags_core.get_tf_dtype(flags_obj)
      })

  run_params = {
      'batch_size': flags_obj.batch_size,
      'dtype': flags_core.get_tf_dtype(flags_obj),
      'resnet_size': flags_obj.resnet_size,
      'resnet_version': flags_obj.resnet_version,
      'synthetic_data': flags_obj.use_synthetic_data,
      'train_epochs': flags_obj.train_epochs,
  }
  if flags_obj.use_synthetic_data:
    dataset_name = dataset_name + '-synthetic'

  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info('resnet', dataset_name, run_params,
                                test_id=flags_obj.benchmark_test_id)

  # NOTE(review): these hooks are built but not passed to train() below, which
  # only receives the profiler hook. Confirm whether --hooks should be
  # honored in this variant.
  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      batch_size=flags_obj.batch_size)

  def input_fn_train():
    return input_function(
        is_training=True, data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=flags_obj.epochs_between_evals,
        num_gpus=flags_core.get_num_gpus(flags_obj))

  def input_fn_eval():
    return input_function(
        is_training=False, data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=1)

  total_training_cycle = (flags_obj.train_epochs //
                          flags_obj.epochs_between_evals)

  profiler_hook = tf.train.ProfilerHook(save_steps=100,
                                        save_secs=None,
                                        output_dir="profs",
                                        show_memory=True,
                                        show_dataflow=True)

  # DOGA DEBUG GRAPH: dump the output size of every op in the saved graph.
  _dump_tensor_sizes()

  for cycle_index in range(total_training_cycle):
    tf.logging.info('Starting a training cycle: %d/%d',
                    cycle_index, total_training_cycle)

    classifier.train(input_fn=input_fn_train, hooks=[profiler_hook],
                     max_steps=flags_obj.max_train_steps)

    tf.logging.info('Starting to evaluate.')

    # flags_obj.max_train_steps is generally associated with testing and
    # profiling. As a result it is frequently called with synthetic data, which
    # will iterate forever. Passing steps=flags_obj.max_train_steps allows the
    # eval (which is generally unimportant in those circumstances) to terminate.
    # Note that eval will run for max_train_steps each loop, regardless of the
    # global_step count.
    eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                       steps=flags_obj.max_train_steps)

    benchmark_logger.log_evaluation_result(eval_results)

    if model_helpers.past_stop_threshold(
        flags_obj.stop_threshold, eval_results['accuracy']):
      break

  if flags_obj.export_dir is not None:
    # Exports a saved model for the given classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags_obj.batch_size)
    classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn)


def _dump_tensor_sizes(graph_path='/tmp/cifar10_model/graph.pbtxt',
                       output_path='tensors_sz.json'):
  """Write a JSON map of operation name -> size of its first output.

  The size is the product of the known dimensions of the first output's
  static shape times the dtype size. Operations without outputs, or whose
  first output has unknown rank, are recorded as -1. Two counters are
  printed: first the number of ops with unknown rank or unknown dtype size,
  then the number of ops without outputs.

  Args:
    graph_path: text-format GraphDef file to read. When the file does not
      exist the dump is skipped with a warning instead of aborting training.
    output_path: JSON file to write.
  """
  if not os.path.isfile(graph_path):
    tf.logging.warning(
        'Graph file %s not found; skipping tensor size dump.', graph_path)
    return

  gdef = gpb.GraphDef()
  with open(graph_path, 'r') as fh:
    graph_str = fh.read()
  pbtf.Parse(graph_str, gdef)

  operations_tensors = {}
  unknown_size_count = 0
  no_output_count = 0
  with tf.Graph().as_default() as graph:
    tf.import_graph_def(gdef)
    for operation in graph.get_operations():
      outputs = operation.outputs
      if len(outputs) == 0:
        no_output_count += 1
        operations_tensors[operation.name] = -1
        continue

      first_output = outputs[0]
      if first_output.shape.ndims is None:
        unknown_size_count += 1
        operations_tensors[operation.name] = -1
        continue

      dtype_size = first_output.dtype.size
      if dtype_size is None:
        # Counted, but no entry is recorded for this operation.
        unknown_size_count += 1
        continue

      # Unknown (None) dimensions are left out of the element count.
      element_count = 1
      for dim in first_output.shape.as_list():
        if dim is not None:
          element_count *= dim
      operations_tensors[operation.name] = element_count * dtype_size

  print(unknown_size_count)
  print(no_output_count)

  with open(output_path, 'w') as f:
    json.dump(operations_tensors, f)
def resnet_main(flags, model_function, input_function, shape=None):
    """Run the ResNet train/evaluate cycles and optionally export the model.

    Args:
        flags: FLAGS object holding the run parameters. This function reads
            the threading options, `model_dir`, `data_dir`, `batch_size`,
            `resnet_size`, `data_format`, `multi_gpu`, `version`, `ncmmethod`,
            `ncmparam`, `initial_learning_scale`, `benchmark_log_dir`,
            `hooks`, `train_epochs`, `epochs_between_evals`,
            `num_parallel_calls`, `max_train_steps` and `export_dir`.
        model_function: function that builds the model ops for train/eval.
            It is passed unchanged to the estimator as `model_fn`.
        input_function: function that returns the dataset for the estimator.
            It is called positionally with a training flag (True/False),
            `flags.data_dir`, `flags.batch_size`, an epoch count,
            `flags.num_parallel_calls` and `flags.multi_gpu`.
        shape: list of ints describing the image shape. Only used when
            `flags.export_dir` is set.
    """
    # The Winograd non-fused algorithms give a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # allow_soft_placement is always enabled: it is required for multi-GPU
    # and is not harmful for the other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)
    if ALLOW_MULTIPLE_MODELS:
        session_config.gpu_options.allow_growth = True

    # Checkpoint every five minutes, keep the 1000 most recent checkpoints
    # and write summaries every 10000 steps.
    base_config = tf.estimator.RunConfig()
    run_config = base_config.replace(
        save_checkpoints_secs=5 * 60,
        keep_checkpoint_max=1000,
        save_summary_steps=10000,
        session_config=session_config)

    model_params = {
        'resnet_size': flags.resnet_size,
        'data_format': flags.data_format,
        'batch_size': flags.batch_size,
        'multi_gpu': flags.multi_gpu,
        'version': flags.version,
        'ncmmethod': flags.ncmmethod,
        'ncmparam': flags.ncmparam,
        'initial_learning_scale': flags.initial_learning_scale
    }
    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params=model_params)

    # Benchmark logging is optional; it is only set up when a log directory
    # was supplied.
    benchmark_logger = None
    if flags.benchmark_log_dir is not None:
        benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
        benchmark_logger.log_run_info("resnet")

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_between_evals,
                              flags.num_parallel_calls, flags.multi_gpu)

    def input_fn_eval():
        return input_function(False, flags.data_dir, flags.batch_size, 1,
                              flags.num_parallel_calls, flags.multi_gpu)

    cycle_count = flags.train_epochs // flags.epochs_between_evals
    for _ in range(cycle_count):
        # Hooks are rebuilt at the start of every cycle.
        cycle_hooks = hooks_helper.get_train_hooks(
            flags.hooks,
            batch_size=flags.batch_size,
            benchmark_log_dir=flags.benchmark_log_dir)

        print('Starting a training cycle.')
        classifier.train(input_fn=input_fn_train,
                         hooks=cycle_hooks,
                         max_steps=flags.max_train_steps)

        print('Starting to evaluate.')
        # flags.max_train_steps is generally used for testing and profiling,
        # frequently with synthetic data that iterates forever. Passing it as
        # `steps` lets the evaluation terminate in that case. Note that eval
        # runs for max_train_steps in every cycle, regardless of the
        # global_step count.
        eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                           steps=flags.max_train_steps)
        print(eval_results)

        if benchmark_logger:
            benchmark_logger.log_estimator_evaluation_result(eval_results)

    if flags.export_dir is None:
        return

    # Export a saved model for the trained classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags.batch_size)
    classifier.export_savedmodel(flags.export_dir, input_receiver_fn)
def run_transformer(flags_obj):
    """Create tf.Estimator to train and evaluate transformer model.

    Training is driven through `tf.contrib.learn.learn_runner.run` with the
    experiment function returned by `get_experiment_fn`. When
    `flags_obj.export_dir` is set, a SavedModel is exported afterwards with
    the vocab file attached as an extra asset.

    Args:
        flags_obj: Object containing parsed flag values.
    """
    print("run_transformer")

    num_gpus = flags_core.get_num_gpus(flags_obj)

    # Add flag-defined parameters to params object
    params = PARAMS_MAP[flags_obj.param_set]
    if num_gpus > 1:
        if flags_obj.param_set == "big":
            params = model_params.BIG_MULTI_GPU_PARAMS
        elif flags_obj.param_set == "base":
            params = model_params.BASE_MULTI_GPU_PARAMS

    params["data_dir"] = flags_obj.data_dir
    params["model_dir"] = flags_obj.model_dir
    params["num_parallel_calls"] = flags_obj.num_parallel_calls

    params["tpu"] = flags_obj.tpu
    params["static_batch"] = flags_obj.static_batch
    params["allow_ffn_pad"] = True

    params["use_synthetic_data"] = flags_obj.use_synthetic_data

    # Set batch size parameter, which depends on the availability of
    # TPU and GPU, and distribution settings.
    params["batch_size"] = (flags_obj.batch_size
                            or params["default_batch_size"])
    params["batch_size"] = distribution_utils.per_device_batch_size(
        params["batch_size"], num_gpus)

    schedule_manager = schedule.Manager(
        train_steps=flags_obj.train_steps,
        steps_between_evals=flags_obj.steps_between_evals,
        train_epochs=flags_obj.train_epochs,
        epochs_between_evals=flags_obj.epochs_between_evals,
        default_train_epochs=DEFAULT_TRAIN_EPOCHS,
        batch_size=params["batch_size"],
        max_length=params["max_length"],
        use_tpu=False,
        num_tpu_shards=flags_obj.num_tpu_shards)

    params["repeat_dataset"] = schedule_manager.repeat_dataset

    model_helpers.apply_clean(flags.FLAGS)

    # Create hooks that log information about the training and metric values.
    # NOTE(review): these hooks are not passed to anything below now that the
    # run_loop(...) call is disabled; the call is kept in case
    # get_train_hooks validates the hook names -- confirm and remove if not.
    train_hooks = hooks_helper.get_train_hooks(
        flags_obj.hooks,
        model_dir=flags_obj.model_dir,
        tensors_to_log=TENSORS_TO_LOG,  # used for logging hooks
        batch_size=schedule_manager.batch_size,  # for ExamplesPerSecondHook
        use_tpu=False  # Not all hooks can run with TPUs
    )

    benchmark_logger = logger.get_benchmark_logger()
    benchmark_logger.log_run_info(model_name="transformer",
                                  dataset_name="wmt_translate_ende",
                                  run_params=params,
                                  test_id=flags_obj.benchmark_test_id)

    # Train and evaluate transformer model through learn_runner instead of
    # the former construct_estimator(...) + run_loop(...) pair.
    os.environ['TF_SYNC_ON_FINISH'] = '0'
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    sess_config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        intra_op_parallelism_threads=0,
        gpu_options=tf.GPUOptions(force_gpu_compatible=True))
    print("SESS_CONFIG: ", sess_config)
    config = RunConfig(session_config=sess_config,
                       model_dir=params["model_dir"])

    variable_strategy = 'GPU'
    use_distortion_for_training = True
    experiment_fn = get_experiment_fn(config.is_chief, flags_obj, params,
                                      schedule_manager, num_gpus,
                                      variable_strategy,
                                      use_distortion_for_training)

    tf.contrib.learn.learn_runner.run(
        experiment_fn,
        run_config=config,
        hparams=tf.contrib.training.HParams(is_chief=config.is_chief,
                                            **params))

    if flags_obj.export_dir:
        # The export previously referenced an `estimator` that was never
        # bound in this function (its construction had been commented out),
        # so any run with export_dir set died with a NameError. Build the
        # estimator here, using the same call that had been disabled.
        # NOTE(review): presumably this picks up the trained weights from
        # params["model_dir"] -- confirm against construct_estimator.
        estimator = construct_estimator(flags_obj, params, schedule_manager)

        serving_input_fn = export.build_tensor_serving_input_receiver_fn(
            shape=[None], dtype=tf.int64, batch_size=None)
        # Export saved model, and save the vocab file as an extra asset. The
        # vocab file is saved to allow consistent input encoding and output
        # decoding. (See the "Export trained model" section in the README for
        # an example of how to use the vocab file.)
        # Since the model itself does not use the vocab file, this file is
        # saved as an extra asset rather than a core asset.
        estimator.export_savedmodel(
            flags_obj.export_dir,
            serving_input_fn,
            assets_extra={"vocab.txt": flags_obj.vocab_file},
            strip_default_attrs=True)
def resnet_main(flags_obj, model_function, input_function, dataset_name,
                shape=None):
    """Train and evaluate a ResNet model in cycles, then optionally export it.

    Args:
        flags_obj: An object containing parsed flags. See
            define_resnet_flags() for details.
        model_function: function that builds the model ops for train/eval.
            It is passed unchanged to the estimator as `model_fn`.
        input_function: function that returns the dataset for the estimator.
            It is called with the keyword arguments `is_training`,
            `data_dir`, `batch_size`, `num_epochs` and, for training only,
            `num_gpus`.
        dataset_name: name of the dataset, used for benchmark logging.
            "-synthetic" is appended when `flags_obj.use_synthetic_data` is
            set.
        shape: list of ints describing the image shape. Only used when
            `flags_obj.export_dir` is set.
    """
    # The Winograd non-fused algorithms give a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # allow_soft_placement is always enabled: it is required for multi-GPU
    # and is not harmful for the other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # Pick the distribution strategy from the number of GPUs requested.
    if flags_core.get_num_gpus(flags_obj) == 0:
        distribution = tf.contrib.distribute.OneDeviceStrategy(
            'device:CPU:0')
    elif flags_core.get_num_gpus(flags_obj) == 1:
        distribution = tf.contrib.distribute.OneDeviceStrategy(
            'device:GPU:0')
    else:
        distribution = tf.contrib.distribute.MirroredStrategy(
            num_gpus=flags_core.get_num_gpus(flags_obj))

    run_config = tf.estimator.RunConfig(train_distribute=distribution,
                                        session_config=session_config)

    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        params={
            'resnet_size': int(flags_obj.resnet_size),
            'data_format': flags_obj.data_format,
            'batch_size': flags_obj.batch_size,
            'resnet_version': int(flags_obj.resnet_version),
            'loss_scale': flags_core.get_loss_scale(flags_obj),
            'dtype': flags_core.get_tf_dtype(flags_obj)
        })

    # Parameters recorded alongside the benchmark run.
    run_params = {
        'batch_size': flags_obj.batch_size,
        'dtype': flags_core.get_tf_dtype(flags_obj),
        'resnet_size': flags_obj.resnet_size,
        'resnet_version': flags_obj.resnet_version,
        'synthetic_data': flags_obj.use_synthetic_data,
        'train_epochs': flags_obj.train_epochs,
    }
    if flags_obj.use_synthetic_data:
        dataset_name = dataset_name + "-synthetic"

    benchmark_logger = logger.get_benchmark_logger()
    benchmark_logger.log_run_info('resnet', dataset_name, run_params,
                                  test_id=flags_obj.benchmark_test_id)

    train_hooks = hooks_helper.get_train_hooks(
        flags_obj.hooks, batch_size=flags_obj.batch_size)

    def input_fn_train():
        device_batch = per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj))
        return input_function(
            is_training=True,
            data_dir=flags_obj.data_dir,
            batch_size=device_batch,
            num_epochs=flags_obj.epochs_between_evals,
            num_gpus=flags_core.get_num_gpus(flags_obj))

    def input_fn_eval():
        device_batch = per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj))
        return input_function(
            is_training=False,
            data_dir=flags_obj.data_dir,
            batch_size=device_batch,
            num_epochs=1)

    total_training_cycle = (flags_obj.train_epochs //
                            flags_obj.epochs_between_evals)
    for cycle_index in range(total_training_cycle):
        tf.logging.info('Starting a training cycle: %d/%d',
                        cycle_index, total_training_cycle)
        classifier.train(input_fn=input_fn_train,
                         hooks=train_hooks,
                         max_steps=flags_obj.max_train_steps)

        tf.logging.info('Starting to evaluate.')
        # flags_obj.max_train_steps is generally used for testing and
        # profiling, frequently with synthetic data that iterates forever.
        # Passing it as `steps` lets the evaluation terminate in that case.
        # Note that eval runs for max_train_steps in every cycle, regardless
        # of the global_step count.
        eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                           steps=flags_obj.max_train_steps)
        benchmark_logger.log_evaluation_result(eval_results)

        reached_target = model_helpers.past_stop_threshold(
            flags_obj.stop_threshold, eval_results['accuracy'])
        if reached_target:
            break

    if flags_obj.export_dir is None:
        return

    # Export a saved model for the trained classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags_obj.batch_size)
    classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn)
def resnet_main(flags, model_function, input_function, num_train_samps,
                num_eval_samps, shape=None):
    """Shared main loop for ResNet Models, with optional Cray ML plugin.

    When `flags.enable_ml_comm == 1` the `mc` plugin is initialised, and on
    every rank other than 0 `flags.model_dir`, `flags.benchmark_log_dir` and
    `flags.export_dir` are overwritten with None (the `flags` object is
    mutated in place).

    Args:
        flags: FLAGS object that contains the params for running. See
            ResnetArgParser for created flags.
        model_function: the function that instantiates the Model and builds
            the ops for train/eval. This will be passed directly into the
            estimator (wrapped by `replicate_model_fn` when
            `flags.multi_gpu` is set).
        input_function: the function that processes the dataset and returns
            a dataset that the estimator can train on. It is called
            positionally with a training flag, `flags.data_dir`,
            `flags.batch_size`, an epoch count, `flags.num_parallel_calls`,
            `flags.multi_gpu`, the number of workers and this worker's rank.
        num_train_samps: number of training samples; used to derive the
            number of training steps per cycle.
        num_eval_samps: number of evaluation samples; used to derive the
            number of evaluation steps per cycle.
        shape: list of ints representing the shape of the images used for
            training. This is only used if flags.export_dir is passed.
    """
    # Using the Winograd non-fused algorithms provides a small performance
    # boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    if flags.multi_gpu:
        validate_batch_size_for_multi_gpu(flags.batch_size)

        # There are two steps required if using multi-GPU: (1) wrap the
        # model_fn, and (2) wrap the optimizer. The first happens here, and
        # (2) happens in the model_fn itself when the optimizer is defined.
        # The model_fn must be passed as the first argument; it was missing
        # before, which made this call fail.
        model_function = tf.contrib.estimator.replicate_model_fn(
            model_function,
            loss_reduction=tf.losses.Reduction.MEAN)

    # Single-process defaults; replaced below when the ML plugin is enabled.
    myrank = 0
    numworkers = 1
    if flags.enable_ml_comm == 1:
        # Initialize the Cray PE ML Plugin and configure the thread team
        # (correcting the number of epochs for the effective batch size).
        totsize = 25551401  # Specific size for resnet50-v2
        mc.init(2, 1, totsize, "tensorflow")
        myrank = mc.get_rank()
        numworkers = mc.get_nranks()
        if myrank == 0:
            print("ResNet with {:9d} parameters".format(totsize))

        max_steps_train = int(
            math.ceil(flags.train_epochs *
                      (num_train_samps + num_eval_samps) /
                      (mc.get_nranks() * flags.batch_size)))
        # Arguments: (0, 0, num_steps_before_going_nonblock, max_steps_train,
        # verbose=1, how_often_to_print=100)
        mc.config_team(0, 0, max_steps_train, max_steps_train, 1, 100)

        # Only rank 0 keeps its output directories.
        if mc.get_rank() != 0:
            flags.model_dir = None
        if mc.get_rank() != 0:
            flags.benchmark_log_dir = None
        if mc.get_rank() != 0:
            flags.export_dir = None

    # Create session config based on values of inter_op_parallelism_threads
    # and intra_op_parallelism_threads. Note that we default to having
    # allow_soft_placement = True, which is required for multi-GPU and not
    # harmful for other modes.
    session_config = tf.ConfigProto(
        log_device_placement=False,
        inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
        allow_soft_placement=True)

    # Set up a RunConfig to save checkpoint and set session config.
    run_config = tf.estimator.RunConfig().replace(
        save_checkpoints_steps=500, session_config=session_config)

    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags.model_dir,
        config=run_config,
        params={
            'resnet_size': flags.resnet_size,
            'data_format': flags.data_format,
            'batch_size': flags.batch_size,
            'multi_gpu': flags.multi_gpu,
            'train_epochs': flags.train_epochs,
            'version': flags.version,
            'loss_scale': flags.loss_scale,
            'dtype': flags.dtype,
            'mlcomm': flags.enable_ml_comm,
            'log_freq': flags.global_perf_log_freq,
            'weight_decay': flags.weight_decay,
            'init_lr': flags.init_lr,
            'base_lr': flags.base_lr,
            'warmup_epochs': flags.warmup_epochs,
        })

    benchmark_logger = logger.config_benchmark_logger(flags.benchmark_log_dir)
    benchmark_logger.log_run_info('resnet')

    def input_fn_train():
        return input_function(True, flags.data_dir, flags.batch_size,
                              flags.epochs_between_evals,
                              flags.num_parallel_calls, flags.multi_gpu,
                              numworkers, myrank)

    def input_fn_eval():
        return input_function(False, flags.data_dir, flags.batch_size, 3,
                              flags.num_parallel_calls, flags.multi_gpu,
                              numworkers, myrank)

    for _ in range(flags.train_epochs // flags.epochs_between_evals):
        # NOTE(review): these hooks are built every cycle but were never
        # passed to classifier.train(); that is left unchanged here --
        # confirm whether omitting them is intentional.
        train_hooks = hooks_helper.get_train_hooks(
            flags.hooks,
            batch_size=flags.batch_size,
            benchmark_log_dir=flags.benchmark_log_dir)

        if myrank == 0:
            print('Starting a training cycle.')

        # Steps needed for this worker to cover its share of the cycle.
        # int() keeps the value integral on Python 2, where math.ceil
        # returns a float.
        tsteps = int(math.ceil(
            float(flags.epochs_between_evals * num_train_samps) /
            (numworkers * flags.batch_size)))

        # Estimator.train() rejects a call that supplies both `steps` and
        # `max_steps`, so an explicit max_train_steps (generally used for
        # testing and profiling) takes the place of the per-cycle count.
        if flags.max_train_steps is None:
            classifier.train(input_fn=input_fn_train, steps=tsteps)
        else:
            classifier.train(input_fn=input_fn_train,
                             max_steps=flags.max_train_steps)

        if myrank == 0:
            print('Starting to evaluate.')

        # Evaluate for a fixed number of steps derived from the evaluation
        # set size, so the evaluation always terminates.
        esteps = int(math.ceil(
            float(num_eval_samps) / (numworkers * flags.batch_size)))
        eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                           steps=esteps)

        benchmark_logger.log_evaluation_result(eval_results)

        if model_helpers.past_stop_threshold(flags.stop_threshold,
                                             eval_results['accuracy']):
            break

    if flags.export_dir is not None:
        warn_on_multi_gpu_export(flags.multi_gpu)

        # Exports a saved model for the given classifier.
        input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
            shape, batch_size=flags.batch_size)
        classifier.export_savedmodel(flags.export_dir, input_receiver_fn)

    if flags.enable_ml_comm == 1:
        mc.finalize()
def resnet_main(flags_obj, model_function, input_function, dataset_name,
                shape=None):
    """Train, evaluate and optionally export a ResNet model.

    Args:
        flags_obj: An object containing parsed flags. See
            define_resnet_flags() for details.
        model_function: function that builds the model ops for train/eval.
            It is passed unchanged to the estimator as `model_fn`.
        input_function: function that returns the dataset for the estimator.
            It is called with keyword arguments built from the flags.
        dataset_name: name of the dataset, used for benchmark logging.
            '-synthetic' is appended when `flags_obj.use_synthetic_data` is
            set.
        shape: list of ints describing the image shape. Only used when
            `flags_obj.export_dir` is set.

    Returns:
        Dict of results of the run. Contains the keys `eval_results` and
        `train_hooks`. `eval_results` contains accuracy (top_1) and
        accuracy_top_5. `train_hooks` is a list of the hook instances used
        during training. An empty dict is returned instead when training
        goes through `tf.estimator.train_and_evaluate`.
    """
    model_helpers.apply_clean(flags.FLAGS)

    # The flag override logic only runs when it was explicitly requested.
    if flags_obj.tf_gpu_thread_mode:
        override_flags_and_set_envars_for_gpu_thread_pool(flags_obj)

    # Configure the cluster spec for the distribution strategy.
    num_workers = distribution_utils.configure_cluster(
        flags_obj.worker_hosts, flags_obj.task_index)

    # allow_soft_placement=True is required for multi-GPU and is not harmful
    # for the other modes.
    session_config = tf.compat.v1.ConfigProto(
        inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
        allow_soft_placement=True)

    distribution_strategy = distribution_utils.get_distribution_strategy(
        distribution_strategy=flags_obj.distribution_strategy,
        num_gpus=flags_core.get_num_gpus(flags_obj),
        num_workers=num_workers,
        all_reduce_alg=flags_obj.all_reduce_alg,
        num_packs=flags_obj.num_packs)

    # Time-based checkpoints are pushed out to every 24 hours, so in practice
    # checkpoints are determined only by `epochs_between_evals`.
    one_day_secs = 60 * 60 * 24
    run_config = tf.estimator.RunConfig(
        train_distribute=distribution_strategy,
        session_config=session_config,
        save_checkpoints_secs=one_day_secs,
        save_checkpoints_steps=None)

    # Optionally warm-start everything except the dense layer from a
    # pretrained ResNet checkpoint.
    warm_start_settings = None
    if flags_obj.pretrained_model_checkpoint_path is not None:
        warm_start_settings = tf.estimator.WarmStartSettings(
            flags_obj.pretrained_model_checkpoint_path,
            vars_to_warm_start='^(?!.*dense)')

    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        warm_start_from=warm_start_settings,
        params={
            'resnet_size': int(flags_obj.resnet_size),
            'data_format': flags_obj.data_format,
            'batch_size': flags_obj.batch_size,
            'resnet_version': int(flags_obj.resnet_version),
            'loss_scale': flags_core.get_loss_scale(flags_obj),
            'dtype': flags_core.get_tf_dtype(flags_obj),
            'fine_tune': flags_obj.fine_tune,
            'num_workers': num_workers,
        })

    # Parameters recorded alongside the benchmark run.
    run_params = {
        'batch_size': flags_obj.batch_size,
        'dtype': flags_core.get_tf_dtype(flags_obj),
        'resnet_size': flags_obj.resnet_size,
        'resnet_version': flags_obj.resnet_version,
        'synthetic_data': flags_obj.use_synthetic_data,
        'train_epochs': flags_obj.train_epochs,
        'num_workers': num_workers,
    }
    if flags_obj.use_synthetic_data:
        dataset_name = dataset_name + '-synthetic'

    benchmark_logger = logger.get_benchmark_logger()
    benchmark_logger.log_run_info('resnet', dataset_name, run_params,
                                  test_id=flags_obj.benchmark_test_id)

    train_hooks = hooks_helper.get_train_hooks(
        flags_obj.hooks,
        model_dir=flags_obj.model_dir,
        batch_size=flags_obj.batch_size)

    def input_fn_train(num_epochs, input_context=None):
        replica_batch = distribution_utils.per_replica_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj))
        return input_function(
            is_training=True,
            data_dir=flags_obj.data_dir,
            batch_size=replica_batch,
            num_epochs=num_epochs,
            dtype=flags_core.get_tf_dtype(flags_obj),
            datasets_num_private_threads=(
                flags_obj.datasets_num_private_threads),
            num_parallel_batches=flags_obj.datasets_num_parallel_batches,
            input_context=input_context)

    def input_fn_eval():
        replica_batch = distribution_utils.per_replica_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj))
        return input_function(
            is_training=False,
            data_dir=flags_obj.data_dir,
            batch_size=replica_batch,
            num_epochs=1,
            dtype=flags_core.get_tf_dtype(flags_obj))

    # Zero training epochs means "evaluate only".
    if flags_obj.eval_only or not flags_obj.train_epochs:
        train_epochs = 0
    else:
        train_epochs = flags_obj.train_epochs

    use_train_and_evaluate = (flags_obj.use_train_and_evaluate
                              or num_workers > 1)
    if use_train_and_evaluate:
        def train_input(input_context=None):
            return input_fn_train(train_epochs, input_context=input_context)

        train_spec = tf.estimator.TrainSpec(
            input_fn=train_input,
            hooks=train_hooks,
            max_steps=flags_obj.max_train_steps)
        eval_spec = tf.estimator.EvalSpec(input_fn=input_fn_eval)
        tf.compat.v1.logging.info('Starting to train and evaluate.')
        tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)
        # tf.estimator.train_and_evaluate doesn't return anything in the
        # multi-worker case.
        return {}

    if train_epochs == 0:
        # With --eval_only, run a single loop with zero training epochs.
        schedule, n_loops = [0], 1
    else:
        # Number of train/eval passes. Every pass but the last trains for
        # `epochs_between_evals` epochs; the last trains for whatever is
        # still needed to reach `train_epochs`. For instance, with
        # train_epochs = 25 and epochs_between_evals = 10 the schedule is
        # [10, 10, 5]:
        #   train for 10 epochs, then evaluate;
        #   train for another 10 epochs, then evaluate;
        #   train for a final 5 epochs (reaching 25), then evaluate.
        n_loops = math.ceil(train_epochs / flags_obj.epochs_between_evals)
        schedule = [flags_obj.epochs_between_evals] * int(n_loops)
        # Correct the last entry so the total is not over-counted.
        schedule[-1] = train_epochs - sum(schedule[:-1])

    for cycle_index, num_train_epochs in enumerate(schedule):
        tf.compat.v1.logging.info('Starting cycle: %d/%d', cycle_index,
                                  int(n_loops))

        if num_train_epochs:
            # classifier.train is called immediately in each iteration, so
            # num_train_epochs cannot change before the closure uses it.
            # pylint: disable=cell-var-from-loop
            def cycle_train_input(input_context=None):
                return input_fn_train(num_train_epochs,
                                      input_context=input_context)

            classifier.train(input_fn=cycle_train_input,
                             hooks=train_hooks,
                             max_steps=flags_obj.max_train_steps)

        # flags_obj.max_train_steps is generally used for testing and
        # profiling, frequently with synthetic data that iterates forever.
        # Passing it as `steps` lets the evaluation terminate in that case.
        # Note that eval runs for max_train_steps in every cycle, regardless
        # of the global_step count.
        tf.compat.v1.logging.info('Starting to evaluate.')
        eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                           steps=flags_obj.max_train_steps)
        benchmark_logger.log_evaluation_result(eval_results)

        reached_target = model_helpers.past_stop_threshold(
            flags_obj.stop_threshold, eval_results['accuracy'])
        if reached_target:
            break

    if flags_obj.export_dir is not None:
        # Export a saved model for the trained classifier.
        export_dtype = flags_core.get_tf_dtype(flags_obj)
        if flags_obj.image_bytes_as_serving_input:
            input_receiver_fn = functools.partial(
                image_bytes_serving_input_fn, shape, dtype=export_dtype)
        else:
            input_receiver_fn = (
                export.build_tensor_serving_input_receiver_fn(
                    shape,
                    batch_size=flags_obj.batch_size,
                    dtype=export_dtype))
        classifier.export_savedmodel(flags_obj.export_dir,
                                     input_receiver_fn,
                                     strip_default_attrs=True)

    return {'eval_results': eval_results, 'train_hooks': train_hooks}
def run_transformer(flags_obj):
    """Train and evaluate the transformer model, then optionally export it.

    The hyper-parameter object is taken from the parameter set named by the
    flags and then overridden with flag values. A schedule manager, the
    training hooks (plus the Horovod global-variable broadcast hook) and the
    benchmark logger are created, the train/eval loop is run, and a saved
    model is exported when an export directory is set and no TPU is used.

    Args:
      flags_obj: Object containing parsed flag values.
    """
    num_gpus = flags_core.get_num_gpus(flags_obj)
    param_set = flags_obj.param_set

    # Pick the parameter set named by the flag; with more than one GPU the
    # "big" and "base" sets are replaced by their multi-GPU counterparts.
    # NOTE(review): `params` is the object held by PARAMS_MAP / model_params,
    # not a copy made here, so the assignments below modify it in place --
    # confirm that this is intended.
    params = PARAMS_MAP[param_set]
    if num_gpus > 1 and param_set == "big":
        params = model_params.BIG_MULTI_GPU_PARAMS
    elif num_gpus > 1 and param_set == "base":
        params = model_params.BASE_MULTI_GPU_PARAMS

    # Flag values that are copied into params under the same name.
    for flag_name in ("data_dir", "model_dir", "num_parallel_calls", "tpu"):
        params[flag_name] = getattr(flags_obj, flag_name)

    # A TPU run is one where the tpu flag was specified.
    use_tpu = bool(flags_obj.tpu)
    params["use_tpu"] = use_tpu
    params["static_batch"] = flags_obj.static_batch or use_tpu
    params["allow_ffn_pad"] = not use_tpu
    params["use_synthetic_data"] = flags_obj.use_synthetic_data

    # Flags that override values from the parameter file, in order: learning
    # rate / warmup steps / max_length / vocab_size, the learning-rate scheme,
    # the optimizer algorithm and its parameters, and the layer postprocess
    # dropout.
    override_flags = (
        "learning_rate",
        "learning_rate_warmup_steps",
        "max_length",
        "vocab_size",
        "lr_scheme",
        "warmup_init_lr",
        "opt_alg",
        "optimizer_sgd_momentum",
        "optimizer_rms_decay",
        "optimizer_rms_momentum",
        "optimizer_rms_epsilon",
        "layer_postprocess_dropout",
    )
    for flag_name in override_flags:
        params[flag_name] = getattr(flags_obj, flag_name)

    # Batch size: the flag wins when it is set; otherwise fall back to the
    # TPU or the regular default stored in params. The default is only looked
    # up when it is actually needed.
    batch_size = flags_obj.batch_size
    if not batch_size:
        if use_tpu:
            batch_size = params["default_batch_size_tpu"]
        else:
            batch_size = params["default_batch_size"]
    params["batch_size"] = batch_size
    # The per-device batch size computation through
    # distribution_utils.per_device_batch_size(params["batch_size"], num_gpus)
    # is intentionally disabled here (distribution strategy was removed).

    schedule_manager = schedule.Manager(
        train_steps=flags_obj.train_steps,
        steps_between_evals=flags_obj.steps_between_evals,
        train_epochs=flags_obj.train_epochs,
        epochs_between_evals=flags_obj.epochs_between_evals,
        default_train_epochs=DEFAULT_TRAIN_EPOCHS,
        batch_size=batch_size,
        max_length=params["max_length"],
        use_tpu=use_tpu,
        num_tpu_shards=flags_obj.num_tpu_shards,
    )
    params["repeat_dataset"] = schedule_manager.repeat_dataset

    model_helpers.apply_clean(flags.FLAGS)

    # Hooks that log information about the training and metric values.
    train_hooks = hooks_helper.get_train_hooks(
        flags_obj.hooks,
        model_dir=flags_obj.model_dir,
        tensors_to_log=TENSORS_TO_LOG,  # consumed by the logging hooks
        batch_size=schedule_manager.batch_size,  # ExamplesPerSecondHook
        use_tpu=use_tpu,  # not every hook can run with TPUs
    )
    # Horovod: the broadcast hook is added on top of the flag-selected hooks.
    train_hooks.append(hvd.BroadcastGlobalVariablesHook(0))

    benchmark_logger = logger.get_benchmark_logger()
    benchmark_logger.log_run_info(
        model_name="transformer",
        dataset_name="wmt_translate_ende",
        run_params=params,
        test_id=flags_obj.benchmark_test_id,
    )

    # Train and evaluate the transformer model.
    estimator = construct_estimator(flags_obj, params, schedule_manager)
    run_loop(
        estimator=estimator,
        # Training arguments
        schedule_manager=schedule_manager,
        train_hooks=train_hooks,
        benchmark_logger=benchmark_logger,
        # BLEU calculation arguments
        bleu_source=flags_obj.bleu_source,
        bleu_ref=flags_obj.bleu_ref,
        bleu_threshold=flags_obj.stop_threshold,
        vocab_file=flags_obj.vocab_file,
    )

    # Nothing to export without an export directory, or when running on TPU.
    if not flags_obj.export_dir or params["use_tpu"]:
        return

    serving_input_fn = export.build_tensor_serving_input_receiver_fn(
        shape=[None], dtype=tf.int64, batch_size=None)
    # The vocab file is shipped with the saved model so that inputs can be
    # encoded and outputs decoded consistently (see the "Export trained model"
    # section in the README). The model itself does not use the file, so it is
    # stored as an extra asset rather than a core asset.
    estimator.export_savedmodel(
        flags_obj.export_dir,
        serving_input_fn,
        assets_extra={"vocab.txt": flags_obj.vocab_file},
    )