def define_mnist_flags():
    """Register the MNIST training flags and their environment-driven defaults.

    Defines the evaluation/checkpoint/summary cadence flags, invokes the
    shared ``flags_core`` definers (base, performance, image) and then
    overrides the shared defaults via ``flags_core.set_defaults``.

    Several defaults are read from the process environment at definition
    time: ``MAX_STEPS``, ``PS_JOBSPACE``, ``PS_MODEL_PATH``, ``TRAIN_EPOCHS``,
    ``EPOCHS_EVAL`` and ``BATCH_SIZE``. When a variable is unset, the literal
    fallback visible in the code is used.

    Raises:
        ValueError: if one of the integer environment variables is set to a
            value that ``int()`` cannot parse.
    """
    flags.DEFINE_integer('eval_secs', 10,
                         'How frequently to run evaluation step')
    flags.DEFINE_integer('ckpt_steps', 100,
                         'How frequently to save a model checkpoint')
    flags.DEFINE_integer('max_ckpts', 2,
                         'Maximum number of checkpoints to keep')
    # Environment values are strings; coerce explicitly so this default is an
    # int, exactly like the other env-driven defaults further down.
    flags.DEFINE_integer('max_steps', int(os.environ.get('MAX_STEPS', 100)),
                         'Max steps')
    flags.DEFINE_integer('save_summary_steps', 10,
                         'How frequently to save TensorBoard summaries')
    flags.DEFINE_integer('log_step_count_steps', 10,
                         'How frequently to log loss & global steps/s')

    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()

    # PS_MODEL_PATH feeds both the model and the export directory, so it is
    # resolved once. The '/models' suffix belongs to the fallback only.
    job_root = os.environ.get('PS_JOBSPACE', os.getcwd())
    models_root = os.environ.get('PS_MODEL_PATH', os.getcwd() + '/models')
    data_dir = os.path.abspath(job_root + '/data')
    model_dir = os.path.abspath(models_root + '/mnist')
    export_dir = os.path.abspath(models_root)

    flags.adopt_module_key_flags(flags_core)
    flags_core.set_defaults(
        data_dir=data_dir,
        model_dir=model_dir,
        export_dir=export_dir,
        train_epochs=int(os.environ.get('TRAIN_EPOCHS', 3)),
        epochs_between_evals=int(os.environ.get('EPOCHS_EVAL', 5)),
        batch_size=int(os.environ.get('BATCH_SIZE', 100)),
    )
def define_mnist_flags():
    """Register the base and image flag groups and apply MNIST defaults.

    After adopting the ``flags_core`` key flags, the defaults for
    ``data_dir``, ``model_dir``, ``batch_size`` and ``train_epochs`` are
    overridden with the values listed below.
    """
    flags_core.define_base()
    flags_core.define_image()
    flags.adopt_module_key_flags(flags_core)

    mnist_defaults = {
        'data_dir': '/tmp/mnist_data',
        'model_dir': '/tmp/mnist_model',
        'batch_size': 100,
        'train_epochs': 40,
    }
    flags_core.set_defaults(**mnist_defaults)
def define_mnist_flags():
    """Register base, performance and image flags, then set MNIST defaults.

    The performance group is requested with ``num_parallel_calls=False``.
    Defaults for ``data_dir``, ``model_dir``, ``batch_size`` and
    ``train_epochs`` are overridden once the key flags have been adopted.
    """
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    flags.adopt_module_key_flags(flags_core)

    overrides = dict(
        data_dir='/tmp/mnist_data',
        model_dir='/tmp/mnist_model',
        batch_size=100,
        train_epochs=40,
    )
    flags_core.set_defaults(**overrides)
def define_mnist_flags():
    """Register base, performance and image flags, then set MNIST defaults.

    NOTE(review): the ``data_dir`` / ``model_dir`` defaults are absolute
    paths under one particular workspace directory; they will need to be
    overridden on the command line on any other machine.
    """
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    flags.adopt_module_key_flags(flags_core)

    data_root = (
        "/workspace/zigangzhao/TensoFlowBDD/models/official/mnist/mnist_data")
    model_root = (
        "/workspace/zigangzhao/TensoFlowBDD/models/official/mnist/mnist_model/")
    flags_core.set_defaults(
        data_dir=data_root,
        model_dir=model_root,
        batch_size=100,
        train_epochs=40,
    )
def define_mnist_flags():
    """Register the shared flag groups and environment-derived MNIST defaults.

    Directory defaults are built from ``PS_JOBSPACE`` and ``PS_MODEL_PATH``
    (falling back to locations under the current working directory), and
    ``batch_size`` / ``train_epochs`` may be overridden through the
    lowercase environment variables of the same name.
    """
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()

    env = os.environ
    job_root = env.get('PS_JOBSPACE', os.getcwd())
    models_root = env.get('PS_MODEL_PATH', os.getcwd() + '/models')
    data_dir = os.path.abspath(job_root + '/data')
    model_dir = os.path.abspath(models_root + '/mnist')

    flags.adopt_module_key_flags(flags_core)

    # export_dir intentionally uses the raw models root (no abspath), as in
    # the original call.
    defaults = {
        'data_dir': data_dir,
        'model_dir': model_dir,
        'export_dir': models_root,
        'batch_size': int(env.get('batch_size', 100)),
        'epochs_between_evals': 20,
        'train_epochs': int(env.get('train_epochs', 40)),
    }
    flags_core.set_defaults(**defaults)
def define_flags():
    """Register the base, performance, image and benchmark flag groups.

    The base and performance groups are requested with the explicit option
    sets spelled out below; image and benchmark use their defaults.
    """
    base_options = {
        'clean': True,
        'num_gpu': False,
        'stop_threshold': True,
        'hooks': True,
        'train_epochs': True,
        'epochs_between_evals': True,
    }
    performance_options = {
        'num_parallel_calls': True,
        'inter_op': True,
        'intra_op': True,
        'dynamic_loss_scale': True,
        'loss_scale': True,
        'synthetic_data': True,
        'dtype': True,
    }

    flags_core.define_base(**base_options)
    flags_core.define_performance(**performance_options)
    flags_core.define_image()
    flags_core.define_benchmark()
def define_resnet_flags(resnet_size_choices=None):
    """Register the flags used by the ResNet models.

    Args:
        resnet_size_choices: optional collection of allowed values for
            ``--resnet_size``. When ``None`` the flag is a free-form string;
            otherwise it is an enum restricted to these values.
    """
    wrap = flags_core.help_wrap

    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='1',
        enum_values=['1', '2'],
        help=wrap('Version of ResNet. (1 or 2) See README.md for details.'),
    )
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=wrap(
            'If True do not train any parameters except for the final layer.'),
    )
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default=None,
        help=wrap(
            'If not None initialize all the network except the final layer '
            'with these values'),
    )
    flags.DEFINE_boolean(
        name='eval_only',
        default=False,
        help=wrap(
            'Skip training and only perform evaluation on '
            'the latest checkpoint.'),
    )

    # --resnet_size shares everything except its type between the two cases.
    size_flag = {
        'name': 'resnet_size',
        'short_name': 'rs',
        'default': '50',
        'help': wrap('The size of the ResNet model to use.'),
    }
    if resnet_size_choices is not None:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **size_flag)
    else:
        flags.DEFINE_string(**size_flag)
def define_mnist_eager_flags():
    """Define the flags and default values for MNIST in eager mode."""
    wrap = flags_core.help_wrap

    flags_core.define_base_eager()
    flags_core.define_image()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_integer(
        name='log_interval',
        short_name='li',
        default=10,
        help=wrap('batches between logging training status'),
    )
    flags.DEFINE_string(
        name='output_dir',
        short_name='od',
        default='/tmp/tensorflow/mnist/',
        help=wrap('Directory to write TensorBoard summaries'),
    )
    flags.DEFINE_float(
        name='learning_rate',
        short_name='lr',
        default=0.01,
        help=wrap('Learning rate.'),
    )
    flags.DEFINE_float(
        name='momentum',
        short_name='m',
        default=0.5,
        help=wrap('SGD momentum.'),
    )
    flags.DEFINE_bool(
        name='no_gpu',
        short_name='nogpu',
        default=False,
        help=wrap('disables GPU usage even if a GPU is available'),
    )

    eager_defaults = {
        'data_dir': '/tmp/tensorflow/mnist/input_data',
        'model_dir': '/tmp/tensorflow/mnist/checkpoints/',
        'batch_size': 100,
        'train_epochs': 10,
    }
    flags_core.set_defaults(**eager_defaults)
def define_resnet_flags(resnet_size_choices=None):
    """Register the ResNet flags and the fp16/v1 compatibility validator.

    Args:
        resnet_size_choices: optional collection of allowed values for
            ``--resnet_size``. When ``None`` the flag is a free-form string;
            otherwise it is an enum restricted to these values.
    """
    wrap = flags_core.help_wrap

    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='2',
        enum_values=['1', '2'],
        help=wrap('Version of ResNet. (1 or 2) See README.md for details.'),
    )

    size_flag = {
        'name': 'resnet_size',
        'short_name': 'rs',
        'default': '50',
        'help': wrap('The size of the ResNet model to use.'),
    }
    if resnet_size_choices is not None:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **size_flag)
    else:
        flags.DEFINE_string(**size_flag)

    # ResNet v1 as currently implemented is numerically unstable under fp16
    # and produces NaN errors shortly after training starts, so the
    # combination is rejected at flag-validation time.
    msg = ('ResNet version 1 is not currently supported with fp16. '
           'Please use version 2 instead.')

    @flags.multi_flags_validator(['dtype', 'resnet_version'], message=msg)
    def _forbid_v1_fp16(flag_values):  # pylint: disable=unused-variable
        uses_fp16 = (
            flags_core.DTYPE_MAP[flag_values['dtype']][0] == tf.float16)
        return not (uses_fp16 and flag_values['resnet_version'] == '1')
def define_flags():
    """Register the base, performance, image and benchmark flag groups.

    The base group is requested without the GPU-count flag and the
    performance group with both loss-scale options enabled.
    """
    performance_options = {'dynamic_loss_scale': True, 'loss_scale': True}

    flags_core.define_base(num_gpu=False)
    flags_core.define_performance(**performance_options)
    flags_core.define_image()
    flags_core.define_benchmark()
def define_keras_flags(dynamic_loss_scale=True):
    """Register the flags used by the Keras models.

    Args:
        dynamic_loss_scale: forwarded as the ``dynamic_loss_scale`` option of
            ``flags_core.define_performance``.
    """
    base_options = dict(
        clean=True,
        num_gpu=True,
        run_eagerly=True,
        train_epochs=True,
        epochs_between_evals=True,
        distribution_strategy=True,
    )
    performance_options = dict(
        num_parallel_calls=False,
        synthetic_data=True,
        dtype=True,
        all_reduce_alg=True,
        num_packs=True,
        tf_gpu_thread_mode=True,
        datasets_num_private_threads=True,
        dynamic_loss_scale=dynamic_loss_scale,
        loss_scale=True,
        fp16_implementation=True,
        tf_data_experimental_slack=True,
        enable_xla=True,
        force_v2_in_keras_compile=True,
        training_dataset_cache=True,
    )
    flags_core.define_base(**base_options)
    flags_core.define_performance(**performance_options)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags_core.define_distribution()
    flags.adopt_module_key_flags(flags_core)

    # Boolean flags are declared as (name, default, help) rows and registered
    # in the same order as they are listed.
    leading_switches = (
        ('enable_eager', False, 'Enable eager?'),
        ('skip_eval', False, 'Skip evaluation?'),
        # TODO(b/135607288): Remove this flag once we understand the root
        # cause of slowdown when setting the learning phase in Keras backend.
        ('set_learning_phase_to_train', True,
         'If skip eval, also set Keras learning phase to 1 (training).'),
        ('explicit_gpu_placement', False,
         'If not using distribution strategy, explicitly set device scope '
         'for the Keras training loop.'),
        ('use_trivial_model', False, 'Whether to use a trivial Keras model.'),
        ('report_accuracy_metrics', True,
         'Report metrics during training and evaluation.'),
        ('use_tensor_lr', False,
         'Use learning rate tensor instead of a callback.'),
        ('enable_tensorboard', False,
         'Whether to enable Tensorboard callback.'),
    )
    for switch_name, switch_default, switch_help in leading_switches:
        flags.DEFINE_boolean(
            name=switch_name, default=switch_default, help=switch_help)

    flags.DEFINE_integer(
        name='train_steps',
        default=None,
        help='The number of steps to run for training. If it is larger than '
        '# batches per epoch, then use # batches per epoch. This flag will be '
        'ignored if train_epochs is set to be larger than 1. ',
    )
    flags.DEFINE_string(
        name='profile_steps',
        default=None,
        help='Save profiling data to model dir at given range of global '
        'steps. The value must be a comma separated pair of positive '
        'integers, specifying the first and last step to profile. For '
        'example, "--profile_steps=2,4" triggers the profiler to process 3 '
        'steps, starting from the 2nd step. Note that profiler has a '
        'non-trivial performance overhead, and the output file can be '
        'gigantic if profiling many steps.',
    )

    trailing_switches = (
        ('batchnorm_spatial_persistent', True,
         'Enable the spacial persistent mode for CuDNN batch norm kernel.'),
        ('enable_get_next_as_optional', False,
         'Enable get_next_as_optional behavior in DistributedIterator.'),
        ('enable_checkpoint_and_export', False,
         'Whether to enable a checkpoint callback and export the savedmodel.'),
    )
    for switch_name, switch_default, switch_help in trailing_switches:
        flags.DEFINE_boolean(
            name=switch_name, default=switch_default, help=switch_help)

    flags.DEFINE_string(
        name='tpu', default='', help='TPU address to connect to.')
    flags.DEFINE_integer(
        name='steps_per_loop',
        default=1,
        help='Number of steps per graph-mode loop. Only training step happens '
        'inside the loop. Callbacks will not be called inside. Will be capped '
        'at steps per epoch.',
    )
def define_resnet_flags(resnet_size_choices=None):
    """Register the ResNet flags, including the loss/optimizer extensions.

    Besides the shared flag groups and the usual ResNet flags, this variant
    registers flags for the reconstruction and cross-entropy loss scales, the
    optimizer choice, gradient handling, spectral norm and compress ratio.

    Args:
        resnet_size_choices: optional collection of allowed values for
            ``--resnet_size``. When ``None`` the flag is a free-form string;
            otherwise it is an enum restricted to these values.
    """
    wrap = flags_core.help_wrap

    flags_core.define_base()
    flags_core.define_performance(
        num_parallel_calls=False,
        tf_gpu_thread_mode=True,
        datasets_num_private_threads=True,
        datasets_num_parallel_batches=True,
    )
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='2',
        enum_values=['1', '2'],
        help=wrap('Version of ResNet. (1 or 2) See README.md for details.'),
    )
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=wrap(
            'If True do not train any parameters except for the final layer.'),
    )
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default=None,
        help=wrap(
            'If not None initialize all the network except the final layer '
            'with these values'),
    )
    flags.DEFINE_boolean(
        name='eval_only',
        default=False,
        help=wrap(
            'Skip training and only perform evaluation on '
            'the latest checkpoint.'),
    )
    flags.DEFINE_boolean(
        name='image_bytes_as_serving_input',
        default=False,
        help=wrap(
            'If True exports savedmodel with serving signature that accepts '
            'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
            'represents the image. The former is easier to use for serving at '
            'the expense of image resize/cropping being done as part of model '
            'inference. Note, this flag only applies to ImageNet and cannot '
            'be used for CIFAR.'),
    )

    # Flags below are specific to this variant. Flag names and help texts
    # are kept exactly as originally spelled because callers pass the names.
    flags.DEFINE_float(
        name='reconst_loss_scale',
        default=10.0,
        help=wrap('scale the reconst_loss'),
    )
    flags.DEFINE_boolean(
        name='use_ce',
        default=False,
        help=wrap('use cross entropy loss for compressive sensing training'),
    )
    # Default is 'adam'; the original left an 'sgd' default commented out.
    flags.DEFINE_string(
        name='optimizer',
        short_name='opt',
        default='adam',
        help=wrap('Choose optimizer for training'),
    )
    flags.DEFINE_boolean(
        name='clip_grad',
        default=False,
        help=wrap('whether to clip weights during training'),
    )
    flags.DEFINE_boolean(
        name='spectral_norm',
        short_name='sn',
        default=True,
        help=wrap('whether to user spectral norm in the cs part'),
    )
    flags.DEFINE_float(
        name='ce_scale',
        default=1.0,
        help=wrap('scale the cross_entropy'),
    )
    flags.DEFINE_boolean(
        name='sep_grad_nrom',
        default=False,
        help=wrap(
            'spearate the gradients from reconstruction and ce, '
            'and norm the ce grad'),
    )
    flags.DEFINE_boolean(
        name='norm_teach_feature',
        default=False,
        help=wrap('norm each channel of teaching feature with BN params'),
    )
    flags.DEFINE_boolean(
        name='no_dense_init',
        default=False,
        help=wrap('dont init resenet/dense during fine tuning'),
    )
    flags.DEFINE_float(
        name='compress_ratio',
        default=0.1,
        help=wrap('the compress ratio of the offloading layer'),
    )

    size_flag = {
        'name': 'resnet_size',
        'short_name': 'rs',
        'default': '50',
        'help': wrap('The size of the ResNet model to use.'),
    }
    if resnet_size_choices is not None:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **size_flag)
    else:
        flags.DEFINE_string(**size_flag)
def define_keras_benchmark_flags():
    """Register the flags for benchmarking Keras built-in application models.

    Also installs a validator that rejects setting ``--eager`` and
    ``--dist_strat`` together.
    """
    wrap = flags_core.help_wrap

    flags_core.define_base(hooks=False)
    flags_core.define_performance()
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    benchmark_defaults = {
        "data_format": "channels_last",
        "use_synthetic_data": True,
        "batch_size": 32,
        "train_epochs": 2,
    }
    flags_core.set_defaults(**benchmark_defaults)

    flags.DEFINE_enum(
        name="model",
        default=None,
        enum_values=MODELS.keys(),
        case_sensitive=False,
        help=wrap("Model to be benchmarked."),
    )
    flags.DEFINE_integer(
        name="num_train_images",
        default=1000,
        help=wrap(
            "The number of synthetic images for training. "
            "The default value is 1000."),
    )
    flags.DEFINE_integer(
        name="num_eval_images",
        default=50,
        help=wrap(
            "The number of synthetic images for evaluation. "
            "The default value is 50."),
    )
    flags.DEFINE_boolean(
        name="eager",
        default=False,
        help=wrap(
            "To enable eager execution. Note that if eager execution is "
            "enabled, only one GPU is utilized even if multiple GPUs are "
            "provided and multi_gpu_model is used."),
    )
    flags.DEFINE_boolean(
        name="dist_strat",
        default=False,
        help=wrap(
            "To enable distribution strategy for model training and "
            "evaluation. Number of GPUs used for distribution strategy can "
            "be set by the argument --num_gpus."),
    )
    flags.DEFINE_list(
        name="callbacks",
        default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
        help=wrap(
            "A list of (case insensitive) strings to specify the names of "
            "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
            "LoggingMetricCallback`"),
    )

    conflict_message = (
        "Both --eager and --dist_strat were set. Only one can be "
        "defined, as DistributionStrategy is not supported in Eager "
        "execution currently.")

    # pylint: disable=unused-variable
    @flags.multi_flags_validator(["eager", "dist_strat"],
                                 message=conflict_message)
    def _check_eager_dist_strat(flag_dict):
        # Valid unless both switches are on at once.
        return not flag_dict["eager"] or not flag_dict["dist_strat"]
def define_flags():
    """Register the base, performance, image and benchmark flag groups.

    Only the base group is customised: it is requested with
    ``num_gpu=False``.
    """
    base_options = {'num_gpu': False}

    flags_core.define_base(**base_options)
    flags_core.define_performance()
    flags_core.define_image()
    flags_core.define_benchmark()
def define_resnet_flags(resnet_size_choices=None, dynamic_loss_scale=False,
                        fp16_implementation=False):
    """Register the ResNet flags, including multi-worker and LARS options.

    Args:
        resnet_size_choices: optional collection of allowed values for
            ``--resnet_size``. When ``None`` the flag is a free-form string;
            otherwise it is an enum restricted to these values.
        dynamic_loss_scale: forwarded as the ``dynamic_loss_scale`` option of
            ``flags_core.define_performance``.
        fp16_implementation: forwarded as the ``fp16_implementation`` option
            of ``flags_core.define_performance``.
    """
    wrap = flags_core.help_wrap

    performance_options = dict(
        num_parallel_calls=False,
        tf_gpu_thread_mode=True,
        datasets_num_private_threads=True,
        dynamic_loss_scale=dynamic_loss_scale,
        fp16_implementation=fp16_implementation,
        loss_scale=True,
        tf_data_experimental_slack=True,
    )
    flags_core.define_base()
    flags_core.define_performance(**performance_options)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='2',
        enum_values=['1', '2'],
        help=wrap('Version of ResNet. (1 or 2) See README.md for details.'),
    )
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=wrap(
            'If True do not train any parameters except for the final layer.'),
    )
    # NOTE(review): the default is an absolute path inside one user's home
    # directory (the original kept `default=None` commented out next to it).
    # Confirm whether this should revert to None before sharing the code.
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default="/home/zxc/Liu/models-master-new/official/r1/resnet/model/",
        help=wrap(
            'If not None initialize all the network except the final layer '
            'with these values'),
    )
    flags.DEFINE_boolean(
        name='eval_only',
        default=False,
        help=wrap(
            'Skip training and only perform evaluation on '
            'the latest checkpoint.'),
    )
    flags.DEFINE_boolean(
        name='image_bytes_as_serving_input',
        default=False,
        help=wrap(
            'If True exports savedmodel with serving signature that accepts '
            'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
            'represents the image. The former is easier to use for serving at '
            'the expense of image resize/cropping being done as part of model '
            'inference. Note, this flag only applies to ImageNet and cannot '
            'be used for CIFAR.'),
    )
    flags.DEFINE_boolean(
        name='use_train_and_evaluate',
        default=False,
        help=wrap(
            'If True, uses `tf.estimator.train_and_evaluate` for the training '
            'and evaluation loop, instead of separate calls to '
            '`classifier.train and `classifier.evaluate`, which is the '
            'default behavior.'),
    )
    flags.DEFINE_string(
        name='worker_hosts',
        default=None,
        help=wrap(
            'Comma-separated list of worker ip:port pairs for running '
            'multi-worker models with DistributionStrategy. The user would '
            'start the program on each host with identical value for this '
            'flag.'),
    )
    flags.DEFINE_integer(
        name='task_index',
        default=-1,
        help=wrap(
            'If multi-worker training, the task_index of this worker.'),
    )
    flags.DEFINE_bool(
        name='enable_lars',
        default=False,
        help=wrap('Enable LARS optimizer for large batch training.'),
    )
    flags.DEFINE_float(
        name='label_smoothing',
        default=0.0,
        help=wrap(
            'Label smoothing parameter used in the softmax_cross_entropy'),
    )
    flags.DEFINE_float(
        name='weight_decay',
        default=1e-4,
        help=wrap('Weight decay coefficiant for l2 regularization.'),
    )

    size_flag = {
        'name': 'resnet_size',
        'short_name': 'rs',
        'default': '50',
        'help': wrap('The size of the ResNet model to use.'),
    }
    if resnet_size_choices is not None:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **size_flag)
    else:
        flags.DEFINE_string(**size_flag)