def main(positional_arguments):
  """Validate the command line, then build and run the CNN benchmark.

  The environment variables of the process are written to the TensorFlow
  debug log before the benchmark is set up. Setup, construction and the run
  itself all happen inside the MLPerf compliance logger context.

  Args:
    positional_arguments: Sequence of positional command-line arguments. It
      must hold at least one entry; anything after the first is rejected.

  Raises:
    ValueError: If more than one positional argument was supplied.
  """
  # '--distortions False' parses as '--distortions=True' followed by a stray
  # positional 'False'. Refusing positionals stops such a command line from
  # silently running with distortions enabled.
  num_positional = len(positional_arguments)
  assert num_positional >= 1
  if num_positional > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  params = benchmark_cnn.make_params_from_flags()

  # Dump every environment variable at debug verbosity.
  rule = '=' * 20
  tf.logging.debug(rule + ' Environment Variables ' + rule)
  for name, value in os.environ.items():
    tf.logging.debug('{}: {}'.format(name, value))

  compliance_logging = absl_flags.FLAGS.ml_perf_compliance_logging
  with mlperf.mlperf_logger(compliance_logging, params.model):
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow: %i.%i' % (version[0], version[1]))

    bench.print_info()
    bench.run()
def main(positional_arguments):
  """Run the benchmark with its hooks supplied by a benchmark handler.

  A `benchmark_handler.Handler` is built from the flag-derived params. It
  provides the params, dataset and model used to construct the benchmark,
  and several of its methods are installed on the benchmark object before
  the run starts.

  Args:
    positional_arguments: Sequence of positional command-line arguments. It
      must hold at least one entry; anything after the first is rejected.

  Raises:
    ValueError: If more than one positional argument was supplied.
  """
  # '--distortions False' parses as '--distortions=True' followed by a stray
  # positional 'False'. Refusing positionals stops such a command line from
  # silently running with distortions enabled.
  num_positional = len(positional_arguments)
  assert num_positional >= 1
  if num_positional > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  handler = benchmark_handler.Handler(benchmark_cnn.make_params_from_flags())
  params = benchmark_cnn.setup(handler.params)

  bench = benchmark_cnn.BenchmarkCNN(
      params, dataset=handler.dataset, model=handler.model)
  handler.set_bench(bench)

  # Only some input preprocessors expose set_aug_list; call it when present.
  set_aug_list = getattr(bench.input_preprocessor, 'set_aug_list', None)
  if set_aug_list:
    set_aug_list(params.aug_list)

  # Install the handler's implementations on the benchmark, in this order.
  for hook_name in ('benchmark_one_step',
                    'print_eval_results',
                    'check_early_stop',
                    'accum_grads',
                    'build_fetches_forward'):
    setattr(bench, hook_name, getattr(handler, hook_name))

  if params.memory_saving_method == 'recomputing':
    bench.memory_saving = ms.Memory_Saving(benchmark_cnn=bench)

  bench.print_info()
  bench.run()
def main(positional_arguments):
  """Run the CNN benchmark after printing its threading/prefetch settings.

  Args:
    positional_arguments: Sequence of positional command-line arguments. It
      must hold at least one entry; anything after the first is rejected.

  Raises:
    ValueError: If more than one positional argument was supplied.
  """
  # '--distortions False' parses as '--distortions=True' followed by a stray
  # positional 'False'. Refusing positionals stops such a command line from
  # silently running with distortions enabled.
  num_positional = len(positional_arguments)
  assert num_positional >= 1
  if num_positional > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
  bench = benchmark_cnn.BenchmarkCNN(params)

  version = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow: %i.%i' % (version[0], version[1]))
  bench.print_info()

  # Echo the thread-pool and dataset-prefetch parameters to stdout.
  for field in ('num_inter_threads',
                'num_intra_threads',
                'datasets_num_private_threads',
                'datasets_use_prefetch',
                'datasets_prefetch_buffer_size'):
    print(field + ': ' + str(getattr(params, field)))

  bench.run()
def main(positional_arguments):
  """Run the benchmark on a GTSRB dataset with a custom model.

  Flag-derived params are overridden with values taken from the options
  object plus a set of fixed settings. The test dataset class is used when
  the data directory contains the substring 'test'. The default TensorFlow
  graph is reset once the run finishes.

  Args:
    positional_arguments: Sequence of positional command-line arguments. It
      must hold at least one entry; anything after the first is rejected.

  Raises:
    ValueError: If more than one positional argument was supplied.
  """
  # '--distortions False' parses as '--distortions=True' followed by a stray
  # positional 'False'. Refusing positionals stops such a command line from
  # silently running with distortions enabled.
  num_positional = len(positional_arguments)
  assert num_positional >= 1
  if num_positional > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  options = make_options_from_flags(FLAGS)
  params = benchmark_cnn.make_params_from_flags()

  # Apply every override in one _replace call. Optional overrides that are
  # currently unused: all_reduce_spec, bottom_file, affine_files,
  # affine_classes, print_training_accuracy, summary_verbosity,
  # save_summaries_steps.
  params = params._replace(
      batch_size=options.batch_size,
      model='MY_GTSRB',
      num_epochs=options.num_epochs,
      num_gpus=options.num_gpus,
      data_format='NHWC',
      train_dir=options.checkpoint_folder,
      allow_growth=True,
      variable_update='replicated',
      local_parameter_device='gpu',
      use_tf_layers=False,
      optimizer=options.optimizer,
      weight_decay=options.weight_decay,
      backbone_model_path=options.backbone_model_path,
      save_model_secs=60,  # checkpoint every 60 seconds
  )
  params = benchmark_cnn.setup(params)

  dataset = (GTSRBTestDataset(options) if 'test' in options.data_dir
             else GTSRBDataset(options))
  model = Model_Builder(options.model_name, dataset.num_classes, options,
                        params)

  bench = benchmark_cnn.BenchmarkCNN(params, dataset=dataset, model=model)

  version = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow: %i.%i' % (version[0], version[1]))

  bench.print_info()
  bench.run()

  tf.reset_default_graph()
def main(positional_arguments):
  """Run the CNN benchmark inside the external logging context.

  Setup and info printing happen first; only `bench.run()` executes within
  `log_context`, which is configured from the module-level LOGGER_* values.

  Args:
    positional_arguments: Sequence of positional command-line arguments. It
      must hold at least one entry; anything after the first is rejected.

  Raises:
    ValueError: If more than one positional argument was supplied.
  """
  # '--distortions False' parses as '--distortions=True' followed by a stray
  # positional 'False'. Refusing positionals stops such a command line from
  # silently running with distortions enabled.
  num_positional = len(positional_arguments)
  assert num_positional >= 1
  if num_positional > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
  bench = benchmark_cnn.BenchmarkCNN(params)

  version = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow: %i.%i' % (version[0], version[1]))
  bench.print_info()

  run_logging = log_context(LOGGER_URL, LOGGER_USRENAME, LOGGER_PASSWORD,
                            LOGGER_DB, LOGGER_SERIES, machine=LOGGER_VM)
  with run_logging:
    bench.run()
def main(positional_arguments):
  """Run the CNN benchmark, optionally loading the DMO file system first.

  When `params.enable_dmo` equals True, `LoadFileSystem()` is called. A
  result equal to False terminates the process with exit status -1;
  otherwise a banner is printed and the benchmark proceeds.

  Args:
    positional_arguments: Sequence of positional command-line arguments. It
      must hold at least one entry; anything after the first is rejected.

  Raises:
    ValueError: If more than one positional argument was supplied.
    SystemExit: If DMO is enabled and `LoadFileSystem()` returns False.
  """
  # '--distortions False' parses as '--distortions=True' followed by a stray
  # positional 'False'. Refusing positionals stops such a command line from
  # silently running with distortions enabled.
  num_positional = len(positional_arguments)
  assert num_positional >= 1
  if num_positional > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())

  import sys

  # The explicit equality comparisons are kept deliberately: switching to
  # plain truthiness would change the outcome for values such as None.
  if params.enable_dmo == True:
    if LoadFileSystem() == False:
      sys.exit(-1)
    # Reached only when loading did not report failure (sys.exit raises).
    print("\n*******DMO enabled********\n")

  bench = benchmark_cnn.BenchmarkCNN(params)

  version = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow: %i.%i' % (version[0], version[1]))

  bench.print_info()
  bench.run()
def main(_):
  """Set up params from flags and dispatch to the matching runner.

  `run_with_test_model` is used when the configured model is 'test_model';
  every other model goes through `run_with_real_model`.

  Args:
    _: Unused positional command-line arguments.
  """
  params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
  runner = (run_with_test_model if params.model == 'test_model'
            else run_with_real_model)
  runner(params)
def main(_):
  """Build the CNN benchmark from flags, print its info and run it.

  Args:
    _: Unused positional command-line arguments.
  """
  params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
  bench = benchmark_cnn.BenchmarkCNN(params)

  version = cnn_util.tensorflow_version_tuple()
  major, minor = version[0], version[1]
  log_fn('TensorFlow: %i.%i' % (major, minor))

  bench.print_info()
  bench.run()
def main(_):
  """Build a single-GPU benchmark graph and train it through Parallax.

  The benchmark model is built into its own graph, which is handed to
  `parallax.parallel_run` together with a `run` callback that drives the
  training loop and logs progress.

  Args:
    _: Unused positional command-line arguments.
  """
  # Build benchmark_cnn model.
  params = benchmark_cnn.make_params_from_flags()
  params, sess_config = benchmark_cnn.setup(params)
  bench = benchmark_cnn.BenchmarkCNN(params)

  # Print information.
  tfversion = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow: %i.%i' % (tfversion[0], tfversion[1]))
  bench.print_info()

  # Build single-GPU benchmark_cnn model.
  with tf.Graph().as_default() as single_gpu_graph:
    bench.build_model()

  def run(sess, num_iters, tensor_or_op_name_to_replica_names,
          num_workers, worker_id, num_replicas_per_worker):
    """Run `num_iters` training steps, logging every `log_frequency` steps.

    Only the first replica of each fetched tensor/op is evaluated.
    """
    fetches = {
        'global_step':
            tensor_or_op_name_to_replica_names[bench.global_step.name][0],
        'cost': tensor_or_op_name_to_replica_names[bench.cost.name][0],
        'train_op':
            tensor_or_op_name_to_replica_names[bench.train_op.name][0],
    }
    # The learning rate is fetched only when it is a tensor; otherwise the
    # Python value stored on the benchmark is logged directly.
    if isinstance(bench.lr, tf.Tensor):
      fetches['lr'] = tensor_or_op_name_to_replica_names[bench.lr.name][0]

    start = time.time()
    steps_at_last_log = 0
    for i in range(num_iters):
      results = sess.run(fetches)
      if i % FLAGS.log_frequency == 0:
        end = time.time()
        # Divide by the number of steps actually executed since the previous
        # log line. The first log (i == 0) covers a single step, so assuming
        # a full `log_frequency` window there would overstate throughput.
        steps_run = i + 1 - steps_at_last_log
        throughput = float(steps_run) / float(end - start)
        parallax.log.info(
            "global step: %d, lr: %f, loss: %f, "
            "throughput: %f steps/sec"
            % (results['global_step'],
               results['lr'] if 'lr' in results else bench.lr,
               results['cost'], throughput))
        steps_at_last_log = i + 1
        start = time.time()

  config = parallax_config.build_config()
  config.sess_config = sess_config

  parallax.parallel_run(single_gpu_graph,
                        run,
                        FLAGS.resource_info_file,
                        FLAGS.max_steps,
                        sync=FLAGS.sync,
                        parallax_config=config)
def main(positional_arguments):
  """Run the benchmark on the CIFAR dataset with a custom model.

  Flag-derived params are overridden with values taken from the options
  object plus a set of fixed settings before the benchmark is set up.

  Args:
    positional_arguments: Sequence of positional command-line arguments. It
      must hold at least one entry; anything after the first is rejected.

  Raises:
    ValueError: If more than one positional argument was supplied.
  """
  num_positional = len(positional_arguments)
  assert num_positional >= 1
  if num_positional > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  options = make_options_from_flags(FLAGS)
  params = benchmark_cnn.make_params_from_flags()

  # Apply every override in one _replace call. Optional overrides that are
  # currently unused: all_reduce_spec, bottom_file, affine_files,
  # affine_classes, summary_verbosity, save_summaries_steps.
  params = params._replace(
      batch_size=options.batch_size,
      model='MY_GTSRB',
      num_epochs=options.num_epochs,
      num_gpus=options.num_gpus,
      data_format='NHWC',
      train_dir=options.checkpoint_folder,
      allow_growth=True,
      variable_update='replicated',
      local_parameter_device='gpu',
      use_tf_layers=False,
      optimizer=options.optimizer,
      weight_decay=options.weight_decay,
      print_training_accuracy=True,
      backbone_model_path=options.backbone_model_path,
      save_model_secs=3600,  # checkpoint once per hour
  )
  params = benchmark_cnn.setup(params)

  dataset = CifarDataset(options)
  model = Model_Builder(options.model_name, dataset.num_classes, options,
                        params)

  bench = benchmark_cnn.BenchmarkCNN(params, dataset=dataset, model=model)

  version = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow: %i.%i' % (version[0], version[1]))

  bench.print_info()
  bench.run()
def main(positional_arguments):
  """Build the CNN benchmark and hand it to `run_benchmark`.

  The `iters_per_step` flag value is passed along as the second argument.

  Args:
    positional_arguments: Sequence of positional command-line arguments. It
      must hold at least one entry; anything after the first is rejected.

  Raises:
    ValueError: If more than one positional argument was supplied.
  """
  # '--distortions False' parses as '--distortions=True' followed by a stray
  # positional 'False'. Refusing positionals stops such a command line from
  # silently running with distortions enabled.
  num_positional = len(positional_arguments)
  assert num_positional >= 1
  if num_positional > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
  bench = benchmark_cnn.BenchmarkCNN(params)

  version = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow: %i.%i' % (version[0], version[1]))

  iters_per_step = absl_flags.FLAGS.iters_per_step
  run_benchmark(bench, iters_per_step)
def main(extra_flags):
  """Reject undefined flags, then build and run the CNN benchmark.

  Args:
    extra_flags: Command-line arguments that are not defined in
      tf.flags.FLAGS. The first entry is always the program name, so any
      further entry is an unknown flag.

  Raises:
    ValueError: If any flag beyond the program name was supplied.
  """
  num_extra = len(extra_flags)
  assert num_extra >= 1
  if num_extra > 1:
    raise ValueError('Received unknown flags: %s' % extra_flags[1:])

  params = benchmark_cnn.make_params_from_flags()
  # NOTE(review): the return value of setup() is not used here; the params
  # object created above is what the benchmark receives.
  benchmark_cnn.setup(params)
  bench = benchmark_cnn.BenchmarkCNN(params)

  version = cnn_util.tensorflow_version_tuple()
  major, minor = version[0], version[1]
  log_fn('TensorFlow: %i.%i' % (major, minor))

  bench.print_info()
  bench.run()
def main(positional_arguments):
  """Run the CNN benchmark inside the MLPerf compliance logger context.

  Args:
    positional_arguments: Sequence of positional command-line arguments. It
      must hold at least one entry; anything after the first is rejected.

  Raises:
    ValueError: If more than one positional argument was supplied.
  """
  # '--distortions False' parses as '--distortions=True' followed by a stray
  # positional 'False'. Refusing positionals stops such a command line from
  # silently running with distortions enabled.
  num_positional = len(positional_arguments)
  assert num_positional >= 1
  if num_positional > 1:
    raise ValueError("Received unknown positional arguments: %s"
                     % positional_arguments[1:])

  params = benchmark_cnn.make_params_from_flags()
  compliance_logging = absl_flags.FLAGS.ml_perf_compliance_logging

  with mlperf.mlperf_logger(compliance_logging, params.model):
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    version = cnn_util.tensorflow_version_tuple()
    log_fn("TensorFlow: %i.%i" % (version[0], version[1]))

    bench.print_info()
    bench.run()
def main(_):
  """Train the benchmark model using a session obtained from Parallax.

  The model is built into a dedicated graph, `parallax.parallel_run`
  returns a session for it, and the training loop here runs
  `FLAGS.max_steps` steps, logging loss and throughput every
  `FLAGS.log_frequency` steps.

  Args:
    _: Unused positional command-line arguments.
  """
  # Build the benchmark_cnn model description.
  params, sess_config = benchmark_cnn.setup(
      benchmark_cnn.make_params_from_flags())
  bench = benchmark_cnn.BenchmarkCNN(params)

  # Report the TensorFlow version and benchmark configuration.
  version = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow: %i.%i' % (version[0], version[1]))
  bench.print_info()

  # Build the single-GPU model inside its own graph.
  single_gpu_graph = tf.Graph()
  with single_gpu_graph.as_default():
    bench.build_model()

  config = parallax_config.build_config()
  config.sess_config = sess_config

  # Only the session is used below; the remaining values are unpacked to
  # keep the expected 4-tuple shape of the result.
  (sess, _num_workers, _worker_id,
   _num_replicas_per_worker) = parallax.parallel_run(
       single_gpu_graph,
       FLAGS.resource_info_file,
       sync=FLAGS.sync,
       parallax_config=config)

  fetches = dict(global_step=bench.global_step,
                 cost=bench.cost,
                 train_op=bench.train_op)

  window_start = time.time()
  for steps_done in range(1, FLAGS.max_steps + 1):
    results = sess.run(fetches)
    if steps_done % FLAGS.log_frequency != 0:
      continue
    window_end = time.time()
    throughput = (float(FLAGS.log_frequency)
                  / float(window_end - window_start))
    # Each fetched value is indexed with [0] to take its first entry.
    parallax.log.info(
        "global step: %d, loss: %f, throughput: %f steps/sec"
        % (results['global_step'][0] + 1, results['cost'][0], throughput))
    window_start = time.time()
def main(positional_arguments):
  """Benchmark a fixed list of models across several GPU counts.

  Every model configuration is run with 1, 2, 4 and 8 GPUs. The collected
  results are pretty-printed, followed by a comma-separated table of
  `images_per_sec` with one row per GPU count and one column per model.

  For DGX servers use the hierarchical_copy=True argument.

  Args:
    positional_arguments: Sequence of positional command-line arguments. It
      must hold at least one entry; anything after the first is rejected.

  Raises:
    ValueError: If more than one positional argument was supplied.
  """
  # '--distortions False' parses as '--distortions=True' followed by a stray
  # positional 'False'. Refusing positionals stops such a command line from
  # silently running with distortions enabled.
  num_positional = len(positional_arguments)
  assert num_positional >= 1
  if num_positional > 1:
    raise ValueError('Received unknown positional arguments: %s'
                     % positional_arguments[1:])

  # 'num_gpus' is a placeholder that is filled in per run.
  tests_models = [
      {'num_gpus': None, 'batch_size': 64,
       'variable_update': 'parameter_server', 'model': 'inception3'},
      {'num_gpus': None, 'batch_size': 64,
       'variable_update': 'parameter_server', 'model': 'resnet50'},
      # batch=64 crashes
      {'num_gpus': None, 'batch_size': 32,
       'variable_update': 'parameter_server', 'model': 'resnet152'},
      {'num_gpus': None, 'batch_size': 64,
       'variable_update': 'replicated', 'model': 'vgg16'},
      {'num_gpus': None, 'batch_size': 512,
       'variable_update': 'replicated', 'model': 'alexnet'},
  ]
  test_gpus = [1, 2, 4, 8]

  stats = []
  for model_spec in tests_models:
    for num_gpus in test_gpus:
      # Fresh per-run configuration; its keys match the params fields
      # being overridden.
      run_spec = dict(model_spec, num_gpus=num_gpus)

      params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
      # force --hierarchical_copy to False when using 1 GPU
      if num_gpus == 1:
        params = params._replace(hierarchical_copy=False)
      params = params._replace(**run_spec)

      bench = benchmark_cnn.BenchmarkCNN(params)

      version = cnn_util.tensorflow_version_tuple()
      log_fn('TensorFlow: %i.%i' % (version[0], version[1]))

      bench.print_info()
      # Example result:
      # {
      #   'average_wall_time': 0.6646941304206848,
      #   'images_per_sec': 385.1395525908701,
      #   'last_average_loss': 7.256145,
      #   'num_steps': 100,
      #   'num_workers': 1
      # }
      outcome = bench.run()
      stats.append({'test': run_spec, 'result': outcome})

  separator = '=========='
  print('summary:')
  print(separator)
  pprint.pprint(stats)
  print(separator)

  # stats is ordered model-major, so the entry for (model m, gpu g) sits at
  # index g + m * len(test_gpus).
  gpus_per_model = len(test_gpus)
  table_rows = []
  for gpu_idx in range(gpus_per_model):
    cells = [
        str(stats[gpu_idx + model_idx * gpus_per_model]
            ['result']['images_per_sec']) + ', '
        for model_idx in range(len(tests_models))
    ]
    table_rows.append(''.join(cells) + '\n')
  print(''.join(table_rows))
  print(separator)
def main(_):
  """Force evaluation mode and evaluate the CNN benchmark model.

  Args:
    _: Unused positional command-line arguments.
  """
  # The eval flag must be set before params are derived from the flags.
  FLAGS.eval = True

  # setup() yields a pair here; only the params are needed.
  params, _unused_config = benchmark_cnn.setup(
      benchmark_cnn.make_params_from_flags())

  benchmark_cnn.BenchmarkCNN(params).evaluate()
def main(_): params = benchmark_cnn.make_params_from_flags() models = [ 'alexnet', ]