def main(argv):
  """Writes one predictions CSV per input shard for a single checkpoint.

  Shards are the files under FLAGS.data_directory matching "validation*" when
  FLAGS.mode is "eval" and "train*" otherwise. They are processed in sorted
  order. For each shard, predictions_from_checkpoint_dir is called and the
  object it returns is written with to_csv() to

    <FLAGS.output_path>/imagenet/predictions_dataframe/<FLAGS.mode>/<shard>/
        predictions_dataframe_<timestamp>.csv

  Args:
    argv: Command-line arguments; unused.
  """
  del argv  # Unused.

  file_path = "validation*" if FLAGS.mode == "eval" else "train*"
  data_directory = os.path.join(FLAGS.data_directory, file_path)
  filenames = tf.gfile.Glob(data_directory)

  shard_count = 0
  for filename in sorted(filenames):
    shard = os.path.basename(filename)
    dest_dir = os.path.join(FLAGS.output_path, "imagenet",
                            "predictions_dataframe", FLAGS.mode, shard)
    if not tf.gfile.IsDirectory(dest_dir):
      # dest_dir sits several levels below FLAGS.output_path, so the missing
      # parent directories have to be created as well.
      tf.gfile.MakeDirs(dest_dir)

    # NOTE(review): `params` is the same object as `imagenet_params` (no copy
    # is made), so the update() below modifies that dict in place on every
    # iteration. Confirm nothing else relies on its original contents.
    params = imagenet_params
    # sloppy_shuffle is switched off so that the order of images in the
    # output is not perturbed.
    update_params = {
        "sloppy_shuffle": False,
        "num_cores": 8,
        "base_learning_rate": 0.1,
        "weight_decay": 1e-4,
        "lr_schedule": [(1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)],
        "momentum": 0.9,
        "data_format": "channels_last",
        "output_dir": dest_dir,
        "label_smoothing": FLAGS.label_smoothing,
    }
    params.update(update_params)

    # The step is the token after the first "-" in the basename of
    # FLAGS.ckpt_dir.
    global_step = int(os.path.basename(FLAGS.ckpt_dir).split("-")[1])

    df = predictions_from_checkpoint_dir(
        directory_path=data_directory,
        filename=filename,
        params=params,
        ckpt_directory=FLAGS.ckpt_dir,
        global_step=global_step)

    # A wall-clock timestamp in the file name keeps repeated runs from
    # overwriting each other's output.
    timestamp = str(time.time())
    output_path = os.path.join(
        dest_dir, "predictions_dataframe_{}.csv".format(timestamp))
    with tf.gfile.Open(output_path, "w") as f:
      df.to_csv(f)
    shard_count += 1

  tf.logging.info("number of shards processed: %d", shard_count)
def main(argv):
  """Builds the TPUEstimator and runs the job selected by FLAGS.mode.

  Supported modes:
    'eval_once': evaluate the single checkpoint named by
      FLAGS.eval_once_ckpt_prefix inside the output directory.
    'eval': evaluate every checkpoint that appears in the output directory
      and stop once one at or beyond FLAGS.train_steps has been evaluated.
    'train': train up to FLAGS.train_steps with the model-export hook.
    'train_and_eval': alternate training for FLAGS.steps_per_eval steps with
      a full evaluation until FLAGS.train_steps is reached.

  Args:
    argv: Command-line arguments; unused.

  Raises:
    ValueError: If FLAGS.eval_batch_size does not evenly divide
      FLAGS.num_eval_images.
  """
  del argv  # Unused.

  tf.enable_resource_variables()
  tf.set_random_seed(FLAGS.seed)
  set_lr_schedule()
  set_custom_sparsity_map()

  # One path component per hyper-parameter, so that runs with different
  # settings end up in different directories.
  folder_stub = os.path.join(FLAGS.training_method, str(FLAGS.end_sparsity),
                             str(FLAGS.maskupdate_begin_step),
                             str(FLAGS.maskupdate_end_step),
                             str(FLAGS.maskupdate_frequency),
                             str(FLAGS.drop_fraction),
                             str(FLAGS.label_smoothing),
                             str(FLAGS.weight_decay))

  output_dir = FLAGS.output_dir
  if FLAGS.use_folder_stub:
    output_dir = os.path.join(output_dir, folder_stub)
  export_dir = os.path.join(output_dir, 'export_dir')

  # Values handed to the model function through `params`.
  params = {}
  params['output_dir'] = output_dir
  params['training_method'] = FLAGS.training_method
  params['use_tpu'] = FLAGS.use_tpu

  dataset_func = functools.partial(
      imagenet_input.ImageNetInput,
      data_dir=FLAGS.data_directory,
      transpose_input=False,
      num_parallel_calls=FLAGS.num_parallel_calls,
      use_bfloat16=False)
  imagenet_train, imagenet_eval = [
      dataset_func(is_training=is_training) for is_training in [True, False]
  ]

  run_config = tpu_config.RunConfig(
      master=FLAGS.master,
      model_dir=output_dir,
      save_checkpoints_steps=FLAGS.steps_per_checkpoint,
      keep_checkpoint_max=FLAGS.keep_checkpoint_max,
      session_config=tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False),
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_cores,
          tpu_job_name=FLAGS.tpu_job_name))

  classifier = tpu_estimator.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=resnet_model_fn_w_pruning,
      params=params,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size)

  # Second estimator with export_to_tpu=False; it is only handed to
  # ExportModelHook below.
  cpu_classifier = tpu_estimator.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=resnet_model_fn_w_pruning,
      params=params,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      export_to_tpu=False,
      eval_batch_size=FLAGS.eval_batch_size)

  if FLAGS.num_eval_images % FLAGS.eval_batch_size != 0:
    raise ValueError(
        'eval_batch_size (%d) must evenly divide num_eval_images(%d)!' %
        (FLAGS.eval_batch_size, FLAGS.num_eval_images))
  eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size

  if FLAGS.mode == 'eval_once':
    ckpt_path = os.path.join(output_dir, FLAGS.eval_once_ckpt_prefix)
    dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
    classifier.evaluate(
        input_fn=dataset.input_fn,
        steps=eval_steps,
        checkpoint_path=ckpt_path,
        name='{0}'.format(FLAGS.eval_once_ckpt_prefix))
  elif FLAGS.mode == 'eval':
    # The dataset choice does not depend on the checkpoint, so pick it once.
    dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
    # Run evaluation when there's a new checkpoint.
    for ckpt in evaluation.checkpoints_iterator(output_dir):
      tf.logging.info('Starting to evaluate.')
      try:
        classifier.evaluate(
            input_fn=dataset.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt,
            name='eval')
        # Terminate the eval job when the final checkpoint is reached. The
        # step is the token after the first '-' in the checkpoint basename.
        global_step = int(os.path.basename(ckpt).split('-')[1])
        if global_step >= FLAGS.train_steps:
          tf.logging.info('Evaluation finished after training step %d',
                          global_step)
          break
      except tf.errors.NotFoundError:
        # The checkpoint disappeared between being listed and being read;
        # move on to the next one.
        tf.logging.info('Checkpoint no longer exists,skipping checkpoint.')
  else:
    global_step = estimator._load_global_step_from_checkpoint_dir(output_dir)  # pylint: disable=protected-access
    # Session run hooks to export model for prediction.
    export_hook = ExportModelHook(cpu_classifier, export_dir)
    hooks = [export_hook]

    if FLAGS.mode == 'train':
      tf.logging.info('start training...')
      classifier.train(
          input_fn=imagenet_train.input_fn,
          hooks=hooks,
          max_steps=FLAGS.train_steps)
    else:
      assert FLAGS.mode == 'train_and_eval'
      tf.logging.info('start training and eval...')
      # NOTE: the export hook is not passed to train() in this mode.
      while global_step < FLAGS.train_steps:
        next_checkpoint = min(global_step + FLAGS.steps_per_eval,
                              FLAGS.train_steps)
        classifier.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        global_step = next_checkpoint
        tf.logging.info('Completed training up to step %d', global_step)
        classifier.evaluate(input_fn=imagenet_eval.input_fn, steps=eval_steps)
def main(_):
  """Builds the TPUEstimator and runs the job selected by FLAGS.mode.

  The output directory is FLAGS.output_dir extended with a sub-path derived
  from FLAGS.pruning_method and the hyper-parameters relevant to it.

  Supported modes:
    'eval_once': evaluate checkpoint 'model.ckpt-<FLAGS.checkpoint_step>'
      located directly under FLAGS.output_dir.
    'eval': evaluate every checkpoint that appears in the output directory
      and stop once one at or beyond FLAGS.train_steps has been evaluated.
    'train': train up to FLAGS.train_steps with the model-export hook.
    'train_and_eval': alternate training for FLAGS.steps_per_eval steps with
      a full evaluation until FLAGS.train_steps is reached.

  Raises:
    ValueError: If FLAGS.pruning_method is not one of the known methods, or
      if FLAGS.eval_batch_size does not evenly divide FLAGS.num_eval_images.
  """
  if FLAGS.pruning_method == 'threshold':
    folder_stub = os.path.join(FLAGS.pruning_method, str(FLAGS.end_sparsity),
                               str(FLAGS.sparsity_begin_step),
                               str(FLAGS.sparsity_end_step),
                               str(FLAGS.pruning_frequency),
                               str(FLAGS.label_smoothing))
  elif FLAGS.pruning_method in ('variational_dropout', 'l0_regularization'):
    # Both methods use the same directory layout.
    folder_stub = os.path.join(FLAGS.pruning_method,
                               str(FLAGS.sparsity_begin_step),
                               str(FLAGS.sparsity_end_step),
                               str(FLAGS.reg_scalar),
                               str(FLAGS.label_smoothing))
  elif FLAGS.pruning_method == 'baseline':
    folder_stub = os.path.join(FLAGS.pruning_method, str(0.0), str(0.0),
                               str(0.0), str(0.0))
  elif FLAGS.pruning_method == 'scratch':
    # NOTE(review): the run description is read from fixed positions 10..15
    # of the '/'-separated FLAGS.load_mask_dir, so this only works for paths
    # with that exact depth; a shorter path raises IndexError. Confirm the
    # expected layout.
    run_info = FLAGS.load_mask_dir.split('/')
    run_type = run_info[10]
    run_sparsity = run_info[11]
    run_begin = run_info[12]
    run_end = run_info[13]
    run_freq = run_info[14]
    run_label_smoothing = run_info[15]
    folder_stub = os.path.join(FLAGS.pruning_method, run_type, run_sparsity,
                               run_begin, run_end, run_freq,
                               run_label_smoothing, FLAGS.init_method)
  else:
    raise ValueError('Pruning method is not known %s' % (FLAGS.pruning_method))

  output_dir = os.path.join(FLAGS.output_dir, folder_stub)
  export_dir = os.path.join(output_dir, 'export_dir')

  # Values handed to the model function through `params`.
  params = {}
  params['output_dir'] = output_dir
  params['pruning_method'] = FLAGS.pruning_method
  params['use_tpu'] = FLAGS.use_tpu
  params['log_alpha_threshold'] = FLAGS.log_alpha_threshold

  imagenet_train, imagenet_eval = [
      imagenet_input.ImageNetInput(  # pylint: disable=g-complex-comprehension
          is_training=is_training,
          data_dir=FLAGS.data_directory,
          transpose_input=False,
          num_parallel_calls=FLAGS.num_parallel_calls,
          use_bfloat16=False) for is_training in [True, False]
  ]

  run_config = tpu_config.RunConfig(
      master=FLAGS.master,
      model_dir=output_dir,
      save_checkpoints_steps=FLAGS.steps_per_checkpoint,
      keep_checkpoint_max=FLAGS.keep_checkpoint_max,
      session_config=tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False),
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_cores,
          tpu_job_name=FLAGS.tpu_job_name))

  classifier = tpu_estimator.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=resnet_model_fn_w_pruning,
      params=params,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size)

  # Second estimator with export_to_tpu=False; it is only handed to
  # ExportModelHook below.
  cpu_classifier = tpu_estimator.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=resnet_model_fn_w_pruning,
      params=params,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      export_to_tpu=False,
      eval_batch_size=FLAGS.eval_batch_size)

  if FLAGS.num_eval_images % FLAGS.eval_batch_size != 0:
    raise ValueError(
        'eval_batch_size (%d) must evenly divide num_eval_images(%d)!' %
        (FLAGS.eval_batch_size, FLAGS.num_eval_images))
  eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size

  # Evaluation results are named after the log-alpha threshold scaled by 10.
  eval_name = '{0}'.format(int(FLAGS.log_alpha_threshold * 10))

  if FLAGS.mode == 'eval_once':
    # os.path.join gives the same path as plain concatenation when
    # FLAGS.output_dir ends in '/', and a correct one when it does not.
    # NOTE(review): this reads from FLAGS.output_dir, not from the
    # stub-extended output_dir used everywhere else - confirm this is intended.
    ckpt = os.path.join(FLAGS.output_dir,
                        'model.ckpt-{}'.format(FLAGS.checkpoint_step))
    classifier.evaluate(
        input_fn=imagenet_eval.input_fn,
        steps=eval_steps,
        checkpoint_path=ckpt,
        name=eval_name)
  elif FLAGS.mode == 'eval':
    # Run evaluation when there's a new checkpoint.
    for ckpt in evaluation.checkpoints_iterator(output_dir):
      print('Starting to evaluate.')
      try:
        classifier.evaluate(
            input_fn=imagenet_eval.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt,
            name=eval_name)
        # Terminate the eval job when the final checkpoint is reached. The
        # step is the token after the first '-' in the checkpoint basename.
        global_step = int(os.path.basename(ckpt).split('-')[1])
        if global_step >= FLAGS.train_steps:
          print('Evaluation finished after training step %d' % global_step)
          break
      except tf.errors.NotFoundError:
        # The checkpoint disappeared between being listed and being read;
        # move on to the next one.
        print('Checkpoint no longer exists,skipping checkpoint.')
  else:
    global_step = tf.estimator._load_global_step_from_checkpoint_dir(output_dir)  # pylint: disable=protected-access,line-too-long
    # Session run hooks to export model for prediction.
    export_hook = ExportModelHook(cpu_classifier, export_dir)
    hooks = [export_hook]

    if FLAGS.mode == 'train':
      print('start training...')
      classifier.train(
          input_fn=imagenet_train.input_fn,
          hooks=hooks,
          max_steps=FLAGS.train_steps)
    else:
      assert FLAGS.mode == 'train_and_eval'
      print('start training and eval...')
      # NOTE: the export hook is not passed to train() in this mode.
      while global_step < FLAGS.train_steps:
        next_checkpoint = min(global_step + FLAGS.steps_per_eval,
                              FLAGS.train_steps)
        classifier.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        global_step = next_checkpoint
        print('Completed training up to step :', global_step)
        classifier.evaluate(input_fn=imagenet_eval.input_fn, steps=eval_steps)