コード例 #1
0
def main(argv):
  """Writes one CSV of checkpoint predictions per input shard.

  Globs the "validation*" files (when FLAGS.mode is "eval") or the "train*"
  files (any other mode) under FLAGS.data_directory. For every matching file,
  in sorted order, it calls predictions_from_checkpoint_dir and writes the
  result with `to_csv` to

    <FLAGS.output_path>/imagenet/predictions_dataframe/<FLAGS.mode>/<shard>/
        predictions_dataframe_<timestamp>.csv

  Args:
    argv: Command-line arguments; unused.

  Raises:
    IndexError: If the basename of FLAGS.ckpt_dir contains no "-".
    ValueError: If the text after the first "-" is not an integer.
  """
  del argv  # Unused.

  file_pattern = "validation*" if FLAGS.mode == "eval" else "train*"
  # Despite its name, this is a glob pattern rather than a plain directory.
  data_directory = os.path.join(FLAGS.data_directory, file_pattern)
  filenames = sorted(tf.gfile.Glob(data_directory))
  if not filenames:
    # Nothing to process; return before parsing the checkpoint path so an
    # empty glob stays a no-op exactly as before.
    tf.logging.warning("no files matched %s", data_directory)
    return

  # The step is the second "-"-separated field of the checkpoint basename
  # (e.g. a basename such as "model.ckpt-1000" yields 1000). It does not depend
  # on the shard, so it is parsed once.
  global_step = int(os.path.basename(FLAGS.ckpt_dir).split("-")[1])

  # Work on a copy so the module-level imagenet_params dict is not mutated.
  base_params = dict(imagenet_params)
  base_params.update({
      # Shuffling is disabled, presumably so that predictions keep the input
      # ordering of the images (the original comment was ambiguous).
      "sloppy_shuffle": False,
      "num_cores": 8,
      "base_learning_rate": 0.1,
      "weight_decay": 1e-4,
      "lr_schedule": [(1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)],
      "momentum": 0.9,
      "data_format": "channels_last",
      "label_smoothing": FLAGS.label_smoothing,
  })

  for shard_count, filename in enumerate(filenames, start=1):
    shard = os.path.basename(filename)
    dest_dir = os.path.join(FLAGS.output_path, "imagenet",
                            "predictions_dataframe", FLAGS.mode, shard)
    # dest_dir is several levels deep: MakeDirs creates missing parents and
    # succeeds when the directory already exists, whereas MkDir does neither.
    tf.gfile.MakeDirs(dest_dir)

    # Only the output directory differs between shards.
    params = dict(base_params, output_dir=dest_dir)

    # NOTE(review): directory_path receives the glob pattern built above, not
    # a directory -- confirm this is what the callee expects.
    df = predictions_from_checkpoint_dir(
        directory_path=data_directory,
        filename=filename,
        params=params,
        ckpt_directory=FLAGS.ckpt_dir,
        global_step=global_step)

    # The timestamp keeps repeated runs from overwriting earlier outputs.
    timestamp = str(time.time())
    output_path = os.path.join(
        dest_dir, "predictions_dataframe_{}.csv".format(timestamp))
    with tf.gfile.Open(output_path, "w") as f:
      df.to_csv(f)
    tf.logging.info("number of shards processed: %d", shard_count)
コード例 #2
0
ファイル: imagenet_train_eval.py プロジェクト: ml-lab/rigl
def main(argv):
    """Trains and/or evaluates a TPUEstimator on ImageNet, driven by FLAGS.mode.

    Builds two TPUEstimators around `resnet_model_fn_w_pruning` (the second
    with `export_to_tpu=False`, used only by the export hook) and then
    dispatches on FLAGS.mode:

      * 'eval_once': evaluate the single checkpoint
        <output_dir>/<FLAGS.eval_once_ckpt_prefix>.
      * 'eval': evaluate each checkpoint yielded by
        `evaluation.checkpoints_iterator(output_dir)` and stop once a
        checkpoint's step reaches FLAGS.train_steps.
      * 'train': train up to FLAGS.train_steps with the export hook attached.
      * 'train_and_eval': alternate training for FLAGS.steps_per_eval steps
        with an evaluation on the eval split until FLAGS.train_steps.

    Args:
      argv: Command-line arguments; unused.

    Raises:
      ValueError: If FLAGS.eval_batch_size does not evenly divide
        FLAGS.num_eval_images, or if FLAGS.mode is not one of the modes above.
    """
    del argv  # Unused.

    tf.enable_resource_variables()
    tf.set_random_seed(FLAGS.seed)
    set_lr_schedule()
    set_custom_sparsity_map()

    # Optional sub-directory that encodes the run's hyper-parameters.
    folder_stub = os.path.join(FLAGS.training_method, str(FLAGS.end_sparsity),
                               str(FLAGS.maskupdate_begin_step),
                               str(FLAGS.maskupdate_end_step),
                               str(FLAGS.maskupdate_frequency),
                               str(FLAGS.drop_fraction),
                               str(FLAGS.label_smoothing),
                               str(FLAGS.weight_decay))

    output_dir = FLAGS.output_dir
    if FLAGS.use_folder_stub:
        output_dir = os.path.join(output_dir, folder_stub)

    export_dir = os.path.join(output_dir, 'export_dir')

    # Values handed to the model function through the estimator's `params`.
    params = {
        'output_dir': output_dir,
        'training_method': FLAGS.training_method,
        'use_tpu': FLAGS.use_tpu,
    }

    dataset_func = functools.partial(
        imagenet_input.ImageNetInput,
        data_dir=FLAGS.data_directory,
        transpose_input=False,
        num_parallel_calls=FLAGS.num_parallel_calls,
        use_bfloat16=False)
    imagenet_train, imagenet_eval = [
        dataset_func(is_training=is_training) for is_training in [True, False]
    ]

    run_config = tpu_config.RunConfig(
        master=FLAGS.master,
        model_dir=output_dir,
        save_checkpoints_steps=FLAGS.steps_per_checkpoint,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False),
        tpu_config=tpu_config.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_cores,
            tpu_job_name=FLAGS.tpu_job_name))

    classifier = tpu_estimator.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=resnet_model_fn_w_pruning,
        params=params,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)

    # Same configuration as `classifier` except for export_to_tpu=False; it is
    # only handed to ExportModelHook below.
    cpu_classifier = tpu_estimator.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=resnet_model_fn_w_pruning,
        params=params,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        export_to_tpu=False,
        eval_batch_size=FLAGS.eval_batch_size)

    if FLAGS.num_eval_images % FLAGS.eval_batch_size != 0:
        raise ValueError(
            'eval_batch_size (%d) must evenly divide num_eval_images(%d)!' %
            (FLAGS.eval_batch_size, FLAGS.num_eval_images))

    eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size

    if FLAGS.mode == 'eval_once':
        ckpt_path = os.path.join(output_dir, FLAGS.eval_once_ckpt_prefix)
        dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
        classifier.evaluate(input_fn=dataset.input_fn,
                            steps=eval_steps,
                            checkpoint_path=ckpt_path,
                            name='{0}'.format(FLAGS.eval_once_ckpt_prefix))
    elif FLAGS.mode == 'eval':
        # The split does not change between checkpoints, so pick it once.
        dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
        # Run evaluation when there's a new checkpoint.
        for ckpt in evaluation.checkpoints_iterator(output_dir):
            tf.logging.info('Starting to evaluate.')
            try:
                classifier.evaluate(input_fn=dataset.input_fn,
                                    steps=eval_steps,
                                    checkpoint_path=ckpt,
                                    name='eval')
            except tf.errors.NotFoundError:
                # The checkpoint was removed before it could be evaluated;
                # move on to the next one.
                tf.logging.info(
                    'Checkpoint no longer exists,skipping checkpoint.')
                continue

            # Terminate eval job when final checkpoint is reached. The step is
            # the second "-"-separated field of the checkpoint basename.
            global_step = int(os.path.basename(ckpt).split('-')[1])
            if global_step >= FLAGS.train_steps:
                tf.logging.info('Evaluation finished after training step %d',
                                global_step)
                break
    else:
        # Validate explicitly: an `assert` would be stripped under `python -O`
        # and an unknown mode would then silently run train_and_eval.
        if FLAGS.mode not in ('train', 'train_and_eval'):
            raise ValueError('Unknown mode: %s' % FLAGS.mode)

        global_step = estimator._load_global_step_from_checkpoint_dir(  # pylint: disable=protected-access
            output_dir)
        # Session run hooks to export model for prediction.
        export_hook = ExportModelHook(cpu_classifier, export_dir)
        hooks = [export_hook]

        if FLAGS.mode == 'train':
            tf.logging.info('start training...')
            classifier.train(input_fn=imagenet_train.input_fn,
                             hooks=hooks,
                             max_steps=FLAGS.train_steps)
        else:
            tf.logging.info('start training and eval...')
            # NOTE(review): `hooks` is not passed to train() in this mode, so
            # the export hook never runs here -- confirm this is intended.
            while global_step < FLAGS.train_steps:
                next_checkpoint = min(global_step + FLAGS.steps_per_eval,
                                      FLAGS.train_steps)
                classifier.train(input_fn=imagenet_train.input_fn,
                                 max_steps=next_checkpoint)
                global_step = next_checkpoint
                tf.logging.info('Completed training up to step : %d',
                                global_step)
                classifier.evaluate(input_fn=imagenet_eval.input_fn,
                                    steps=eval_steps)
コード例 #3
0
def _get_folder_stub():
    """Returns the run-specific sub-directory for FLAGS.pruning_method.

    Raises:
      ValueError: If FLAGS.pruning_method is not a known method, or if it is
        'scratch' and FLAGS.load_mask_dir has fewer than 16 '/'-separated
        components.
    """
    method = FLAGS.pruning_method
    if method == 'threshold':
        return os.path.join(method,
                            str(FLAGS.end_sparsity),
                            str(FLAGS.sparsity_begin_step),
                            str(FLAGS.sparsity_end_step),
                            str(FLAGS.pruning_frequency),
                            str(FLAGS.label_smoothing))
    if method in ('variational_dropout', 'l0_regularization'):
        # Both methods use the same directory layout.
        return os.path.join(method,
                            str(FLAGS.sparsity_begin_step),
                            str(FLAGS.sparsity_end_step),
                            str(FLAGS.reg_scalar),
                            str(FLAGS.label_smoothing))
    if method == 'baseline':
        return os.path.join(method, str(0.0), str(0.0), str(0.0), str(0.0))
    if method == 'scratch':
        # The settings of the run that produced the mask are read from fixed
        # positions (components 10 through 15) of the mask directory path.
        # NOTE(review): this hard-codes the depth of load_mask_dir -- confirm
        # against the directory layout actually in use.
        run_info = FLAGS.load_mask_dir.split('/')
        if len(run_info) < 16:
            raise ValueError(
                'load_mask_dir must have at least 16 "/"-separated '
                'components, got %d: %s' %
                (len(run_info), FLAGS.load_mask_dir))
        (run_type, run_sparsity, run_begin, run_end, run_freq,
         run_label_smoothing) = run_info[10:16]
        return os.path.join(method, run_type, run_sparsity, run_begin,
                            run_end, run_freq, run_label_smoothing,
                            FLAGS.init_method)
    raise ValueError('Pruning method is not known %s' % (method))


def main(_):
    """Trains and/or evaluates a TPUEstimator on ImageNet, driven by FLAGS.mode.

    Checkpoints are kept under <FLAGS.output_dir>/<folder stub>, where the
    stub depends on FLAGS.pruning_method. Two TPUEstimators are built around
    `resnet_model_fn_w_pruning` (the second with `export_to_tpu=False`, used
    only by the export hook), then FLAGS.mode selects the action:

      * 'eval_once': evaluate the checkpoint
        <FLAGS.output_dir>/model.ckpt-<FLAGS.checkpoint_step>.
      * 'eval': evaluate each checkpoint yielded by
        `evaluation.checkpoints_iterator(output_dir)` and stop once a
        checkpoint's step reaches FLAGS.train_steps.
      * 'train': train up to FLAGS.train_steps with the export hook attached.
      * 'train_and_eval': alternate training for FLAGS.steps_per_eval steps
        with an evaluation until FLAGS.train_steps.

    Raises:
      ValueError: If FLAGS.pruning_method or FLAGS.mode is unknown, if the
        mask directory is too short for the 'scratch' method, or if
        FLAGS.eval_batch_size does not evenly divide FLAGS.num_eval_images.
    """
    output_dir = os.path.join(FLAGS.output_dir, _get_folder_stub())

    export_dir = os.path.join(output_dir, 'export_dir')

    # Values handed to the model function through the estimator's `params`.
    params = {
        'output_dir': output_dir,
        'pruning_method': FLAGS.pruning_method,
        'use_tpu': FLAGS.use_tpu,
        'log_alpha_threshold': FLAGS.log_alpha_threshold,
    }

    imagenet_train, imagenet_eval = [
        imagenet_input.ImageNetInput(  # pylint: disable=g-complex-comprehension
            is_training=is_training,
            data_dir=FLAGS.data_directory,
            transpose_input=False,
            num_parallel_calls=FLAGS.num_parallel_calls,
            use_bfloat16=False) for is_training in [True, False]
    ]

    run_config = tpu_config.RunConfig(
        master=FLAGS.master,
        model_dir=output_dir,
        save_checkpoints_steps=FLAGS.steps_per_checkpoint,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False),
        tpu_config=tpu_config.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_cores,
            tpu_job_name=FLAGS.tpu_job_name))

    classifier = tpu_estimator.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=resnet_model_fn_w_pruning,
        params=params,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)

    # Same configuration as `classifier` except for export_to_tpu=False; it is
    # only handed to ExportModelHook below.
    cpu_classifier = tpu_estimator.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=resnet_model_fn_w_pruning,
        params=params,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        export_to_tpu=False,
        eval_batch_size=FLAGS.eval_batch_size)

    if FLAGS.num_eval_images % FLAGS.eval_batch_size != 0:
        raise ValueError(
            'eval_batch_size (%d) must evenly divide num_eval_images(%d)!' %
            (FLAGS.eval_batch_size, FLAGS.num_eval_images))

    eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size

    if FLAGS.mode == 'eval_once':
        # os.path.join instead of string concatenation, so the path is also
        # correct when FLAGS.output_dir has no trailing separator.
        # NOTE(review): this uses FLAGS.output_dir, not the stubbed
        # `output_dir` that training writes to -- confirm which is intended.
        ckpt = os.path.join(FLAGS.output_dir,
                            'model.ckpt-{}'.format(FLAGS.checkpoint_step))
        classifier.evaluate(input_fn=imagenet_eval.input_fn,
                            steps=eval_steps,
                            checkpoint_path=ckpt,
                            name='{0}'.format(
                                int(FLAGS.log_alpha_threshold * 10)))
    elif FLAGS.mode == 'eval':
        # The evaluation name does not change between checkpoints.
        eval_name = '{0}'.format(int(FLAGS.log_alpha_threshold * 10))
        # Run evaluation when there's a new checkpoint.
        for ckpt in evaluation.checkpoints_iterator(output_dir):
            print('Starting to evaluate.')
            try:
                classifier.evaluate(input_fn=imagenet_eval.input_fn,
                                    steps=eval_steps,
                                    checkpoint_path=ckpt,
                                    name=eval_name)
            except tf.errors.NotFoundError:
                # The checkpoint was removed before it could be evaluated;
                # move on to the next one.
                print('Checkpoint no longer exists,skipping checkpoint.')
                continue

            # Terminate eval job when final checkpoint is reached. The step is
            # the second "-"-separated field of the checkpoint basename.
            global_step = int(os.path.basename(ckpt).split('-')[1])
            if global_step >= FLAGS.train_steps:
                print('Evaluation finished after training step %d' %
                      global_step)
                break
    else:
        # Validate explicitly: an `assert` would be stripped under `python -O`
        # and an unknown mode would then silently run train_and_eval.
        if FLAGS.mode not in ('train', 'train_and_eval'):
            raise ValueError('Unknown mode: %s' % FLAGS.mode)

        global_step = tf.estimator._load_global_step_from_checkpoint_dir(
            output_dir)  # pylint: disable=protected-access,line-too-long
        # Session run hooks to export model for prediction.
        export_hook = ExportModelHook(cpu_classifier, export_dir)
        hooks = [export_hook]

        if FLAGS.mode == 'train':
            print('start training...')
            classifier.train(input_fn=imagenet_train.input_fn,
                             hooks=hooks,
                             max_steps=FLAGS.train_steps)
        else:
            print('start training and eval...')
            # NOTE(review): `hooks` is not passed to train() in this mode, so
            # the export hook never runs here -- confirm this is intended.
            while global_step < FLAGS.train_steps:
                next_checkpoint = min(global_step + FLAGS.steps_per_eval,
                                      FLAGS.train_steps)
                classifier.train(input_fn=imagenet_train.input_fn,
                                 max_steps=next_checkpoint)
                global_step = next_checkpoint
                print('Completed training up to step :', global_step)
                classifier.evaluate(input_fn=imagenet_eval.input_fn,
                                    steps=eval_steps)