Esempio n. 1
0
def run_imagenet(flags_obj):
    """Run ResNet ImageNet training and eval loop.

    A list of input functions is assembled from the flags and handed to
    `resnet_run_loop.resnet_main` together with the ImageNet model function.

    Args:
      flags_obj: An object containing parsed flag values. This function
        reads `use_synthetic_data` and `model_method` from it.

    Raises:
      ValueError: If `flags_obj.model_method` is neither equal to 1 nor
        greater than 1.
    """
    # When synthetic data is requested its input function goes first; the
    # functions chosen by `model_method` are still appended after it.
    selected_fns = (
        [get_synth_input_fn()] if flags_obj.use_synthetic_data else [])

    if flags_obj.model_method == 1:
        selected_fns += [input_fn]
    elif flags_obj.model_method > 1:
        # Methods above 1 additionally register the two box input functions.
        selected_fns += [input_fn, box_cond_input_fn, box_marg_input_fn]
    else:
        raise ValueError('invalid input for model method')

    resnet_run_loop.resnet_main(
        flags_obj, imagenet_model_fn, selected_fns, DATASET_NAME,
        shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
Esempio n. 2
0
def main(argv):
  """Seed the RNGs, log MLPerf metadata and run ImageNet ResNet training.

  Args:
    argv: Command-line argument list. `argv[1]` is converted with `int()`
      and used as the random seed; `argv[2:]` is handed to the ResNet
      argument parser.
  """
  parser = resnet_run_loop.ResnetArgParser(
      resnet_size_choices=[18, 34, 50, 101, 152, 200])

  parser.set_defaults(
      train_epochs=90,
      version=1
  )

  # The seed occupies argv[1], so flag parsing starts one position later.
  flags = parser.parse_args(args=argv[2:])

  seed = int(argv[1])
  print('Setting random seed = ', seed)
  print('special seeding')
  mlperf_log.resnet_print(key=mlperf_log.RUN_SET_RANDOM_SEED, value=seed)
  # Seed every random number generator touched here with the same value.
  random.seed(seed)
  tf.set_random_seed(seed)
  numpy.random.seed(seed)

  mlperf_log.resnet_print(key=mlperf_log.PREPROC_NUM_TRAIN_EXAMPLES,
                          value=_NUM_IMAGES['train'])
  mlperf_log.resnet_print(key=mlperf_log.PREPROC_NUM_EVAL_EXAMPLES,
                          value=_NUM_IMAGES['validation'])

  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn() if flags.use_synthetic_data else input_fn)

  # This variant of resnet_main takes the seed as its first argument.
  resnet_run_loop.resnet_main(
      seed, flags, imagenet_model_fn, input_function,
      shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
Esempio n. 3
0
def run_cifar(flags_obj):
  """Run ResNet CIFAR-10 training and eval loop on a TensorFlow cluster.

  A `tf.train.Server` is created for this process's job and task. A "ps"
  job then calls `server.join()`; a "worker" job runs `resnet_main` inside
  a replica device setter scope. Any other job name falls through without
  doing anything further.

  Args:
    flags_obj: An object containing parsed flag values. Reads
      `image_bytes_as_serving_input`, `job_name`, `task_index` and
      `use_synthetic_data`.
  """
  if flags_obj.image_bytes_as_serving_input:
    tf.logging.fatal('--image_bytes_as_serving_input cannot be set to True '
                     'for CIFAR. This flag is only applicable to ImageNet.')
    return

  # Start a server for this specific task. NOTE: `cluster` is not defined in
  # this function; it has to be provided by the enclosing module.
  server = tf.train.Server(
      cluster, job_name=flags_obj.job_name, task_index=flags_obj.task_index)

  if flags_obj.job_name == "ps":
    server.join()
  elif flags_obj.job_name == "worker":
    # Between-graph replication.
    worker_device = "/job:worker/task:%d" % flags_obj.task_index
    with tf.device(tf.train.replica_device_setter(
        worker_device=worker_device, cluster=cluster)):
      # Variable that counts the number of updates. Only the call matters
      # here; the returned handle is not used in this function.
      tf.get_variable(
          'global_step', [], initializer=tf.constant_initializer(0),
          trainable=False)

      # A conditional expression (not the `and ... or` idiom) so the choice
      # depends only on the flag, never on the truthiness of the synthetic fn.
      input_function = (
          get_synth_input_fn(flags_core.get_tf_dtype(flags_obj))
          if flags_obj.use_synthetic_data else input_fn)

      # Run training.
      resnet_run_loop.resnet_main(
          flags_obj, cifar10_model_fn, input_function, DATASET_NAME,
          shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])
Esempio n. 4
0
def run_cifar(flags_obj):
    """Run ResNet CIFAR-10 training and eval loop.

    Args:
      flags_obj: An object containing parsed flag values. Reads
        `image_bytes_as_serving_input` and `use_synthetic_data`.
    """
    if flags_obj.image_bytes_as_serving_input:
        tf.logging.fatal(
            '--image_bytes_as_serving_input cannot be set to True '
            'for CIFAR. This flag is only applicable to ImageNet.')
        return

    # A conditional expression (not the `and ... or` idiom) so the choice
    # depends only on the flag, never on the truthiness of the synthetic fn.
    input_function = (
        get_synth_input_fn(flags_core.get_tf_dtype(flags_obj))
        if flags_obj.use_synthetic_data else input_fn)

    # Local modification: validate by steps rather than by epochs. To support
    # that, the training-set size `_NUM_IMAGES['train']` is passed as an
    # extra positional argument after the dataset name.
    resnet_run_loop.resnet_main(flags_obj,
                                cifar10_model_fn,
                                input_function,
                                DATASET_NAME,
                                _NUM_IMAGES['train'],
                                shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])
Esempio n. 5
0
def main(flags_obj):
  """Run the ResNet loop with the ImageNet model function.

  Args:
    flags_obj: An object containing parsed flag values. Reads
      `use_synthetic_data`; the object is forwarded to `resnet_main`.
  """
  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn() if flags_obj.use_synthetic_data else input_fn)

  resnet_run_loop.resnet_main(
      flags_obj, imagenet_model_fn, input_function,
      shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
Esempio n. 6
0
def main(argv):
    """Parse command-line flags and run the ResNet loop for ImageNet.

    Args:
      argv: Command-line argument list; `argv[1:]` is passed to the parser.
    """
    parser = resnet_run_loop.ResnetArgParser(
        resnet_size_choices=[18, 34, 50, 101, 152, 200])
    flags = parser.parse_args(args=argv[1:])

    # A conditional expression (not the `and ... or` idiom) so the choice
    # depends only on the flag, never on the truthiness of the synthetic fn.
    input_function = (
        get_synth_input_fn() if flags.use_synthetic_data else input_fn)
    resnet_run_loop.resnet_main(flags, imagenet_model_fn, input_function)
Esempio n. 7
0
def main(flags_obj):
    """Run the ResNet loop with the CIFAR-10 model function.

    Args:
      flags_obj: An object containing parsed flag values. Reads
        `use_synthetic_data`; the object is forwarded to `resnet_main`.
    """
    # A conditional expression (not the `and ... or` idiom) so the choice
    # depends only on the flag, never on the truthiness of the synthetic fn.
    input_function = (
        get_synth_input_fn() if flags_obj.use_synthetic_data else input_fn)

    resnet_run_loop.resnet_main(flags_obj,
                                cifar10_model_fn,
                                input_function,
                                shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])
Esempio n. 8
0
def run_cifar(flags_obj):
    """Run ResNet CIFAR-10 training and eval loop, then report to the monitor.

    After `resnet_main` returns, the directory tree under `flags_obj.md` is
    scanned for files whose name contains 'tfevents', and the outcome is
    handed to `sendback` according to `flags_obj.status`:

    * 'init'  -- training arguments are computed from the training events
      file and sent back only when that result is non-empty.
    * 'train' -- an empty training-argument list is sent back.
    * anything else -- nothing is sent.

    Args:
      flags_obj: An object containing parsed flag values. Reads
        `use_synthetic_data`, `md`, `status` and `bs`.
    """
    # A conditional expression (not the `and ... or` idiom) so the choice
    # depends only on the flag, never on the truthiness of the synthetic fn.
    input_function = (
        get_synth_input_fn() if flags_obj.use_synthetic_data else input_fn)
    resnet_run_loop.resnet_main(flags_obj,
                                cifar10_model_fn,
                                input_function,
                                DATASET_NAME,
                                shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])

    # Find the training events file. Event files located under a path that
    # contains 'eval' are skipped: validation events are not analysed yet.
    # When several candidates exist, the last one visited wins.
    train_file = ''
    for root, _dirs, names in os.walk(flags_obj.md):
        for name in names:
            if 'tfevents' in name and 'eval' not in root:
                train_file = os.path.join(root, name)

    eval_args = []  # Validation arguments are not collected yet.
    abnormal = 0  # TODO: define the uptrend check that should set this.

    if flags_obj.status == 'init':
        # Collect the training arguments, then report them to the server.
        train_args = analyzer.training_args(train_file, int(flags_obj.bs))
        # Explicit length check: the concrete container type returned by
        # the analyzer is not visible from here.
        if len(train_args) > 0:
            sendback(flags_obj, train_args, eval_args, abnormal)
    elif flags_obj.status == 'train':
        # TODO: evaluate model performance; for now report empty data.
        sendback(flags_obj, [], eval_args, abnormal)
Esempio n. 9
0
def run_cifar(flags_obj):
  """Run ResNet CIFAR-10 training and eval loop.

  Args:
    flags_obj: An object containing parsed flag values. Reads
      `use_synthetic_data`; the object is forwarded to `resnet_main`.
  """
  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn() if flags_obj.use_synthetic_data else input_fn)
  resnet_run_loop.resnet_main(
      flags_obj, cifar10_model_fn, input_function, DATASET_NAME,
      shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])
Esempio n. 10
0
def run_imagenet(flags_obj):
  """Run ResNet ImageNet training and eval loop.

  Args:
    flags_obj: An object containing parsed flag values. Reads
      `use_synthetic_data`; the object is forwarded to `resnet_main`.
  """
  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn() if flags_obj.use_synthetic_data else input_fn)

  resnet_run_loop.resnet_main(
      flags_obj, imagenet_model_fn, input_function, DATASET_NAME,
      shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
Esempio n. 11
0
def run_imagenet(flags_obj):
  """Run ResNet ImageNet training and eval loop.

  Args:
    flags_obj: An object containing parsed flag values. Reads
      `use_synthetic_data`; the object is forwarded to `resnet_main`.
  """
  # Select the input pipeline with a conditional expression; the older
  # `and ... or` form would fall back to `input_fn` if the synthetic
  # factory ever returned a falsy value.
  if_synthetic = flags_obj.use_synthetic_data
  input_function = get_synth_input_fn() if if_synthetic else input_fn

  resnet_run_loop.resnet_main(
      flags_obj, imagenet_model_fn, input_function, DATASET_NAME,
      shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
Esempio n. 12
0
def main(argv):
  """Parse command-line flags and run the ResNet loop for ImageNet.

  The parser default for `train_epochs` is set to 100 before parsing.

  Args:
    argv: Command-line argument list; `argv[1:]` is passed to the parser.
  """
  parser = resnet_run_loop.ResnetArgParser(
      resnet_size_choices=[18, 34, 50, 101, 152, 200])

  parser.set_defaults(
      train_epochs=100
  )

  flags = parser.parse_args(args=argv[1:])

  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn() if flags.use_synthetic_data else input_fn)
  resnet_run_loop.resnet_main(flags, imagenet_model_fn, input_function)
Esempio n. 13
0
def run_zj(flags_obj):
    """Start the ResNet training and eval loop for the ZJ-10 dataset.

    Args:
      flags_obj: An object containing parsed flag values; it is forwarded
        unchanged to `resnet_main`.
    """
    # Only the regular input function is used here; this entry point has no
    # synthetic-data option.
    data_fn = input_fn

    resnet_run_loop.resnet_main(
        flags_obj, zj_model_fn, data_fn, DATASET_NAME,
        shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])
Esempio n. 14
0
def run_cifar(flags_obj):
  """Run ResNet CIFAR-10 training and eval loop.

  Args:
    flags_obj: An object containing parsed flag values. Reads
      `use_synthetic_data`; the object is forwarded to `resnet_main`.
  """
  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn() if flags_obj.use_synthetic_data else input_fn)

  # INPUT: the `shape` argument below carries the input dimensions; its
  # values are module-level constants defined outside this function.
  resnet_run_loop.resnet_main(
      flags_obj, cifar10_model_fn, input_function, DATASET_NAME,
      shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])
Esempio n. 15
0
def run_imagenet(flags_obj):
  """Run ResNet ImageNet training and eval loop.

  Args:
    flags_obj: An object containing parsed flag values. Reads
      `use_synthetic_data`; the object is forwarded to `resnet_main`.
  """
  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn() if flags_obj.use_synthetic_data else input_fn)

  # INPUT: the `shape` argument below carries the input dimensions; its
  # values are module-level constants defined outside this function.
  resnet_run_loop.resnet_main(
      flags_obj, imagenet_model_fn, input_function, DATASET_NAME,
      shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
Esempio n. 16
0
def main(argv):
  """Parse command-line flags and run the ResNet loop for CIFAR-10.

  Args:
    argv: Command-line argument list; `argv[1:]` is passed to the parser.
  """
  parser = resnet_run_loop.ResnetArgParser()
  # Set defaults that are reasonable for this model.
  parser.set_defaults(data_dir='/tmp/cifar10_data',
                      model_dir='/tmp/cifar10_model',
                      resnet_size=32,
                      train_epochs=250,
                      epochs_between_evals=10,
                      batch_size=128)

  flags = parser.parse_args(args=argv[1:])

  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn() if flags.use_synthetic_data else input_fn)
  resnet_run_loop.resnet_main(flags, cifar10_model_fn, input_function)
Esempio n. 17
0
def main(argv):
    """Parse command-line flags and run the ResNet loop for ImageNet.

    The parser default for `train_epochs` is set to 100 before parsing.

    Args:
      argv: Command-line argument list; `argv[1:]` is passed to the parser.
    """
    parser = resnet_run_loop.ResnetArgParser(
        resnet_size_choices=[18, 34, 50, 101, 152, 200])

    parser.set_defaults(train_epochs=100)

    flags = parser.parse_args(args=argv[1:])

    # A conditional expression (not the `and ... or` idiom) so the choice
    # depends only on the flag, never on the truthiness of the synthetic fn.
    input_function = (
        get_synth_input_fn() if flags.use_synthetic_data else input_fn)

    resnet_run_loop.resnet_main(
        flags,
        imagenet_model_fn,
        input_function,
        shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
Esempio n. 18
0
def run_imagenet(flags_obj):
  """Run ResNet ImageNet training and eval loop.

  Args:
    flags_obj: An object containing parsed flag values. Reads
      `use_synthetic_data` and `use_dali`.

  Returns:
    The value returned by `resnet_run_loop.resnet_main`. It is expected to
    be a dict of results with the keys `eval_results` (accuracy (top_1) and
    accuracy_top_5) and `train_hooks` (a list of the hook instances used
    during training).
  """
  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn(flags_core.get_tf_dtype(flags_obj))
      if flags_obj.use_synthetic_data else input_fn)

  # The DALI pipeline, when enabled, overrides whichever function was
  # selected above (including the synthetic one).
  if flags_obj.use_dali:
    input_function = dali_pipeline.dali_input_fn

  result = resnet_run_loop.resnet_main(
      flags_obj, imagenet_model_fn, input_function, DATASET_NAME,
      shape=[DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, NUM_CHANNELS])

  return result
Esempio n. 19
0
def main(argv):
  """Parse command-line flags and run the ResNet loop for CIFAR-10.

  Args:
    argv: Command-line argument list; `argv[1:]` is passed to the parser.
  """
  parser = resnet_run_loop.ResnetArgParser()
  # Set defaults that are reasonable for this model.
  parser.set_defaults(data_dir='/tmp/cifar10_data',
                      model_dir='/tmp/cifar10_model',
                      resnet_size=32,
                      train_epochs=250,
                      epochs_between_evals=10,
                      batch_size=128)

  flags = parser.parse_args(args=argv[1:])

  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn() if flags.use_synthetic_data else input_fn)

  resnet_run_loop.resnet_main(
      flags, cifar10_model_fn, input_function,
      shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])
Esempio n. 20
0
def run_cifar(flags_obj):
  """Run ResNet CIFAR-10 training and eval loop.

  Logs a fatal message and returns without training when
  `image_bytes_as_serving_input` is set.

  Args:
    flags_obj: An object containing parsed flag values. Reads
      `image_bytes_as_serving_input` and `use_synthetic_data`.
  """
  if flags_obj.image_bytes_as_serving_input:
    tf.logging.fatal('--image_bytes_as_serving_input cannot be set to True '
                     'for CIFAR. This flag is only applicable to ImageNet.')
    return

  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn(flags_core.get_tf_dtype(flags_obj))
      if flags_obj.use_synthetic_data else input_fn)
  resnet_run_loop.resnet_main(
      flags_obj, cifar10_model_fn, input_function, DATASET_NAME,
      shape=[_HEIGHT, _WIDTH, _NUM_CHANNELS])
Esempio n. 21
0
def main(argv):
  """Seed the RNGs and run ImageNet ResNet training.

  The parser default for `train_epochs` is set to 100 before parsing.

  Args:
    argv: Command-line argument list. `argv[1]` is converted with `int()`
      and used as the random seed; `argv[2:]` is handed to the ResNet
      argument parser.
  """
  parser = resnet_run_loop.ResnetArgParser(
      resnet_size_choices=[18, 34, 50, 101, 152, 200])

  parser.set_defaults(
      train_epochs=100
  )

  # The seed occupies argv[1], so flag parsing starts one position later.
  flags = parser.parse_args(args=argv[2:])

  seed = int(argv[1])
  print('Setting random seed = ', seed)
  print('special seeding')
  # Seed every random number generator touched here with the same value.
  random.seed(seed)
  tf.set_random_seed(seed)
  numpy.random.seed(seed)

  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn() if flags.use_synthetic_data else input_fn)

  # This variant of resnet_main takes the seed as its first argument.
  resnet_run_loop.resnet_main(
      seed, flags, imagenet_model_fn, input_function,
      shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
Esempio n. 22
0
def main(argv):
    """Parse command-line flags and run the ResNet loop for ImageNet.

    The parser default for `train_epochs` is set to 100 before parsing.

    Args:
      argv: Command-line argument list; `argv[1:]` is passed to the parser.
    """
    parser = resnet_run_loop.ResnetArgParser(
        resnet_size_choices=[18, 34, 50, 101, 152, 200])

    parser.set_defaults(train_epochs=100)

    flags = parser.parse_args(args=argv[1:])
    # Disabled: suffix the output directories with a per-process id read
    # from the environment (presumably set by the job launcher).
    #procid = os.environ['SLURM_PROCID']
    #procid = os.environ['ALPS_APP_PE']
    #flags.model_dir = flags.model_dir + '/' + procid
    #flags.benchmark_log_dir = flags.benchmark_log_dir + '/' + procid
    #flags.export_dir = flags.export_dir + '/' + procid

    # A conditional expression (not the `and ... or` idiom) so the choice
    # depends only on the flag, never on the truthiness of the synthetic fn.
    input_function = (
        get_synth_input_fn() if flags.use_synthetic_data else input_fn)

    # This variant of resnet_main additionally receives the training and
    # validation image counts as positional arguments.
    resnet_run_loop.resnet_main(
        flags,
        imagenet_model_fn,
        input_function,
        _NUM_IMAGES['train'],
        _NUM_IMAGES['validation'],
        shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
Esempio n. 23
0
def main(argv):
    """Seed the RNGs and run ImageNet ResNet training.

    The parser default for `train_epochs` is set to 100 before parsing.

    Args:
      argv: Command-line argument list. `argv[1]` is converted with `int()`
        and used as the random seed; `argv[2:]` is handed to the ResNet
        argument parser.
    """
    parser = resnet_run_loop.ResnetArgParser(
        resnet_size_choices=[18, 34, 50, 101, 152, 200])

    parser.set_defaults(train_epochs=100)

    # The seed occupies argv[1], so flag parsing starts one position later.
    flags = parser.parse_args(args=argv[2:])

    seed = int(argv[1])
    print('Setting random seed = ', seed)
    print('special seeding')
    # Seed every random number generator touched here with the same value.
    random.seed(seed)
    tf.set_random_seed(seed)
    numpy.random.seed(seed)

    # A conditional expression (not the `and ... or` idiom) so the choice
    # depends only on the flag, never on the truthiness of the synthetic fn.
    input_function = (
        get_synth_input_fn() if flags.use_synthetic_data else input_fn)

    # This variant of resnet_main takes the seed as its first argument.
    resnet_run_loop.resnet_main(
        seed,
        flags,
        imagenet_model_fn,
        input_function,
        shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS])
Esempio n. 24
0
def run_resnet(flags_obj):
    """Launch the ResNet CIFAR-10 training and eval loop.

    Args:
      flags_obj: An object containing parsed flag values. Reads
        `image_bytes_as_serving_input`; the object is forwarded to
        `resnet_main`.

    Returns:
      The value returned by `resnet_run_loop.resnet_main` (expected to be a
      dictionary of results including final accuracy), or None when
      `image_bytes_as_serving_input` is set.
    """
    if not flags_obj.image_bytes_as_serving_input:
        return resnet_run_loop.resnet_main(
            flags_obj, resnet_model_fn, input_fn, DATASET_NAME,
            shape=[HEIGHT, WIDTH, NUM_CHANNELS])

    # The serving-input flag is rejected here: log it and skip training.
    tf.compat.v1.logging.fatal(
        '--image_bytes_as_serving_input cannot be set to True for CIFAR. '
        'This flag is only applicable to ImageNet.')
    return None
Esempio n. 25
0
def run_cifar(flags_obj):
  """Run ResNet CIFAR-10 training and eval loop.

  Args:
    flags_obj: An object containing parsed flag values. Reads
      `image_bytes_as_serving_input` and `use_synthetic_data`.

  Returns:
    The value returned by `resnet_run_loop.resnet_main` (expected to be a
    dictionary of results including final accuracy), or None when
    `image_bytes_as_serving_input` is set.
  """
  if flags_obj.image_bytes_as_serving_input:
    tf.logging.fatal('--image_bytes_as_serving_input cannot be set to True '
                     'for CIFAR. This flag is only applicable to ImageNet.')
    return

  # A conditional expression (not the `and ... or` idiom) so the choice
  # depends only on the flag, never on the truthiness of the synthetic fn.
  input_function = (
      get_synth_input_fn(flags_core.get_tf_dtype(flags_obj))
      if flags_obj.use_synthetic_data else input_fn)
  result = resnet_run_loop.resnet_main(
      flags_obj, cifar10_model_fn, input_function, DATASET_NAME,
      shape=[HEIGHT, WIDTH, NUM_CHANNELS])

  return result
Esempio n. 26
0
def main(unused_argv):
    """Run the ResNet loop for ImageNet using the module-level `FLAGS`.

    Args:
      unused_argv: Ignored; flag values are read from `FLAGS`, which is not
        defined in this function.
    """
    # A conditional expression (not the `and ... or` idiom) so the choice
    # depends only on the flag, never on the truthiness of the synthetic fn.
    input_function = (
        get_synth_input_fn() if FLAGS.use_synthetic_data else input_fn)
    resnet_run_loop.resnet_main(FLAGS, imagenet_model_fn, input_function)