Code Example #1
def input_fn_train():
    return input_function(
        is_training=True,
        data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=flags_obj.train_epochs,
        num_gpus=flags_core.get_num_gpus(flags_obj),
        dtype=flags_core.get_tf_dtype(flags_obj))
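
Every example on this page routes the global --batch_size flag through distribution_utils.per_device_batch_size before it reaches the input pipeline. For reference, a minimal sketch of that helper, assuming the TF 1.x official-models behavior (split the global batch evenly across GPUs, and reject sizes that do not divide evenly):

def per_device_batch_size(batch_size, num_gpus):
    """Sketch: splits a global batch size evenly across num_gpus devices."""
    if num_gpus <= 1:
        return batch_size
    remainder = batch_size % num_gpus
    if remainder:
        # An uneven split would give some GPUs larger batches than others.
        raise ValueError(
            'batch_size must be a multiple of num_gpus: got batch_size={} '
            'with num_gpus={}; try --batch_size={} instead.'.format(
                batch_size, num_gpus, batch_size - remainder))
    return batch_size // num_gpus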
Code Example #2
def test(_):
    tf.enable_eager_execution()
    flag_obj = define_coco_flags()
    cocodataset = coco_dataset.CocoDataset()

    cocodataset.load_coco('/home/hume/Deep-learning/dataset/coco', 'train',
                          DEFAULT_DATASET_YEAR)
    cocodataset.prepare()

    augmentation = imgaug.augmenters.Fliplr(0.5)

    input_iter = input_fn(
        cocodataset,
        is_training=True,
        batch_size=distribution_utils.per_device_batch_size(
            flag_obj.batch_size, flags_core.get_num_gpus(flag_obj)),
        anchors_path=flag_obj.anchors_path,
        num_epochs=flag_obj.train_epochs,
        dtype=tf.float32,
        max_num_boxes_per_image=flag_obj.max_num_boxes_per_image,
        image_size=flag_obj.image_size,
        augmentation=augmentation,
        num_parallel_batches=flag_obj.datasets_num_parallel_batches,
        datasets_num_private_threads=multiprocessing.cpu_count() - 3)
    coco_iter = input_iter.make_one_shot_iterator()
    starttime = time()
    imgs, y_gt = coco_iter.get_next()
    print('cost {}ms\n'.format((time() - starttime) * 1000))

    print(imgs.shape)
    print(y_gt.shape)
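
Because the test enables eager execution, the dataset returned by input_fn can also be consumed with a plain Python loop rather than make_one_shot_iterator; a minimal sketch, assuming input_iter above is a tf.data.Dataset:

# Under eager execution a tf.data.Dataset is directly iterable.
for imgs, y_gt in input_iter.take(1):  # take(1) yields a single batch
    print(imgs.shape, y_gt.shape)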
Code Example #3
def input_fn_train(num_epochs):
    return input_function(
        data_set=dataset,
        is_training=True,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        anchors_path=flags_obj.anchors_path,
        num_epochs=num_epochs,
        augmentation=augmentation,
        dtype=tf.float32,
        max_num_boxes_per_image=flags_obj.max_num_boxes_per_image,
        image_size=flags_obj.image_size,
        datasets_num_private_threads=flags_obj.datasets_num_private_threads,
        num_parallel_batches=flags_obj.datasets_num_parallel_batches)
Code Example #4
def construct_estimator(flags_obj, params, schedule_manager):
  """Construct an estimator from either Estimator or TPUEstimator.

  Args:
    flags_obj: The FLAGS object parsed from command line.
    params: A dict of run specific parameters.
    schedule_manager: A schedule.Manager object containing the run schedule.

  Returns:
    An estimator object to be used for training and eval.
  """
  if not params["use_tpu"]:
    distribution_strategy = distribution_utils.get_distribution_strategy(
        flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)
    return tf.estimator.Estimator(
        model_fn=model_fn, model_dir=flags_obj.model_dir, params=params,
        config=tf.estimator.RunConfig(train_distribute=distribution_strategy))

  tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
      tpu=flags_obj.tpu,
      zone=flags_obj.tpu_zone,
      project=flags_obj.tpu_gcp_project
  )

  tpu_config = tf.contrib.tpu.TPUConfig(
      iterations_per_loop=schedule_manager.single_iteration_train_steps,
      num_shards=flags_obj.num_tpu_shards)

  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=flags_obj.model_dir,
      session_config=tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=True),
      tpu_config=tpu_config)

  return tf.contrib.tpu.TPUEstimator(
      model_fn=model_fn,
      use_tpu=params["use_tpu"] and flags_obj.tpu != tpu_util.LOCAL,
      train_batch_size=schedule_manager.batch_size,
      eval_batch_size=schedule_manager.batch_size,
      params={
          # TPUEstimator needs to populate batch_size itself due to sharding.
          key: value for key, value in params.items() if key != "batch_size"},
      config=run_config)
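
On the non-TPU path, distribution_utils.get_distribution_strategy maps a GPU count onto a tf.contrib.distribute strategy. A sketch consistent with the TF 1.x official-models helper (the all_reduce_alg handling is simplified here and should be treated as an assumption):

def get_distribution_strategy(num_gpus, all_reduce_alg=None):
    """Sketch: returns a DistributionStrategy for num_gpus devices."""
    if num_gpus == 0:
        return tf.contrib.distribute.OneDeviceStrategy('device:CPU:0')
    if num_gpus == 1:
        return tf.contrib.distribute.OneDeviceStrategy('device:GPU:0')
    if all_reduce_alg:
        # Pin a specific all-reduce algorithm (e.g. 'nccl') for gradient
        # aggregation across devices.
        return tf.contrib.distribute.MirroredStrategy(
            num_gpus=num_gpus,
            cross_tower_ops=tf.contrib.distribute.AllReduceCrossTowerOps(
                all_reduce_alg, num_packs=num_gpus))
    return tf.contrib.distribute.MirroredStrategy(num_gpus=num_gpus)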
Code Example #5
    def __init__(self):

        anchors = utils.get_anchors(flags.FLAGS.anchors_path)
        num_anchors = len(anchors)
        anchors = np.array(anchors, dtype=np.float32)

        super(CocoModel, self).__init__(
            image_size=flags.FLAGS.image_size,
            image_channels=flags.FLAGS.image_channels,
            num_classes=flags.FLAGS.num_classes,
            anchors=anchors,
            batch_size=distribution_utils.per_device_batch_size(
                flags.FLAGS.batch_size, flags_core.get_num_gpus(flags.FLAGS)),
            num_anchors=num_anchors,
            learning_rate=flags.FLAGS.learning_rate,
            backbone=flags.FLAGS.backbone,
            norm=flags.FLAGS.norm,
            threshold=flags.FLAGS.threshold,
            max_num_boxes_per_image=flags.FLAGS.max_num_boxes_per_image,
            confidence_score=flags.FLAGS.confidence_score,
            data_format=flags.FLAGS.data_format,
            dtype=flags_core.get_tf_dtype(flags.FLAGS))
Code Example #6
def run_transformer(flags_obj):
  """Create tf.Estimator to train and evaluate transformer model.

  Args:
    flags_obj: Object containing parsed flag values.
  """
  num_gpus = flags_core.get_num_gpus(flags_obj)

  # Add flag-defined parameters to params object
  params = PARAMS_MAP[flags_obj.param_set]
  if num_gpus > 1:
    if flags_obj.param_set == "big":
      params = model_params.BIG_MULTI_GPU_PARAMS
    elif flags_obj.param_set == "base":
      params = model_params.BASE_MULTI_GPU_PARAMS

  params["data_dir"] = flags_obj.data_dir
  params["model_dir"] = flags_obj.model_dir
  params["num_parallel_calls"] = flags_obj.num_parallel_calls

  params["tpu"] = flags_obj.tpu
  params["use_tpu"] = bool(flags_obj.tpu)  # was a tpu specified.
  params["static_batch"] = flags_obj.static_batch or params["use_tpu"]
  params["allow_ffn_pad"] = not params["use_tpu"]

  params["use_synthetic_data"] = flags_obj.use_synthetic_data

  # Set batch size parameter, which depends on the availability of
  # TPU and GPU, and distribution settings.
  params["batch_size"] = (flags_obj.batch_size or (
      params["default_batch_size_tpu"] if params["use_tpu"]
      else params["default_batch_size"]))

  if not params["use_tpu"]:
    params["batch_size"] = distribution_utils.per_device_batch_size(
        params["batch_size"], num_gpus)

  schedule_manager = schedule.Manager(
      train_steps=flags_obj.train_steps,
      steps_between_evals=flags_obj.steps_between_evals,
      train_epochs=flags_obj.train_epochs,
      epochs_between_evals=flags_obj.epochs_between_evals,
      default_train_epochs=DEFAULT_TRAIN_EPOCHS,
      batch_size=params["batch_size"],
      max_length=params["max_length"],
      use_tpu=params["use_tpu"],
      num_tpu_shards=flags_obj.num_tpu_shards
  )

  params["repeat_dataset"] = schedule_manager.repeat_dataset

  model_helpers.apply_clean(flags.FLAGS)

  # Create hooks that log information about the training and metric values
  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      tensors_to_log=TENSORS_TO_LOG,  # used for logging hooks
      batch_size=schedule_manager.batch_size,  # for ExamplesPerSecondHook
      use_tpu=params["use_tpu"]  # Not all hooks can run with TPUs
  )
  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info(
      model_name="transformer",
      dataset_name="wmt_translate_ende",
      run_params=params,
      test_id=flags_obj.benchmark_test_id)

  # Train and evaluate transformer model
  estimator = construct_estimator(flags_obj, params, schedule_manager)
  run_loop(
      estimator=estimator,
      # Training arguments
      schedule_manager=schedule_manager,
      train_hooks=train_hooks,
      benchmark_logger=benchmark_logger,
      # BLEU calculation arguments
      bleu_source=flags_obj.bleu_source,
      bleu_ref=flags_obj.bleu_ref,
      bleu_threshold=flags_obj.stop_threshold,
      vocab_file=flags_obj.vocab_file)

  if flags_obj.export_dir and not params["use_tpu"]:
    serving_input_fn = export.build_tensor_serving_input_receiver_fn(
        shape=[None], dtype=tf.int64, batch_size=None)
    # Export saved model, and save the vocab file as an extra asset. The vocab
    # file is saved to allow consistent input encoding and output decoding.
    # (See the "Export trained model" section in the README for an example of
    # how to use the vocab file.)
    # Since the model itself does not use the vocab file, this file is saved as
    # an extra asset rather than a core asset.
    estimator.export_savedmodel(
        flags_obj.export_dir, serving_input_fn,
        assets_extra={"vocab.txt": flags_obj.vocab_file})
Code Example #7
def resnet_main(flags_obj,
                model_function,
                input_function,
                dataset_name,
                shape=None):
    """Shared main loop for ResNet Models.

  Args:
    flags_obj: An object containing parsed flags. See define_resnet_flags()
      for details.
    model_function: the function that instantiates the Model and builds the
      ops for train/eval. This will be passed directly into the estimator.
    input_function: the function that processes the dataset and returns a
      dataset that the estimator can train on. This will be wrapped with
      all the relevant flags for running and passed to estimator.
    dataset_name: the name of the dataset for training and evaluation. This is
      used for logging purpose.
    shape: list of ints representing the shape of the images used for training.
      This is only used if flags_obj.export_dir is passed.
  """

    model_helpers.apply_clean(flags.FLAGS)

    # Using the Winograd non-fused algorithms provides a small performance boost.
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

    # Create session config based on values of inter_op_parallelism_threads and
    # intra_op_parallelism_threads. Note that we default to having
    # allow_soft_placement = True, which is required for multi-GPU and not
    # harmful for other modes.
    session_config = tf.ConfigProto(
        inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
        allow_soft_placement=True)

    distribution_strategy = None
    if flags_obj.distribution_strategy == 'ps':
        print('==> Using ParameterServerStrategy')
        distribution_strategy = tf.contrib.distribute.ParameterServerStrategy(
            num_gpus_per_worker=flags_core.get_num_gpus(flags_obj))
    elif flags_obj.distribution_strategy == 'allreduce':
        print('==> Using CollectiveAllReduceStrategy')
        distribution_strategy = tf.contrib.distribute.CollectiveAllReduceStrategy(
            num_gpus_per_worker=flags_core.get_num_gpus(flags_obj))
    elif flags_obj.distribution_strategy == 'mirror':
        print('==> Using MirroredStrategy')
        distribution_strategy = tf.contrib.distribute.MirroredStrategy(
            num_gpus_per_worker=flags_core.get_num_gpus(flags_obj))
    else:
        print("==> Distribution Strategy {} is not valid".format(
            flags_obj.distribution_strategy))

    run_config = tf.estimator.RunConfig(train_distribute=distribution_strategy,
                                        session_config=session_config,
                                        protocol="grpc+verbs",
                                        log_step_count_steps=1000)

    tf.logging.info("num_worker={}, batch_size={}, train_epochs={}".format(
        run_config.num_worker_replicas, flags_obj.batch_size,
        flags_obj.train_epochs))
    # initialize our model with all but the dense layer from pretrained resnet
    if flags_obj.pretrained_model_checkpoint_path is not None:
        warm_start_settings = tf.estimator.WarmStartSettings(
            flags_obj.pretrained_model_checkpoint_path,
            vars_to_warm_start='^(?!.*dense)')
    else:
        warm_start_settings = None

    classifier = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        warm_start_from=warm_start_settings,
        params={
            'resnet_size': int(flags_obj.resnet_size),
            'data_format': flags_obj.data_format,
            'batch_size': flags_obj.batch_size,
            'resnet_version': int(flags_obj.resnet_version),
            'loss_scale': flags_core.get_loss_scale(flags_obj),
            'dtype': flags_core.get_tf_dtype(flags_obj),
            'fine_tune': flags_obj.fine_tune,
            'num_workers': run_config.num_worker_replicas
        })

    run_params = {
        'batch_size': flags_obj.batch_size * run_config.num_worker_replicas,
        'dtype': flags_core.get_tf_dtype(flags_obj),
        'resnet_size': flags_obj.resnet_size,
        'resnet_version': flags_obj.resnet_version,
        'synthetic_data': flags_obj.use_synthetic_data,
        'train_epochs': flags_obj.train_epochs,
    }
    if flags_obj.use_synthetic_data:
        dataset_name = dataset_name + '-synthetic'

    benchmark_logger = logger.get_benchmark_logger()
    benchmark_logger.log_run_info('resnet',
                                  dataset_name,
                                  run_params,
                                  test_id=flags_obj.benchmark_test_id)

    train_hooks = hooks_helper.get_train_hooks(flags_obj.hooks,
                                               model_dir=flags_obj.model_dir,
                                               batch_size=flags_obj.batch_size)

    def input_fn_train():
        return input_function(
            is_training=True,
            data_dir=flags_obj.data_dir,
            batch_size=distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            num_epochs=flags_obj.train_epochs,
            num_gpus=flags_core.get_num_gpus(flags_obj),
            dtype=flags_core.get_tf_dtype(flags_obj))

    def input_fn_eval():
        return input_function(
            is_training=False,
            data_dir=flags_obj.data_dir,
            batch_size=distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            num_epochs=1,
            dtype=flags_core.get_tf_dtype(flags_obj))

    train_spec = tf.estimator.TrainSpec(input_fn=input_fn_train,
                                        hooks=train_hooks)
    eval_spec = tf.estimator.EvalSpec(input_fn=input_fn_eval,
                                      throttle_secs=1800,
                                      steps=None,
                                      start_delay_secs=10)
    if flags_obj.eval == 0:
        tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)
    else:
        while True:
            eval_results = classifier.evaluate(
                input_fn=input_fn_eval,
                steps=50000 // flags_obj.batch_size)
            time.sleep(flags_obj.eval)
    '''
  if flags_obj.eval_only or not flags_obj.train_epochs:
    # If --eval_only is set, perform a single loop with zero train epochs.
    schedule, n_loops = [0], 1
  else:
    # Compute the number of times to loop while training. All but the last
    # pass will train for `epochs_between_evals` epochs, while the last will
    # train for the number needed to reach `training_epochs`. For instance if
    #   train_epochs = 25 and epochs_between_evals = 10
    # schedule will be set to [10, 10, 5]. That is to say, the loop will:
    #   Train for 10 epochs and then evaluate.
    #   Train for another 10 epochs and then evaluate.
    #   Train for a final 5 epochs (to reach 25 epochs) and then evaluate.
    n_loops = math.ceil(flags_obj.train_epochs / flags_obj.epochs_between_evals)
    schedule = [flags_obj.epochs_between_evals for _ in range(int(n_loops))]
    schedule[-1] = flags_obj.train_epochs - sum(schedule[:-1])  # over counting.

  for cycle_index, num_train_epochs in enumerate(schedule):
    tf.logging.info('Starting cycle: %d/%d', cycle_index, int(n_loops))

    if num_train_epochs:
      classifier.train(input_fn=lambda: input_fn_train(num_train_epochs),
                       hooks=train_hooks, max_steps=flags_obj.max_train_steps)

    tf.logging.info('Starting to evaluate.')

    # flags_obj.max_train_steps is generally associated with testing and
    # profiling. As a result it is frequently called with synthetic data, which
    # will iterate forever. Passing steps=flags_obj.max_train_steps allows the
    # eval (which is generally unimportant in those circumstances) to terminate.
    # Note that eval will run for max_train_steps each loop, regardless of the
    # global_step count.
    eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                       steps=flags_obj.max_train_steps)

    benchmark_logger.log_evaluation_result(eval_results)

    if model_helpers.past_stop_threshold(
        flags_obj.stop_threshold, eval_results['accuracy']):
      break

  '''
    if flags_obj.export_dir is not None:
        # Exports a saved model for the given classifier.
        if flags_obj.image_bytes_as_serving_input:
            input_receiver_fn = functools.partial(image_bytes_serving_input_fn,
                                                  shape)
        else:
            input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
                shape, batch_size=flags_obj.batch_size)
        classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn)
Code Example #8
def yolo_main(flags_obj, model_function, input_function, dataset,
              augmentation):
    """Shared main loop for yolo Models.

    Args:
        flags_obj: An object containing parsed flags. See define_yolo_flags()
            for details.
        model_function: the function that instantiates the Model and builds the
            ops for train/eval. This will be passed directly into the estimator.
        input_function: the function that processes the dataset and returns a
            dataset that the estimator can train on. This will be wrapped with
            all the relevant flags for running and passed to estimator.
        dataset: A dataset for training and evaluation.
        augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
            For example, passing imgaug.augmenters.Fliplr(0.5) flips images
            right/left 50% of the time.
      """

    model_helpers.apply_clean(flags_obj)

    # Ensures flag override logic is only executed if explicitly triggered.
    if flags_obj.tf_gpu_thread_mode:
        override_flags_and_set_envars_for_gpu_thread_pool(flags_obj)

    # Creates session config. allow_soft_placement = True, is required for
    # multi-GPU and is not harmful for other modes.
    session_config = tf.ConfigProto(
        log_device_placement=True,
        inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
        allow_soft_placement=True)

    session_config.gpu_options.allow_growth = True

    distribution_strategy = distribution_utils.get_distribution_strategy(
        flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)

    run_config = tf.estimator.RunConfig(train_distribute=distribution_strategy,
                                        session_config=session_config,
                                        save_checkpoints_secs=60 * 60 * 24)

    # Initializes model with all but the dense layer from pretrained ResNet.
    if flags_obj.pretrained_model_checkpoint_path is not None:
        warm_start_settings = tf.estimator.WarmStartSettings(
            flags_obj.pretrained_model_checkpoint_path,
            vars_to_warm_start='^(?!.*dense)')
    else:
        warm_start_settings = None

    anchors = np.array(utils.get_anchors(flags_obj.anchors_path))

    detector = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        warm_start_from=warm_start_settings,
        params={
            'num_classes': flags_obj.num_classes,
            'data_format': flags_obj.data_format,
            'batch_size': distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            'image_size': int(flags_obj.image_size),
            'loss_scale': flags_core.get_loss_scale(flags_obj),
            'dtype': flags_core.get_tf_dtype(flags_obj),
            'fine_tune': flags_obj.fine_tune,
            'anchors': anchors,
            'num_anchors': len(anchors),
            'max_num_boxes_per_image': flags_obj.max_num_boxes_per_image,
            'threshold': flags_obj.threshold,
            'train': dataset.num_images,
            'learning_rate': flags_obj.learning_rate
        })

    # if flags_obj.use_synthetic_data:
    #     dataset_name = dataset_name + '-synthetic'

    def input_fn_train(num_epochs):
        return input_function(
            data_set=dataset,
            is_training=True,
            batch_size=distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            anchors_path=flags_obj.anchors_path,
            num_epochs=num_epochs,
            augmentation=augmentation,
            dtype=tf.float32,
            max_num_boxes_per_image=flags_obj.max_num_boxes_per_image,
            image_size=flags_obj.image_size,
            datasets_num_private_threads=(
                flags_obj.datasets_num_private_threads),
            num_parallel_batches=flags_obj.datasets_num_parallel_batches)

    '''
    def input_fn_eval():
        return input_function(
            is_training=False,
            data_dir=flags_obj.data_dir,
            batch_size=distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            num_epochs=1,
            dtype=flags_core.get_tf_dtype(flags_obj))
            '''

    if flags_obj.eval_only or not flags_obj.train_epochs:
        # If --eval_only is set, perform a single loop with zero train epochs.
        schedule, n_loops = [0], 1
    else:
        # Compute the number of times to loop while training. All but the last
        # pass will train for `epochs_between_evals` epochs, while the last will
        # train for the number needed to reach `training_epochs`. For instance if
        #   train_epochs = 25 and epochs_between_evals = 10
        # schedule will be set to [10, 10, 5]. That is to say, the loop will:
        #   Train for 10 epochs and then evaluate.
        #   Train for another 10 epochs and then evaluate.
        #   Train for a final 5 epochs (to reach 25 epochs) and then evaluate.
        n_loops = math.ceil(flags_obj.train_epochs /
                            flags_obj.epochs_between_evals)
        schedule = [
            flags_obj.epochs_between_evals for _ in range(int(n_loops))
        ]
        schedule[-1] = flags_obj.train_epochs - sum(
            schedule[:-1])  # over counting.

    for cycle_index, num_train_epochs in enumerate(schedule):
        tf.logging.info('Starting cycle: %d/%d', cycle_index, int(n_loops))

        if num_train_epochs:
            detector.train(input_fn=lambda: input_fn_train(num_train_epochs),
                           max_steps=flags_obj.max_train_steps)
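
The train/eval schedule arithmetic used in the last two examples is easy to sanity-check in isolation; with the 25-epoch example from the comments:

import math

train_epochs, epochs_between_evals = 25, 10
n_loops = math.ceil(train_epochs / epochs_between_evals)  # 3
schedule = [epochs_between_evals] * n_loops               # [10, 10, 10]
schedule[-1] = train_epochs - sum(schedule[:-1])          # fix over-counting
print(schedule)  # [10, 10, 5]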