Example #1
  def evaluate(self,
               x = None,
               y = None,
               callbacks = None,
               tnt_micro_batch_size = None,
               tnt_distribute_dataset = True,
               **kwargs):
    self._setup_for_execution('evaluate', x, y, kwargs)
    processed_callbacks = utilities._preprocess_callbacks(callbacks, self.group,
                                                          parallel_strategy = tnt.ParallelStrategy.DATA,
                                                          exec_type = 'evaluate',
                                                          verbose = kwargs.get('verbose', None))

    if tnt_distribute_dataset:
      test_dataset = tnt.data.Dataset(dataset = x,
                                      num_ranks = self.group.size,
                                      rank = self.group.to_group_rank(self.rank),
                                      shuffle_seed = self.default_shuffle_seed)
      x = test_dataset.distribute_dataset_across_ranks(
              user_micro_batch_size = tnt_micro_batch_size,
              is_training = False)
      self._validate_micro_batch_size_for_batch_normalization(test_dataset.micro_batch_size)
    else:
      logger.info("Automatic dataset distribution is disabled.")

    return self.model.evaluate(x, callbacks = processed_callbacks, **kwargs)
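
In this data-parallel `evaluate`, `x` is expected to be a `tf.data.Dataset` batched with the global batch size, and `distribute_dataset_across_ranks` turns it into per-rank micro-batches. The snippet below is only a conceptual sketch of that splitting using plain `tf.data`; the rank/size values and the round-robin sharding are illustrative assumptions, not Tarantella's actual implementation.

import tensorflow as tf

# Assumed setup for illustration: 4 ranks, global batch size 8.
num_ranks, rank = 4, 1
global_batch_size = 8
micro_batch_size = global_batch_size // num_ranks   # 2 samples per rank and per batch

# A toy dataset batched with the global batch size, as a user would pass it in.
dataset = tf.data.Dataset.range(32).batch(global_batch_size)

# Conceptual equivalent of distributing the dataset across ranks: unbatch,
# keep every num_ranks-th sample starting at this rank's offset, then
# re-batch with the per-rank micro-batch size.
per_rank_dataset = (dataset.unbatch()
                           .shard(num_shards = num_ranks, index = rank)
                           .batch(micro_batch_size))

for micro_batch in per_rank_dataset.take(2):
    print(micro_batch.numpy())   # [1 5], then [9 13]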
Example #2
  def fit(self,
          x = None,
          y = None,
          callbacks = None,
          validation_data = None,
          **kwargs):
    logger.info(f"[PartitionedModel] fit.")
    self._configure_rebuild(dataset = x)
    self._build_model_and_compile_if_necessary()
    processed_callbacks = utilities._preprocess_callbacks(callbacks, self.group,
                                                          parallel_strategy = tnt.ParallelStrategy.PIPELINING,
                                                          exec_type = 'fit',
                                                          verbose = kwargs.get('verbose', None))

    ds = self._get_microbatched_dataset(dataset = x, nano_batch_size = self.nano_batch_size,
                                        num_pipeline_stages = self.num_pipeline_stages)

    distributed_validation_data = None
    if validation_data:
      distributed_validation_data = self._get_microbatched_dataset(dataset = validation_data,
                                                                   nano_batch_size = self.nano_batch_size,
                                                                   num_pipeline_stages = self.num_pipeline_stages)

    return self.model.fit(x = ds, callbacks = processed_callbacks,
                          validation_data = distributed_validation_data,
                          **kwargs)
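
`_get_microbatched_dataset` is not shown in this example; its role is to re-batch the input so that every original batch reaches the pipeline as several smaller nano-batches, one per pipeline step. The sketch below only illustrates that re-batching idea with plain `tf.data`; the sizes and the `drop_remainder` choice are assumptions, not the library's actual implementation.

import tensorflow as tf

# Illustration only: a batch size of 8 split into nano-batches of 2,
# i.e. 4 nano-batches are pushed through the pipeline per original batch.
batch_size, nano_batch_size = 8, 2

dataset = tf.data.Dataset.range(32).batch(batch_size)
nano_batched = dataset.unbatch().batch(nano_batch_size, drop_remainder = True)

print(len(list(nano_batched)))   # 16 nano-batches instead of 4 full batches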
Example #3
  def evaluate(self,
               x = None,
               y = None,
               callbacks = None,
               **kwargs):
    self._configure_rebuild(dataset = x)
    self._build_model_and_compile_if_necessary()

    processed_callbacks = utilities._preprocess_callbacks(callbacks, self.group,
                                                          parallel_strategy = tnt.ParallelStrategy.PIPELINING,
                                                          exec_type = 'evaluate',
                                                          verbose = kwargs.get('verbose', None))

    ds = self._get_microbatched_dataset(dataset = x, nano_batch_size = self.nano_batch_size,
                                        num_pipeline_stages = self.num_pipeline_stages)

    return_dict = kwargs.pop('return_dict', None)
    test_loss_metrics = self.model.evaluate(x = ds,
                                            callbacks = processed_callbacks,
                                            return_dict = False,
                                            **kwargs)
    user_visible_loss_metrics = putil.extract_user_visible_metrics(
                                      dict(zip(self.model.metrics_names, test_loss_metrics)))
    if len(user_visible_loss_metrics) == 1:
      return user_visible_loss_metrics[0]
    else:
      metrics_dict = putil.avg_metrics_over_pipeline_stages(user_visible_loss_metrics)
      if return_dict:
        return metrics_dict
      metrics_values = []
      for metric in metrics_dict.values():
        metrics_values += metric if isinstance(metric, list) else [metric]
      return metrics_values
Example #4
  def predict(self,
              x = None,
              callbacks = None,
              **kwargs):
    self._configure_rebuild(dataset = x)
    self._build_model_and_compile_if_necessary()

    processed_callbacks = utilities._preprocess_callbacks(callbacks, self.group,
                                                          parallel_strategy = tnt.ParallelStrategy.PIPELINING,
                                                          exec_type = 'predict',
                                                          verbose = kwargs.get('verbose', None))

    ds = self._get_microbatched_dataset(dataset = x, nano_batch_size = self.nano_batch_size,
                                        num_pipeline_stages = self.num_pipeline_stages)
    predictions = self.model.predict(x = ds, callbacks = processed_callbacks, **kwargs)
    if tnt.is_group_master_rank(self.group):  # only the last pipeline partition returns predictions
      return predictions
Example #5
  def fit(self,
          x = None,
          y = None,
          callbacks = None,
          validation_data = None,
          tnt_micro_batch_size = None,
          tnt_validation_micro_batch_size = None,
          tnt_distribute_dataset = True,
          tnt_distribute_validation_dataset = True,
          **kwargs):
    self._setup_for_execution('fit', x, y, kwargs)
    processed_callbacks = utilities._preprocess_callbacks(callbacks, self.group,
                                                          parallel_strategy = tnt.ParallelStrategy.DATA,
                                                          exec_type = 'fit',
                                                          verbose = kwargs.get('verbose', None))

    if tnt_distribute_dataset:
      # Distribute dataset into micro-batches among ranks by taking into account
      # all possible cases of splitting the dataset:
      #
      # 1. Batch size
      # a. `batch_size` is a multiple of the number of ranks
      #     => identical `micro_batch_size` for all ranks
      # b. `batch_size` is not a multiple of the number of ranks
      #     => different ranks have different `micro_batch_size`s and
      #        locally computed gradients need to be scaled by a factor to
      #        account for the differences
      # c. `batch_size` < number of ranks
      #     => raise an error
      #        (cases 1.a/1.b are illustrated with a numeric sketch after this example)
      #
      # 2. Last batch within epoch
      # a. the last batch in the dataset is incomplete, but dataset is batched
      #    with `drop_remainder = True`
      #     => the last batch is dropped
      # b. the last batch in the dataset is incomplete with `drop_remainder = False`
      #     - number of samples in the last batch is smaller than `num_ranks`,
      #         => pad the dataset with a number of zeroed samples to ensure that each rank
      #            has one sample, so that they all see the same number of iterations in an epoch;
      #            the fake samples will be filtered out from the final gradient computation by
      #            assigning them `micro_batch_size = 0`
      #     - number of samples in the last batch is >= `num_ranks`
      #         => last batch can be considered a new `batch_size`, which will be handled as above (in 1.),
      #            both for computing the `micro_batch_size` and the `scaling_factor`
      distributed_x = tnt.data.Dataset(dataset = x,
                                       num_ranks = self.group.size,
                                       rank = self.group.to_group_rank(self.rank),
                                       shuffle_seed = self.default_shuffle_seed)
      x = distributed_x.distribute_dataset_across_ranks(
            user_micro_batch_size = tnt_micro_batch_size,
            is_training = True)
      self._validate_micro_batch_size_for_batch_normalization(distributed_x.micro_batch_size)

      # if different ranks have different micro-batch sizes, the gradients need rescaling
      dataset_callback = distributed_x.get_gradient_scaling_callback()
      if dataset_callback:
        processed_callbacks.append(dataset_callback)

    else:
      logger.info("Automatic dataset distribution is disabled."
                  "Make sure the dataset is sharded manually across ranks.")

    # Always switch off Keras-level shuffling; shuffling is already handled when
    # distributing the dataset (cf. `shuffle_seed` above)
    kwargs["shuffle"] = False

    if validation_data:
      if tnt_distribute_validation_dataset:
        distributed_validation_data = tnt.data.Dataset(dataset = validation_data,
                                                       num_ranks = self.group.size,
                                                       rank = self.group.to_group_rank(self.rank),
                                                       shuffle_seed = self.default_shuffle_seed)
        validation_data = distributed_validation_data.distribute_dataset_across_ranks(
              user_micro_batch_size = tnt_validation_micro_batch_size,
              is_training = False)
        self._validate_micro_batch_size_for_batch_normalization(distributed_validation_data.micro_batch_size)
      else:
        logger.info("Automatic distribution for the validation dataset is disabled.")

    return self.model.fit(x = x,
                          validation_data = validation_data,
                          callbacks = processed_callbacks,
                          **kwargs)
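
The case analysis in the comments of this example can be made concrete with a small numeric sketch. The scaling-factor convention below is an assumption, chosen so that the weighted per-rank gradients sum to the full-batch average; Tarantella's exact formula may differ.

# Case 1.b from the comments above: batch_size = 10 on num_ranks = 4.
batch_size, num_ranks = 10, 4

# Lower-indexed ranks take one extra sample each.
base, remainder = divmod(batch_size, num_ranks)
micro_batch_sizes = [base + (1 if r < remainder else 0) for r in range(num_ranks)]
print(micro_batch_sizes)     # [3, 3, 2, 2]

# Assumed scaling: weight each rank's averaged gradient by its share of the
# global batch, so the scaled contributions add up to the full-batch average.
scaling_factors = [mbs / batch_size for mbs in micro_batch_sizes]
print(scaling_factors)       # [0.3, 0.3, 0.2, 0.2]
print(sum(scaling_factors))  # 1.0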