Code Example #1
# Imports required by this excerpt; `named_data`, `gaussian_encoder_model`,
# `results`, and the `_make_input_fn` helper come from the surrounding
# project and are not shown here.
import os
import time

import gin.tf
import numpy as np
import tensorflow as tf


def train(model_dir,
          overwrite=False,
          model=gin.REQUIRED,
          training_steps=gin.REQUIRED,
          random_seed=gin.REQUIRED,
          batch_size=gin.REQUIRED,
          eval_steps=1000,
          name="",
          model_num=None):
  """Trains the estimator and exports the snapshot and the gin config.

  Using this function requires the gin binding 'dataset.name' to be
  specified, as it determines the data set used for training.

  Args:
    model_dir: String with path to directory where model output should be saved.
    overwrite: Boolean indicating whether to overwrite output directory.
    model: GaussianEncoderModel that should be trained and exported.
    training_steps: Integer with number of training steps.
    random_seed: Integer with random seed used for training.
    batch_size: Integer with the batch size.
    eval_steps: Optional integer with number of steps used for evaluation.
    name: Optional string with name of the model (can be used to name models).
    model_num: Optional integer with model number (can be used to identify
      models).
  """
  # We do not use the variables 'name' and 'model_num'. Instead, they can be
  # used to name results as they will be part of the saved gin config.
  del name, model_num

  # Delete the output directory if it already exists.
  if tf.gfile.IsDirectory(model_dir):
    if overwrite:
      tf.gfile.DeleteRecursively(model_dir)
    else:
      raise ValueError("Directory already exists and overwrite is False.")

  # Create a numpy random state. We will sample the random seeds for training
  # and evaluation from this.
  random_state = np.random.RandomState(random_seed)

  # Obtain the dataset.
  dataset = named_data.get_named_ground_truth_data()

  # We create a TPUEstimator based on the provided model. This is primarily so
  # that we could switch to TPU training in the future. For now, we train
  # locally on GPUs.
  run_config = tf.contrib.tpu.RunConfig(
      tf_random_seed=random_seed,
      keep_checkpoint_max=1,
      tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=500))
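  # Note: iterations_per_loop controls TPU-side loop unrolling and should
  # have no effect here, since use_tpu=False below.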
  tpu_estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=False,
      model_fn=model.model_fn,
      model_dir=os.path.join(model_dir, "tf_checkpoint"),
      train_batch_size=batch_size,
      eval_batch_size=batch_size,
      config=run_config)

  # Set up time to keep track of elapsed time in results.
  experiment_timer = time.time()

  # Do the actual training.
  tpu_estimator.train(
      input_fn=_make_input_fn(dataset, random_state.randint(2**16)),
      steps=training_steps)

  # Save model as a TFHub module.
  output_shape = named_data.get_named_ground_truth_data().observation_shape
  module_export_path = os.path.join(model_dir, "tfhub")
  gaussian_encoder_model.export_as_tf_hub(model, output_shape,
                                          tpu_estimator.latest_checkpoint(),
                                          module_export_path)

  # Save the results. The results directory will contain all the results and
  # config files copied along as we progress through the pipeline, so that
  # they are available for analysis at the end.
  results_dict = tpu_estimator.evaluate(
      input_fn=_make_input_fn(
          dataset, random_state.randint(2**16), num_batches=eval_steps))
  results_dir = os.path.join(model_dir, "results")
  results_dict["elapsed_time"] = time.time() - experiment_timer
  results.update_result_directory(results_dir, "train", results_dict)
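Since most parameters above default to gin.REQUIRED, the function is clearly meant to be driven by a gin config. Below is a minimal, hypothetical sketch of how it might be invoked. Only the 'dataset.name' binding is confirmed by the docstring; the train.* scoping assumes the function is registered with a plain @gin.configurable decorator (the decorator is not shown in the excerpt), and @vae() and the dataset name are placeholder values.

import gin.tf

# Hypothetical bindings: 'dataset.name' is required by the docstring; the
# train.* names assume a plain @gin.configurable registration, and all
# values here are placeholders.
gin.parse_config("""
    dataset.name = "dsprites_full"
    train.model = @vae()
    train.training_steps = 10000
    train.random_seed = 0
    train.batch_size = 64
""")

train(model_dir="/tmp/vae_example", overwrite=True)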
Code Example #2
# Imports required by this excerpt; `named_data`, `semi_supervised_utils`,
# `semi_supervised_vae`, `gaussian_encoder_model`, `results`, and the
# `_make_input_fn` helper come from the surrounding project. The two TPU
# aliases below are an assumption about how the excerpt resolves
# `tpu_config` and `TPUEstimator`.
import os
import time

import gin.tf
import tensorflow as tf
from tensorflow.contrib.tpu import TPUEstimator
from tensorflow.contrib.tpu.python.tpu import tpu_config


def train(model_dir,
          overwrite=False,
          model=gin.REQUIRED,
          training_steps=gin.REQUIRED,
          unsupervised_data_seed=gin.REQUIRED,
          supervised_data_seed=gin.REQUIRED,
          model_seed=gin.REQUIRED,
          batch_size=gin.REQUIRED,
          num_labelled_samples=gin.REQUIRED,
          train_percentage=gin.REQUIRED,
          name=""):
  """Trains the estimator and exports the snapshot and the gin config.

  Using this function requires the gin binding 'dataset.name' to be
  specified, as it determines the data set used for training.

  Args:
    model_dir: String with path to directory where model output should be saved.
    overwrite: Boolean indicating whether to overwrite output directory.
    model: GaussianEncoderModel that should be trained and exported.
    training_steps: Integer with number of training steps.
    unsupervised_data_seed: Integer with random seed used for the unsupervised
      data.
    supervised_data_seed: Integer with random seed for supervised data.
    model_seed: Integer with random seed used for the model.
    batch_size: Integer with the batch size.
    num_labelled_samples: Integer with number of labelled observations for
      training.
    train_percentage: Fraction in (0, 1) of the labelled data to use for
      training.
    name: Optional string with name of the model (can be used to name models).
  """
  # We do not use the variable 'name'. Instead, it can be used to name results
  # as it will be part of the saved gin config.
  del name

  # Delete the output directory if necessary.
  if tf.gfile.IsDirectory(model_dir):
    if overwrite:
      tf.gfile.DeleteRecursively(model_dir)
    else:
      raise ValueError("Directory already exists and overwrite is False.")

  # Obtain the dataset.
  dataset = named_data.get_named_ground_truth_data()
  (sampled_observations,
   sampled_factors,
   factor_sizes) = semi_supervised_utils.sample_supervised_data(
       supervised_data_seed, dataset, num_labelled_samples)
  # We instantiate the model class.
  if issubclass(model, semi_supervised_vae.BaseS2VAE):
    model = model(factor_sizes)
  else:
    model = model()

  # We create a TPUEstimator based on the provided model. This is primarily so
  # that we could switch to TPU training in the future. For now, we train
  # locally on GPUs.
  run_config = tpu_config.RunConfig(
      tf_random_seed=model_seed,
      keep_checkpoint_max=1,
      tpu_config=tpu_config.TPUConfig(iterations_per_loop=500))
  tpu_estimator = TPUEstimator(
      use_tpu=False,
      model_fn=model.model_fn,
      model_dir=model_dir,
      train_batch_size=batch_size,
      eval_batch_size=batch_size,
      config=run_config)

  # Set up time to keep track of elapsed time in results.
  experiment_timer = time.time()
  # Do the actual training.
  tpu_estimator.train(
      input_fn=_make_input_fn(dataset, num_labelled_samples,
                              unsupervised_data_seed, sampled_observations,
                              sampled_factors, train_percentage),
      steps=training_steps)
  # Save model as a TFHub module.
  output_shape = named_data.get_named_ground_truth_data().observation_shape
  module_export_path = os.path.join(model_dir, "tfhub")
  gaussian_encoder_model.export_as_tf_hub(model, output_shape,
                                          tpu_estimator.latest_checkpoint(),
                                          module_export_path)

  # Save the results. The results directory will contain all the results and
  # config files copied along as we progress through the pipeline, so that
  # they are available for analysis at the end.
  results_dict = tpu_estimator.evaluate(
      input_fn=_make_input_fn(
          dataset,
          num_labelled_samples,
          unsupervised_data_seed,
          sampled_observations,
          sampled_factors,
          train_percentage,
          num_batches=num_labelled_samples,
          validation=True))
  results_dir = os.path.join(model_dir, "results")
  results_dict["elapsed_time"] = time.time() - experiment_timer
  results.update_result_directory(results_dir, "train", results_dict)
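Both examples rely on a _make_input_fn helper that is not part of the excerpt. The sketch below is a simplified, hypothetical stand-in matching the call sites of Code Example #2: it only splits the labelled pool by train_percentage and ignores the ground-truth `dataset`, from which the real helper presumably also draws unsupervised observations. Its main purpose is to illustrate the contract a TPUEstimator expects: input_fn receives a `params` dict carrying 'batch_size' and returns a tf.data.Dataset of (features, labels) batches.

import numpy as np
import tensorflow as tf


def _make_input_fn(dataset, num_labelled_samples, unsupervised_data_seed,
                   sampled_observations, sampled_factors, train_percentage,
                   num_batches=None, validation=False):
  """Simplified, hypothetical sketch of the input_fn factory."""
  del dataset  # The real helper presumably also samples unsupervised data.
  # Assumption: the labelled pool is split into train/validation parts by a
  # simple prefix split at train_percentage.
  num_train = int(train_percentage * num_labelled_samples)
  if validation:
    observations = sampled_observations[num_train:]
    labels = sampled_factors[num_train:]
  else:
    observations = sampled_observations[:num_train]
    labels = sampled_factors[:num_train]

  def input_fn(params):
    # TPUEstimator passes the effective batch size via `params`.
    batch_size = params["batch_size"]
    ds = tf.data.Dataset.from_tensor_slices(
        (observations.astype(np.float32), labels.astype(np.float32)))
    # The seed is reused here only to keep the sketch deterministic.
    ds = ds.shuffle(len(observations), seed=unsupervised_data_seed).repeat()
    # drop_remainder=True keeps batch shapes static, as TPUEstimator expects.
    ds = ds.batch(batch_size, drop_remainder=True)
    if num_batches is not None:
      ds = ds.take(num_batches)
    return ds

  return input_fn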