Example 1
def validate(hp: tf.contrib.training.HParams) -> tf.contrib.training.HParams:
    """Ensures the HParams is of the expected type."""
    members = [
        # Optimization.
        ('optimizer', Optimizer),
        ('initial_lr', float),
        ('lr_decay', LRDecaySchedule),
        ('decay_steps', int),
        ('weight_decay', float),
        ('momentum', float),
        ('use_nesterov', bool),
        # Architecture.
        ('n_filters_1', int),
        ('n_filters_2', int),
        ('n_filters_3', int),
        ('stride_1', int),
        ('stride_2', int),
        ('stride_3', int),
        ('depthwise', bool),
        ('num_residual_units_1', int),
        ('num_residual_units_2', int),
        ('num_residual_units_3', int),
        ('k', int),
        ('activation_1', Activation),
        ('activation_2', Activation),
        ('activation_3', Activation),
        ('n_conv_layers_1', int),
        ('n_conv_layers_2', int),
        ('n_conv_layers_3', int),
        ('dropout_1', float),
        ('dropout_2', float),
        ('dropout_3', float),
        # Misc.
        ('batch_size', int),
        ('num_epochs', int),
        ('replicate', int),  # Number of replications per configuration.
    ]

    if len(members) != len(hp.values()):
        raise TypeError('tf.contrib.training.HParams has wrong size: %r' % hp)

    for name, tpe in members:
        if hp.get(name) is None:
            raise TypeError(
                'tf.contrib.training.HParams does not include member: %s' %
                name)
        if not isinstance(hp.get(name), tpe):
            raise TypeError(
                'Member %s in tf.contrib.training.HParams has type %r but should be '
                '%r' % (name, type(hp.get(name)), tpe))

    return hp
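
The check relies on two behaviors of tf.contrib.training.HParams: values() returns a dict of every registered hyperparameter, and get(name) returns None for names that were never registered. A minimal sketch of those behaviors (not from the original source, assuming TF 1.x with tf.contrib available):

import tensorflow as tf

hp = tf.contrib.training.HParams(initial_lr=0.1, use_nesterov=True)

print(hp.values())            # dict of every registered hyperparameter
print(hp.get('momentum'))     # None -> validate() would raise TypeError here
print(isinstance(hp.get('use_nesterov'), bool))   # True -> passes the type check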
Example 2
def set_data_dependent_hparams(hparams: tf.contrib.training.HParams,
                               snapshots: np.ndarray):
    """Add data-dependent hyperparameters to hparams.

    Added hyper-parameters:
      error_scale: List[float] with length 2*num_channels indicating the
        scaling in the loss to use on squared error and relative squared error
        for each derivative target.
      error_floor: List[float] with length num_channels giving the scale for
        weighting of relative errors.

    Args:
      hparams: hyper-parameters for training. Will be modified by adding
        'error_floor' and 'error_scale' entries (lists of float).
      snapshots: np.ndarray with shape [examples, x] with high-resolution
        training data.
    """
    error_floor, error_scale = determine_loss_scales(snapshots, hparams)
    hparams.set_hparam('error_scale', error_scale.ravel().tolist())
    hparams.set_hparam('error_floor', error_floor.tolist())
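
Note that HParams.set_hparam only updates hyperparameters that already exist (it raises KeyError otherwise), so the hparams passed in here are expected to already carry placeholder 'error_scale' and 'error_floor' entries. A small sketch with hypothetical placeholder values (the real defaults come from wherever the hparams are created):

import tensorflow as tf

hparams = tf.contrib.training.HParams(error_scale=[1.0, 1.0], error_floor=[1e-12])
hparams.set_hparam('error_floor', [2.5e-3])   # fine: the key already exists
# hparams.set_hparam('brand_new_key', 1.0)    # would raise KeyError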
def hparams_to_json(hp: tf.contrib.training.HParams) -> Text:
    """Converts HParams to JSON."""
    d = hp.values()

    def sanitize(v):
        if isinstance(v, enum.Enum):
            return v.name
        return v

    sanitized = {k: sanitize(v) for k, v in d.items()}
    return json.dumps(sanitized, indent=2, sort_keys=True)
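
Enum members are not JSON-serializable, which is why sanitize() maps them to their names first. A hedged sketch with a stand-in enum (model.Optimizer itself is not shown in these snippets):

import enum
import tensorflow as tf

class Optimizer(enum.Enum):   # stand-in for model.Optimizer
    SGD = 0
    ADAM = 1

hp = tf.contrib.training.HParams(initial_lr=0.1, optimizer=Optimizer.SGD)
print(hparams_to_json(hp))
# {
#   "initial_lr": 0.1,
#   "optimizer": "SGD"
# }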
def update_hparams(
        hp: tf.contrib.training.HParams) -> tf.contrib.training.HParams:
    """Converts a Vizier object to a valid HParams."""
    logging.info('Updating HParams from: %r', hp)

    d = hp.values()

    # Add implicit hyperparameter.
    if 'decay_steps' not in d:
        d['decay_steps'] = int(hp.num_epochs *
                               (NUM_TRAIN_EXAMPLES // hp.batch_size))

    def to_bool(value: Union[bool, Text]) -> bool:
        if isinstance(value, bool):
            return value
        return {'False': False, 'True': True}[value]

    # Use booleans.
    d['use_nesterov'] = to_bool(hp.use_nesterov)
    d['depthwise'] = to_bool(hp.depthwise)

    # Use enum types.
    d['optimizer'] = model.Optimizer[hp.optimizer]
    d['lr_decay'] = model.LRDecaySchedule[hp.lr_decay]
    d['activation_1'] = model.Activation[hp.activation_1]
    d['activation_2'] = model.Activation[hp.activation_2]
    d['activation_3'] = model.Activation[hp.activation_3]

    # For some reason floats are being cast to ints.
    d['weight_decay'] = float(hp.weight_decay)
    d['dropout_1'] = float(hp.dropout_1)
    d['dropout_2'] = float(hp.dropout_2)
    d['dropout_3'] = float(hp.dropout_3)

    hp = tf.contrib.training.HParams(**d)

    return model.validate(hp)
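
The implicit decay_steps is simply epochs times steps per epoch. A quick check with hypothetical numbers (NUM_TRAIN_EXAMPLES is a module-level constant not shown here):

NUM_TRAIN_EXAMPLES = 50000   # hypothetical value, for illustration only
num_epochs = 200
batch_size = 128

steps_per_epoch = NUM_TRAIN_EXAMPLES // batch_size   # 390
decay_steps = int(num_epochs * steps_per_epoch)      # 78000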
Example 5
def training_loop(snapshots: np.ndarray,
                  checkpoint_dir: str,
                  hparams: tf.contrib.training.HParams,
                  master: str = '') -> pd.DataFrame:
    """Run training.

    Args:
      snapshots: np.ndarray with shape [examples, x] with high-resolution
        training data.
      checkpoint_dir: directory to which to save model checkpoints.
      hparams: hyperparameters for training, as created by create_hparams().
      master: string master to use for MonitoredTrainingSession.

    Returns:
      pd.DataFrame with metrics for the full training run.
    """
    hparams = copy.deepcopy(hparams)
    set_data_dependent_hparams(hparams, snapshots)
    logging.info('Training with hyperparameters:\n%r', hparams)

    hparams_path = os.path.join(checkpoint_dir, 'hparams.pbtxt')
    with tf.gfile.GFile(hparams_path, 'w') as f:
        f.write(str(hparams.to_proto()))

    logging.info('Setting up training')
    _, train_step = setup_training(snapshots, hparams)
    train_inferer = Inferer(snapshots, hparams, training=True)
    test_inferer = Inferer(snapshots, hparams, training=False)

    global_step = tf.train.get_or_create_global_step()

    logging.info('Variables: %s', '\n'.join(map(str,
                                                tf.trainable_variables())))

    logged_metrics = []
    equation_type = equations.equation_type_from_hparams(hparams)

    with tf.train.MonitoredTrainingSession(
            master=master,
            checkpoint_dir=checkpoint_dir,
            save_checkpoint_secs=300,
            config=_disable_rewrite_config(),
            hooks=[SaveAtEnd(checkpoint_dir_to_path(checkpoint_dir))]) as sess:

        test_writer = tf.summary.FileWriter(
            os.path.join(checkpoint_dir, 'test'), sess.graph, flush_secs=60)
        train_writer = tf.summary.FileWriter(
            os.path.join(checkpoint_dir, 'train'), sess.graph, flush_secs=60)

        initial_step = sess.run(global_step)

        with test_writer, train_writer:
            for step in range(initial_step, hparams.learning_stops[-1]):
                sess.run(train_step)

                if (step + 1) % hparams.eval_interval == 0:
                    train_inference_data = train_inferer.run(sess)
                    test_inference_data = test_inferer.run(sess)

                    train_metrics = calculate_metrics(train_inference_data,
                                                      equation_type)
                    test_metrics = calculate_metrics(test_inference_data,
                                                     equation_type)
                    logged_metrics.append((step, test_metrics, train_metrics))

                    logging.info(metrics_one_linear(test_metrics))
                    save_summaries(test_metrics, test_writer, global_step=step)
                    save_summaries(train_metrics,
                                   train_writer,
                                   global_step=step)

    return metrics_to_dataframe(logged_metrics)
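
A hypothetical invocation (the data path, checkpoint path, and the assumption that create_hparams() accepts defaults are illustrations, not taken from the source):

import numpy as np

snapshots = np.load('snapshots.npy')   # high-resolution data, shape [examples, x]
hparams = create_hparams()             # assumed callable with defaults; defined elsewhere in the module
metrics = training_loop(snapshots, '/tmp/training_run', hparams)
print(metrics.tail())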
def drop_batch_param(
        hp: tf.contrib.training.HParams) -> tf.contrib.training.HParams:
    """Returns a copy of the HParams without the 'batch_size' entry."""
    d = hp.values()
    d.pop('batch_size')
    return tf.contrib.training.HParams(**d)
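
Because HParams.values() builds a fresh dict on every call, popping 'batch_size' from it does not mutate the original object. A short sketch:

import tensorflow as tf

hp = tf.contrib.training.HParams(batch_size=128, initial_lr=0.1)
slim_hp = drop_batch_param(hp)

print('batch_size' in slim_hp.values())   # False
print(hp.batch_size)                      # 128 -- the original HParams is untouched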