def validate(hp: tf.contrib.training.HParams) -> tf.contrib.training.HParams:
  """Ensures the HParams is of the expected type."""
  members = [
      # Optimization.
      ('optimizer', Optimizer),
      ('initial_lr', float),
      ('lr_decay', LRDecaySchedule),
      ('decay_steps', int),
      ('weight_decay', float),
      ('momentum', float),
      ('use_nesterov', bool),
      # Architecture.
      ('n_filters_1', int),
      ('n_filters_2', int),
      ('n_filters_3', int),
      ('stride_1', int),
      ('stride_2', int),
      ('stride_3', int),
      ('depthwise', bool),
      ('num_residual_units_1', int),
      ('num_residual_units_2', int),
      ('num_residual_units_3', int),
      ('k', int),
      ('activation_1', Activation),
      ('activation_2', Activation),
      ('activation_3', Activation),
      ('n_conv_layers_1', int),
      ('n_conv_layers_2', int),
      ('n_conv_layers_3', int),
      ('dropout_1', float),
      ('dropout_2', float),
      ('dropout_3', float),
      # Misc.
      ('batch_size', int),
      ('num_epochs', int),
      ('replicate', int),  # Number of replications per configuration.
  ]
  if len(members) != len(hp.values()):
    raise TypeError('tf.contrib.training.HParams has wrong size: %r' % hp)
  for name, tpe in members:
    if hp.get(name) is None:
      raise TypeError(
          'tf.contrib.training.HParams does not include member: %s' % name)
    if not isinstance(hp.get(name), tpe):
      raise TypeError(
          'Member %s in tf.contrib.training.HParams has type %r but should be '
          '%r' % (name, type(hp.get(name)), tpe))
  return hp
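

# A minimal sketch of the failure mode validate() guards against. The single
# member below is hypothetical; only the full member list above passes.
def _validate_rejects_incomplete_hparams():
  hp = tf.contrib.training.HParams(initial_lr=0.1)
  try:
    validate(hp)
  except TypeError as e:
    # Trips the size check: 1 member present instead of the expected 30.
    logging.info('validate raised as expected: %s', e)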
def set_data_dependent_hparams(hparams: tf.contrib.training.HParams,
                               snapshots: np.ndarray):
  """Add data-dependent hyperparameters to hparams.

  Added hyper-parameters:
    error_scale: List[float] with length 2*num_channels indicating the scaling
      in the loss to use on squared error and relative squared error for each
      derivative target.
    error_floor: List[float] with length num_channels giving the scale for
      weighting of relative errors.

  Args:
    hparams: hyper-parameters for training. Will be modified by adding
      'error_floor' and 'error_scale' entries (lists of float).
    snapshots: np.ndarray with shape [examples, x] with high-resolution
      training data.
  """
  error_floor, error_scale = determine_loss_scales(snapshots, hparams)
  hparams.set_hparam('error_scale', error_scale.ravel().tolist())
  hparams.set_hparam('error_floor', error_floor.tolist())
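

# Usage sketch for set_data_dependent_hparams. create_hparams() is assumed to
# come from this module (see training_loop's docstring) and to define the
# 'error_scale' and 'error_floor' entries that set_hparam then overwrites:
#
#   hparams = create_hparams()
#   snapshots = np.zeros((100, 512))  # [examples, x] high-resolution data
#   set_data_dependent_hparams(hparams, snapshots)
#   # hparams.error_scale and hparams.error_floor are now populated.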
def hparams_to_json(hp: tf.contrib.training.HParams) -> Text:
  """Converts HParams to JSON."""
  d = hp.values()

  def sanitize(v):
    if isinstance(v, enum.Enum):
      return v.name
    return v

  sanitized = {k: sanitize(v) for k, v in d.items()}
  return json.dumps(sanitized, indent=2, sort_keys=True)
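

# Self-contained sketch of hparams_to_json: enum values serialize by name,
# everything else passes through json.dumps. The Color enum is purely
# illustrative and not part of the model.
def _hparams_to_json_example() -> Text:
  class Color(enum.Enum):
    RED = 0

  hp = tf.contrib.training.HParams(initial_lr=0.1, color=Color.RED)
  # Returns '{\n  "color": "RED",\n  "initial_lr": 0.1\n}'.
  return hparams_to_json(hp)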
def update_hparams(
    hp: tf.contrib.training.HParams) -> tf.contrib.training.HParams:
  """Converts a Vizier object to a valid HParams."""
  logging.info('Updating HParams from: %r', hp)
  d = hp.values()

  # Add implicit hyperparameter.
  if 'decay_steps' not in d:
    d['decay_steps'] = int(
        hp.num_epochs * (NUM_TRAIN_EXAMPLES // hp.batch_size))

  def to_bool(value: Union[bool, Text]) -> bool:
    if isinstance(value, bool):
      return value
    return {'False': False, 'True': True}[value]

  # Use booleans.
  d['use_nesterov'] = to_bool(hp.use_nesterov)
  d['depthwise'] = to_bool(hp.depthwise)

  # Use enum types.
  d['optimizer'] = model.Optimizer[hp.optimizer]
  d['lr_decay'] = model.LRDecaySchedule[hp.lr_decay]
  d['activation_1'] = model.Activation[hp.activation_1]
  d['activation_2'] = model.Activation[hp.activation_2]
  d['activation_3'] = model.Activation[hp.activation_3]

  # Some floats arrive cast to ints; coerce them back to float.
  d['weight_decay'] = float(hp.weight_decay)
  d['dropout_1'] = float(hp.dropout_1)
  d['dropout_2'] = float(hp.dropout_2)
  d['dropout_3'] = float(hp.dropout_3)

  hp = tf.contrib.training.HParams(**d)
  return model.validate(hp)
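

# Sketch of the round trip from a Vizier trial, which hands back flat strings.
# The member names below ('SGD', 'COSINE') are hypothetical; the real ones
# live in the model.Optimizer / model.LRDecaySchedule enums:
#
#   raw = tf.contrib.training.HParams(optimizer='SGD', lr_decay='COSINE',
#                                     use_nesterov='True', ...)
#   hp = update_hparams(raw)
#   assert isinstance(hp.use_nesterov, bool)
#   assert isinstance(hp.optimizer, model.Optimizer)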
def training_loop(snapshots: np.ndarray,
                  checkpoint_dir: str,
                  hparams: tf.contrib.training.HParams,
                  master: str = '') -> pd.DataFrame:
  """Run training.

  Args:
    snapshots: np.ndarray with shape [examples, x] with high-resolution
      training data.
    checkpoint_dir: directory to which to save model checkpoints.
    hparams: hyperparameters for training, as created by create_hparams().
    master: string master to use for MonitoredTrainingSession.

  Returns:
    pd.DataFrame with metrics for the full training run.
  """
  hparams = copy.deepcopy(hparams)
  set_data_dependent_hparams(hparams, snapshots)
  logging.info('Training with hyperparameters:\n%r', hparams)

  hparams_path = os.path.join(checkpoint_dir, 'hparams.pbtxt')
  with tf.gfile.GFile(hparams_path, 'w') as f:
    f.write(str(hparams.to_proto()))

  logging.info('Setting up training')
  _, train_step = setup_training(snapshots, hparams)
  train_inferer = Inferer(snapshots, hparams, training=True)
  test_inferer = Inferer(snapshots, hparams, training=False)
  global_step = tf.train.get_or_create_global_step()

  logging.info('Variables: %s', '\n'.join(map(str, tf.trainable_variables())))

  logged_metrics = []
  equation_type = equations.equation_type_from_hparams(hparams)

  with tf.train.MonitoredTrainingSession(
      master=master,
      checkpoint_dir=checkpoint_dir,
      save_checkpoint_secs=300,
      config=_disable_rewrite_config(),
      hooks=[SaveAtEnd(checkpoint_dir_to_path(checkpoint_dir))]) as sess:

    test_writer = tf.summary.FileWriter(
        os.path.join(checkpoint_dir, 'test'), sess.graph, flush_secs=60)
    train_writer = tf.summary.FileWriter(
        os.path.join(checkpoint_dir, 'train'), sess.graph, flush_secs=60)

    initial_step = sess.run(global_step)
    with test_writer, train_writer:
      for step in range(initial_step, hparams.learning_stops[-1]):
        sess.run(train_step)

        if (step + 1) % hparams.eval_interval == 0:
          train_inference_data = train_inferer.run(sess)
          test_inference_data = test_inferer.run(sess)

          train_metrics = calculate_metrics(train_inference_data,
                                            equation_type)
          test_metrics = calculate_metrics(test_inference_data, equation_type)
          logged_metrics.append((step, test_metrics, train_metrics))

          logging.info(metrics_one_linear(test_metrics))
          save_summaries(test_metrics, test_writer, global_step=step)
          save_summaries(train_metrics, train_writer, global_step=step)

  return metrics_to_dataframe(logged_metrics)
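

# Usage sketch (create_hparams() is assumed per the docstring above; the
# snapshot loading step is hypothetical):
#
#   snapshots = np.load('snapshots.npy')  # shape [examples, x]
#   hparams = create_hparams()
#   metrics = training_loop(snapshots, '/tmp/checkpoints', hparams)
#   logging.info('final test metrics:\n%s', metrics.tail(1))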
def drop_batch_param(
    hp: tf.contrib.training.HParams,
) -> tf.contrib.training.HParams:
  """Returns a copy of the given HParams without the 'batch_size' member."""
  d = hp.values()
  d.pop('batch_size')
  return tf.contrib.training.HParams(**d)
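

# Sketch: dropping 'batch_size' lets runs that differ only in batch size be
# keyed by an otherwise identical configuration (values here are hypothetical).
def _drop_batch_param_example():
  hp = tf.contrib.training.HParams(batch_size=32, initial_lr=0.1)
  reduced = drop_batch_param(hp)
  assert 'batch_size' not in reduced.values()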