def eval_function(hparams_dict):
    """This function takes a hyperparameter configuration, trains the
    corresponding model on the training data set, creates the predictions,
    and returns the evaluated MAPE on the evaluation data set.
    """
    # set the data directory
    file_dir = os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe())))
    data_dir = os.path.join(file_dir, data_relative_dir)

    hparams_dict = dict(hparams_dict)
    for key in LIST_HYPERPARAMETER:
        hparams_dict[key] = [hparams_dict[key]]

    # add the values of the other hyperparameters which are not tuned
    hparams_dict["encoder_rnn_layers"] = 1
    hparams_dict["decoder_rnn_layers"] = 1
    hparams_dict["decoder_variational_dropout"] = [False]
    hparams_dict["asgd_decay"] = None

    hparams = training.HParams(**hparams_dict)

    # use round 1 training data for hyperparameter tuning to avoid data
    # leakage for later rounds
    submission_round = 1
    make_features_flag = False
    train_model_flag = True
    train_back_offset = 3  # equal to predict_window
    predict_cut_mode = "eval"

    # get prediction
    pred_o, train_mape = create_round_prediction(
        data_dir,
        submission_round,
        hparams,
        make_features_flag=make_features_flag,
        train_model_flag=train_model_flag,
        train_back_offset=train_back_offset,
        predict_cut_mode=predict_cut_mode,
    )
    # get rid of prediction at horizon 1
    pred_sub = pred_o[:, 1:].reshape((-1))

    # evaluate the prediction on the last two days of the first-round
    # training data
    # TODO: get train error and evaluation error for different parameters
    train_file = os.path.join(
        data_dir, "train/train_round_{}.csv".format(submission_round))
    train = pd.read_csv(train_file, index_col=False)
    train_last_week = bs.TRAIN_END_WEEK_LIST[submission_round - 1]
    # filter the train set to contain only the last two days' data
    train = train.loc[train["week"] >= train_last_week - 1]

    # create the data frame without missing dates
    store_list = train["store"].unique()
    brand_list = train["brand"].unique()
    week_list = range(train_last_week - 1, train_last_week + 1)
    item_list = list(itertools.product(store_list, brand_list, week_list))
    item_df = pd.DataFrame.from_records(
        item_list, columns=["store", "brand", "week"])
    train = item_df.merge(train, how="left", on=["store", "brand", "week"])

    result = train.sort_values(by=["store", "brand", "week"], ascending=True)
    result["prediction"] = pred_sub
    result["sales"] = result["logmove"].apply(lambda x: round(np.exp(x)))

    # calculate MAPE on the evaluation set
    result = result.loc[result["sales"].notnull()]
    eval_mape = MAPE(result["prediction"], result["sales"])
    return eval_mape
__author__ = 'KKishore'

import tensorflow as tf

from tensorflow.contrib import training

from model.cnn_model import model_fn, input_fn, serving_fn

tf.logging.set_verbosity(tf.logging.INFO)

N_WORDS = 0

with open('data/nwords.csv', 'r') as f:
    N_WORDS = int(f.read()) + 2

hparams = training.HParams(N_WORDS=N_WORDS)

print(N_WORDS)

estimator = tf.estimator.Estimator(model_fn=model_fn, params=hparams,
                                   model_dir='build/')

estimator.train(
    input_fn=lambda: input_fn('data/train.tsv', shuffle=True, repeat_count=5))

evaluated_results = estimator.evaluate(
    input_fn=lambda: input_fn('data/dev.tsv', shuffle=False, repeat_count=1))

print("# Evaluated Results: {}".format(evaluated_results))

# The original snippet was truncated mid-call; the completion below is an
# assumption, passing the serving_fn imported above as the receiver function.
estimator.export_savedmodel(export_dir_base='serving',
                            serving_input_receiver_fn=serving_fn)
def run_trial(trial_idx, delta, algo_names):
  """Runs a trial of the wheel bandit problem instance for a set of algorithms."""
  filename = os.path.join(FLAGS.datasetdir,
                          str(delta) + '_' + str(trial_idx) + '.npz')
  with gfile.GFile(filename, 'r') as f:
    sampled_vals = np.load(f)
    dataset = sampled_vals['dataset']
    opt_rewards = sampled_vals['opt_rewards']

  x_hidden_size = 100
  x_encoder_sizes = [x_hidden_size] * 2

  algos = []
  for algo_name in algo_names:
    if algo_name == 'uniform':
      hparams = contrib_training.HParams(num_actions=num_actions)
      algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
    elif algo_name == 'neurolinear':
      hparams = contrib_training.HParams(
          num_actions=num_actions,
          context_dim=context_dim,
          init_scale=0.3,
          activation=tf.nn.relu,
          output_activation=tf.nn.relu,
          layer_sizes=x_encoder_sizes,
          batch_size=512,
          activate_decay=True,
          initial_lr=0.1,
          max_grad_norm=5.0,
          show_training=False,
          freq_summary=1000,
          buffer_s=-1,
          initial_pulls=2,
          reset_lr=True,
          lr_decay_rate=0.5,
          training_freq=1,
          training_freq_network=20,
          training_epochs=50,
          a0=12,
          b0=30,
          lambda_prior=23)
      algos.append(
          neural_linear_sampling.NeuralLinearPosteriorSampling(
              algo_name, hparams))
    elif algo_name == 'multitaskgp':
      hparams_gp = contrib_training.HParams(
          num_actions=num_actions,
          num_outputs=num_actions,
          context_dim=context_dim,
          reset_lr=False,
          learn_embeddings=True,
          max_num_points=1000,
          show_training=False,
          freq_summary=1000,
          batch_size=512,
          keep_fixed_after_max_obs=True,
          training_freq=20,
          initial_pulls=2,
          training_epochs=50,
          lr=0.01,
          buffer_s=-1,
          initial_lr=0.001,
          lr_decay_rate=0.0,
          optimizer='RMS',
          task_latent_dim=5,
          activate_decay=False)
      algos.append(
          posterior_bnn_sampling.PosteriorBNNSampling(
              algo_name, hparams_gp, 'GP'))
    elif algo_name[:3] == 'gnp':
      hidden_size = 64
      x_encoder_net_sizes = None
      decoder_net_sizes = [hidden_size] * 3 + [2 * num_actions]
      heteroskedastic_net_sizes = None
      att_type = 'multihead'
      att_heads = 8
      data_uncertainty = False
      config = algo_name.split('_')
      model_type = config[1]

      if algo_name[:len('gnp_anp_beta_')] == 'gnp_anp_beta_':
        mfile = algo_name + FLAGS.suffix
        x_y_encoder_net_sizes = [hidden_size] * 3
        global_latent_net_sizes = [hidden_size] * 2
        local_latent_net_sizes = None
        beta = float(config[3])
        temperature = float(config[5])
      else:
        mfile = FLAGS.prefix + config[1] + FLAGS.suffix
        if model_type == 'cnp':
          x_y_encoder_net_sizes = [hidden_size] * 4
          global_latent_net_sizes = None
          local_latent_net_sizes = None
        elif model_type == 'np':
          x_y_encoder_net_sizes = [hidden_size] * 2
          global_latent_net_sizes = [hidden_size] * 2
          local_latent_net_sizes = None
        elif model_type == 'anp':
          x_y_encoder_net_sizes = [hidden_size] * 2
          global_latent_net_sizes = [hidden_size] * 2
          local_latent_net_sizes = None
        elif model_type == 'acnp':
          x_y_encoder_net_sizes = [hidden_size] * 4
          global_latent_net_sizes = None
          local_latent_net_sizes = None
        elif model_type == 'acns':
          x_y_encoder_net_sizes = [hidden_size] * 2
          global_latent_net_sizes = [hidden_size] * 2
          local_latent_net_sizes = [hidden_size] * 2
        beta = 1.
        temperature = 1.
      mpath = os.path.join(FLAGS.modeldir, mfile)
      hparams = contrib_training.HParams(
          num_actions=num_actions,
          context_dim=context_dim,
          init_scale=0.3,
          activation=tf.nn.relu,
          output_activation=tf.nn.relu,
          x_encoder_net_sizes=x_encoder_net_sizes,
          x_y_encoder_net_sizes=x_y_encoder_net_sizes,
          global_latent_net_sizes=global_latent_net_sizes,
          local_latent_net_sizes=local_latent_net_sizes,
          decoder_net_sizes=decoder_net_sizes,
          heteroskedastic_net_sizes=heteroskedastic_net_sizes,
          att_type=att_type,
          att_heads=att_heads,
          model_type=model_type,
          data_uncertainty=data_uncertainty,
          beta=beta,
          temperature=temperature,
          model_path=mpath,
          batch_size=512,
          activate_decay=True,
          initial_lr=0.1,
          max_grad_norm=5.0,
          show_training=False,
          freq_summary=1000,
          buffer_s=-1,
          initial_pulls=2,
          reset_lr=True,
          lr_decay_rate=0.5,
          training_freq=10,
          training_freq_network=20,
          training_epochs=50)
      algos.append(
          offline_contextual_bandits_gnp.OfflineContextualBandits(
              algo_name, hparams))

  t_init = time.time()
  _, h_rewards = contextual_bandit.run_contextual_bandit(
      context_dim,
      num_actions,
      dataset,
      algos,
      num_contexts=FLAGS.num_contexts)  # pytype: disable=wrong-keyword-args
  t_final = time.time()

  return h_rewards, t_final - t_init, opt_rewards[:FLAGS.num_contexts]
    return self._generate_events(
        num_steps=num_steps,
        primer_events=primer_sequence,
        temperature=None,
        beam_size=beam_size,
        branch_factor=branch_factor,
        steps_per_iteration=steps_per_iteration)


default_configs = {
    'rnn-nade':
        events_rnn_model.EventSequenceRnnConfig(
            magenta.protobuf.generator_pb2.GeneratorDetails(
                id='rnn-nade', description='RNN-NADE'),
            mm.PianorollEncoderDecoder(),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[128, 128, 128],
                nade_hidden_units=128,
                dropout_keep_prob=0.5,
                clip_norm=5,
                learning_rate=0.001)),
    'rnn-nade_attn':
        events_rnn_model.EventSequenceRnnConfig(
            magenta.protobuf.generator_pb2.GeneratorDetails(
                id='rnn-nade_attn', description='RNN-NADE with attention.'),
            mm.PianorollEncoderDecoder(),
            contrib_training.HParams(
                batch_size=48,
                rnn_layer_sizes=[128, 128],
                attn_length=32,
                nade_hidden_units=128,
                dropout_keep_prob=0.5,
                clip_norm=5,
                # The original snippet was truncated here; the closing
                # learning rate mirrors the parallel 'rnn-nade' config above.
                learning_rate=0.001)),
}
def get_pruning_hparams():
  """Get a tf.HParams object with the default values for the hyperparameters.

    name: string
      name of the pruning specification. Used for adding summaries and ops
      under a common tensorflow name_scope
    begin_pruning_step: integer
      the global step at which to begin pruning
    end_pruning_step: integer
      the global step at which to terminate pruning. Defaults to -1 implying
      that pruning continues till the training stops
    weight_sparsity_map: list of strings
      comma separated list of {weight_variable_name:target sparsity} or
      {regex:target sparsity} pairs. For layers/weights not in this list,
      sparsity as specified by the target_sparsity hyperparameter is used.
      Eg. [conv1:0.9,conv2/kernel:0.8]
    block_dims_map: list of strings
      comma separated list of {weight variable name:block_height x
      block_width} or {regex:block_height x block_width} pairs. For
      layers/weights not in this list, the block dims specified by the
      block_height, block_width hyperparameters are used.
      Eg. [dense1:4x4,dense2:1x16,dense3:1x1]
    threshold_decay: float
      the decay factor to use for exponential decay of the thresholds
    pruning_frequency: integer
      how often the masks should be updated (in # of global_steps)
    nbins: integer
      number of bins to use for histogram computation
    block_height: integer
      number of rows in a block (defaults to 1), can be -1 in which case it
      is set to the size of the corresponding weight tensor.
    block_width: integer
      number of cols in a block (defaults to 1), can be -1 in which case it
      is set to the size of the corresponding weight tensor.
    block_pooling_function: string
      whether to perform average (AVG) or max (MAX) pooling in the block
      (default: AVG)
    initial_sparsity: float
      initial sparsity value
    target_sparsity: float
      target sparsity value
    sparsity_function_begin_step: integer
      the global step at which the gradual sparsity function begins to take
      effect
    sparsity_function_end_step: integer
      the global step used as the end point for the gradual sparsity function
    sparsity_function_exponent: float
      exponent = 1 is linearly varying sparsity between initial and final.
      exponent > 1 varies more slowly towards the end than the beginning
    use_tpu: False
      indicates whether to use TPU
    gradient_decay_rate: float
      when prune_option is gradient-based pruning, the decay factor for
      gradient decay
    prune_option: string
      option = 'weight' means using |weight| for pruning.
      option = 'first_order_gradient' means using
        |weight| * |first order gradient| for pruning.
      option = 'second_order_gradient' means using
        |weight| * |second order gradient| for pruning. The second order
        gradient is approximated by |weight + old_old_weight - 2*old_weight|.
      option = 'compression' means using compression.
    alpha_decrement_value: only effective when prune_option is 'compression',
      see graph_compression/compression_lib/compression_op.py.

    The following arguments are all only effective when
    prune_option == 'compression', see
    graph_compression/compression_lib/compression_op.py for details.

    begin_compression_step: only effective when prune_option is 'compression',
      see graph_compression/compression_op.py.
    end_compresson_step: only effective when prune_option is 'compression',
      see graph_compression/compression_op.py.
    compression_frequency: only effective when prune_option is 'compression',
      see graph_compression/compression_op.py.
    compression_option: only effective when prune_option is 'compression',
      see graph_compression/compression_op.py.
    rank: only effective when prune_option is 'compression',
      see graph_compression/compression_op.py.
    update_option: only effective when prune_option is 'compression',
      see graph_compression/compression_op.py.
    run_update_interval_check: only effective when prune_option is
      'compression', see graph_compression/compression_op.py.
    pruning_fraction: only effective when prune_option is 'compression',
      see graph_compression/compression_op.py.

  We use the following sparsity function:

    num_steps = (sparsity_function_end_step -
                 sparsity_function_begin_step) / pruning_frequency
    sparsity(step) = (initial_sparsity - target_sparsity) *
                     [1 - step / (num_steps - 1)]**exponent + target_sparsity

  Args:
    None

  Returns:
    tf.HParams object initialized to default values
  """
  return contrib_training.HParams(
      name='model_pruning',
      begin_pruning_step=0,
      end_pruning_step=-1,
      weight_sparsity_map=[''],
      block_dims_map=[''],
      threshold_decay=0.0,
      pruning_frequency=10,
      nbins=256,
      block_height=1,
      block_width=1,
      block_pooling_function='AVG',
      initial_sparsity=0.0,
      target_sparsity=0.5,
      sparsity_function_begin_step=0,
      sparsity_function_end_step=100,
      sparsity_function_exponent=3.0,
      use_tpu=False,
      gradient_decay_rate=0.99,
      prune_option='weight',
      alpha_decrement_value=0.01,
      begin_compression_step=0,
      end_compresson_step=-1,
      compression_frequency=10,
      compression_option=0,
      rank=7,
      update_option=0,
      run_update_interval_check=1,
      pruning_fraction=0.4)
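# A minimal usage sketch (an assumption, not from the original source): the
# defaults above can be overridden with the standard comma-separated
# "name=value" string accepted by HParams.parse(), which returns the same
# HParams object.
pruning_hparams = get_pruning_hparams().parse(
    'name=my_pruning,begin_pruning_step=2000,target_sparsity=0.9')
assert pruning_hparams.target_sparsity == 0.9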
def run_trial(trial_idx, delta, algo_names):
  """Runs a trial of the wheel bandit problem instance for a set of algorithms."""
  all_algo_names = '_'.join(algo_names)
  runfile = str(delta) + '_' + str(trial_idx) + '_' + all_algo_names + '.pkl'
  savefile = os.path.join(FLAGS.savedir, runfile)
  if gfile.Exists(savefile):
    print('File exists...terminating')
    with gfile.Open(savefile, 'rb') as infile:
      saved_state = pickle.load(infile, encoding='latin-1')
    return saved_state['h_rewards'], saved_state['time']

  filename = os.path.join(FLAGS.datasetdir,
                          str(delta) + '_' + str(trial_idx) + '.npz')
  with gfile.GFile(filename, 'r') as f:
    sampled_vals = np.load(f)
    dataset = sampled_vals['dataset']

  x_hidden_size = 100
  x_encoder_sizes = [x_hidden_size] * 2

  algos = []
  ckptfile = None
  save_once = False
  for algo_name in algo_names:
    if algo_name == 'uniform':
      hparams = contrib_training.HParams(num_actions=num_actions)
      algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
    elif algo_name == 'neurolinear':
      hparams = contrib_training.HParams(
          num_actions=num_actions,
          context_dim=context_dim,
          init_scale=0.3,
          activation=tf.nn.relu,
          output_activation=tf.nn.relu,
          layer_sizes=x_encoder_sizes,
          batch_size=512,
          activate_decay=True,
          initial_lr=0.1,
          max_grad_norm=5.0,
          show_training=False,
          freq_summary=1000,
          buffer_s=-1,
          initial_pulls=2,
          reset_lr=True,
          lr_decay_rate=0.5,
          training_freq=1,
          training_freq_network=20,
          training_epochs=50,
          a0=12,
          b0=30,
          lambda_prior=23)
      algos.append(
          neural_linear_sampling.NeuralLinearPosteriorSampling(
              algo_name, hparams))
    elif algo_name == 'multitaskgp':
      hparams_gp = contrib_training.HParams(
          num_actions=num_actions,
          num_outputs=num_actions,
          context_dim=context_dim,
          reset_lr=False,
          learn_embeddings=True,
          max_num_points=1000,
          show_training=False,
          freq_summary=1000,
          batch_size=512,
          keep_fixed_after_max_obs=True,
          training_freq=20,
          initial_pulls=2,
          training_epochs=50,
          lr=0.01,
          buffer_s=-1,
          initial_lr=0.001,
          lr_decay_rate=0.0,
          optimizer='RMS',
          task_latent_dim=5,
          activate_decay=False)
      algos.append(
          posterior_bnn_sampling.PosteriorBNNSampling(
              algo_name, hparams_gp, 'GP'))
    elif algo_name[:3] == 'snp' or algo_name[:3] == 'anp':
      hidden_size = 64
      latent_units = 32
      global_latent_net_sizes = [hidden_size] * 2 + [2 * latent_units]
      if algo_name[:3] == 'snp':
        local_latent_net_sizes = [hidden_size] * 3 + [2]
      else:
        local_latent_net_sizes = [hidden_size] * 3 + [2 * 5]
      x_y_encoder_sizes = [hidden_size] * 3
      heteroskedastic_net_sizes = None
      mean_att_type = attention.laplace_attention
      scale_att_type_1 = attention.laplace_attention
      scale_att_type_2 = attention.laplace_attention
      att_type = 'multihead'
      att_heads = 8
      data_uncertainty = False
      is_anp = True if algo_name[:3] == 'anp' else False

      hparams = contrib_training.HParams(
          num_actions=num_actions,
          context_dim=context_dim,
          init_scale=0.3,
          activation=tf.nn.relu,
          output_activation=tf.nn.relu,
          x_encoder_sizes=x_encoder_sizes,
          x_y_encoder_sizes=x_y_encoder_sizes,
          global_latent_net_sizes=global_latent_net_sizes,
          local_latent_net_sizes=local_latent_net_sizes,
          heteroskedastic_net_sizes=heteroskedastic_net_sizes,
          att_type=att_type,
          att_heads=att_heads,
          mean_att_type=mean_att_type,
          scale_att_type_1=scale_att_type_1,
          scale_att_type_2=scale_att_type_2,
          data_uncertainty=data_uncertainty,
          batch_size=512,
          activate_decay=True,
          initial_lr=0.1,
          max_grad_norm=5.0,
          show_training=False,
          freq_summary=1000,
          buffer_s=-1,
          initial_pulls=2,
          reset_lr=True,
          lr_decay_rate=0.5,
          training_freq=10,
          training_freq_network=20,
          training_epochs=50,
          uncertainty_type='attentive_freeform',
          local_variational=True,
          model_path=None,
          is_anp=is_anp)

      config = algo_name.split('_')
      if config[1] == 'prior':
        hparams.set_hparam('local_variational', False)
      if config[2] == 'gp':
        hparams.set_hparam('uncertainty_type', 'attentive_gp')
      if config[3] == 'warmstart' or config[3] == 'offline':
        mfile = FLAGS.prefix + config[1] + '_' + config[2] + FLAGS.suffix
        if algo_name[:3] == 'anp':
          mfile = 'anp_' + mfile
        mpath = os.path.join(FLAGS.modeldir, mfile)
        hparams.set_hparam('model_path', mpath)
      if config[3] == 'online' or config[3] == 'warmstart':
        algos.append(
            online_contextual_bandits.OnlineContextualBandits(
                algo_name, hparams))
      else:
        algos.append(
            offline_contextual_bandits.OfflineContextualBandits(
                algo_name, hparams))
      ckptfile = os.path.join(FLAGS.ckptdir, runfile)
      if gfile.Exists(ckptfile):
        save_once = True

  t_init = time.time()
  print('started')
  _, h_rewards = run_contextual_bandit(
      dataset, algos, save_once=save_once, pkl_file=ckptfile)
  t_final = time.time()

  savedict = {'h_rewards': h_rewards, 'time': t_final - t_init}
  with gfile.Open(savefile, 'wb') as outfile:
    pickle.dump(savedict, outfile)
  return h_rewards, t_final - t_init
    }),
    # Reverb (for now just single-parameter).
    ('reverb', {
        'reverberance': (0.0, 70.0, 'linear'),
    }),
]

# Default hyperparameter values from the above pipeline. Note the additional
# `transform_audio` hparam that defaults to False, i.e. by default no audio
# transformation will be performed.
DEFAULT_AUDIO_TRANSFORM_HPARAMS = contrib_training.HParams(
    transform_audio=False,
    audio_transform_noise_type='pinknoise',
    audio_transform_min_noise_vol=0.0,
    audio_transform_max_noise_vol=0.04,
    **dict(('audio_transform_%s_%s_%s' % (m, stage_name, param_name), value)
           for stage_name, params_dict in AUDIO_TRANSFORM_PIPELINE
           for param_name, (min_value, max_value, _) in params_dict.items()
           for m, value in [('min', min_value), ('max', max_value)]))


class AudioTransformParameter(object):
  """An audio transform parameter with min and max value."""

  def __init__(self, name, min_value, max_value, scale):
    """Initialize an AudioTransformParameter.

    Args:
      name: The name of the parameter. Should be the same as the name of the
        parameter passed to sox.
      min_value: The minimum value of the parameter, a float.
def testIntegratedGradientAttribution(self):
  # Due to complexity of the indicator we cannot easily extend this test to
  # > 1 lab test.
  obs_values = tf.constant([[[10000.0], [15000.0], [2.0]],
                            [[0.0], [100.0], [2000.0]]])
  # We compare these values to a linear interpolation between the second to
  # the last and the last value of the test.
  obs_values_base = tf.constant([[[10000.0], [15000.0], [15000.0]],
                                 [[0.0], [100.0], [100.0]]])
  # For this test we need to select all attributions in order for consistency
  # to hold.
  indicator = tf.ones(shape=[2, 3, 1], dtype=tf.float32)
  delta_time = tf.constant([[[1000], [999], [2]], [[1001], [500], [20]]],
                           dtype=tf.float32)
  # Selected so that the attribution is only over the third time step in both
  # batch entries.
  attribution_max_delta_time = 100
  num_classes = 1
  diff_delta_time = tf.constant(
      [[[1000], [1], [997]], [[1001], [501], [480]]], dtype=tf.float32)
  # This is also important to not lose any time steps in the attribution.
  sequence_length = tf.constant([3, 3])

  # TODO(milah): Not clear why this test doesn't work for the RNN.
  def construct_logits_fn(unused_diff_delta_time, obs_values,
                          unused_indicator, unused_sequence_length,
                          unused_seq_mask, unused_hparams, reuse):
    result = tf.layers.dense(
        obs_values, num_classes, name='test1', reuse=reuse,
        activation=None) * (
            tf.expand_dims(obs_values[:, 0, :], axis=1) + 0.5)
    return result, None

  # First setup the weights of the RNN.
  logits, _ = construct_logits_fn(diff_delta_time, obs_values, indicator,
                                  sequence_length, None, None, False)
  # To verify the correctness of the attribution we compute the prediction at
  # the obs_values_base.
  base_logits, _ = construct_logits_fn(diff_delta_time, obs_values_base,
                                       indicator, sequence_length, None, None,
                                       True)
  # Set high for increased precision of the approximation.
  num_steps = 100
  hparams = contrib_training.HParams(
      sequence_prediction=True,
      use_rnn_attention=False,
      path_integrated_gradients_num_steps=num_steps,
      attribution_max_delta_time=attribution_max_delta_time)
  gradients = osm.compute_path_integrated_gradient_attribution(
      obs_values, indicator, diff_delta_time, delta_time, sequence_length,
      None, hparams, construct_logits_fn)
  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    actual_logits = sess.run(logits)
    actual_base_logits = sess.run(base_logits)
    actual_gradients = sess.run(gradients)
    self.assertAllClose(
        actual_logits - actual_base_logits, actual_gradients, atol=0.001)
def main():
  args = _parse_arguments(sys.argv[1:])
  hparams = training.HParams(**args.__dict__)
  tf.logging.set_verbosity(tf.logging.INFO)
  run_experiment(hparams)
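# A plausible sketch of the _parse_arguments helper assumed above (this is
# hypothetical; the real parser lives elsewhere in the project). It shows why
# **args.__dict__ works: argparse returns a Namespace whose __dict__ maps flag
# names to values, which feeds directly into HParams.
import argparse

def _parse_arguments_sketch(argv):
  parser = argparse.ArgumentParser()
  parser.add_argument('--learning_rate', type=float, default=0.01)  # assumed flag
  parser.add_argument('--batch_size', type=int, default=32)  # assumed flag
  return parser.parse_args(argv)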
DEFAULT_HPARAMS = tf_utils.merge_hparams(
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    contrib_training.HParams(
        eval_batch_size=1,
        predict_batch_size=1,
        shuffle_buffer_size=64,
        sample_rate=16000,
        spec_type='mel',
        spec_mel_htk=True,
        spec_log_amplitude=True,
        spec_hop_length=512,
        spec_n_bins=229,
        spec_fmin=30.0,  # A0
        cqt_bins_per_octave=36,
        truncated_length_secs=0.0,
        max_expected_train_example_len=0,
        onset_length=32,
        offset_length=32,
        onset_mode='length_ms',
        onset_delay=0,
        min_frame_occupancy_for_label=0.0,
        jitter_amount_ms=0,
        min_duration_ms=0,
        backward_shift_amount_ms=0,
        velocity_scale=80.0,
        velocity_bias=10.0,
        drum_data_map='',
        drum_prediction_map='',
        velocity_loss_weight=1.0,
        splice_n_examples=0))

CONFIG_MAP = {}
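# For reference, a plausible sketch of what tf_utils.merge_hparams does (an
# assumption; the real helper lives in the project's tf_utils module): combine
# the values of two HParams objects into a new one, with the second argument
# winning on duplicate keys.
def merge_hparams_sketch(hparams_1, hparams_2):
  merged = dict(hparams_1.values())
  merged.update(hparams_2.values())
  return contrib_training.HParams(**merged)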
def testBasicModelFn(self, sequence_prediction, include_gradients,
                     include_gradients_sum_time, include_gradients_avg_time,
                     include_path_integrated_gradients,
                     include_diff_sequence_prediction, use_rnn_attention,
                     attention_hidden_layer_dim, volatility_loss_factor):
  """This high-level test ensures there are no errors during training.

  It also checks that the loss is decreasing.

  Args:
    sequence_prediction: Whether to consider the recent predictions in the
      loss or only the last prediction.
    include_gradients: Whether to generate attribution with the gradients of
      the last predictions.
    include_gradients_sum_time: Whether to generate attribution with the
      gradients of the sum of the predictions over time.
    include_gradients_avg_time: Whether to generate attribution with the
      gradients of the average of the predictions over time.
    include_path_integrated_gradients: Whether to generate attribution with
      the integrated gradients of the last predictions compared to their most
      recent values before attribution_max_delta_time.
    include_diff_sequence_prediction: Whether to generate attribution from
      the difference of consecutive predictions.
    use_rnn_attention: Whether to use attention for the RNN.
    attention_hidden_layer_dim: If use_rnn_attention, the dimensionality of a
      hidden layer (or 0 if none) of last output and intermediates before
      multiplying to obtain a weight.
    volatility_loss_factor: Include the sum of the changes in predictions
      across the sequence in the loss multiplied by this factor.
  """
  num_steps = 2
  hparams = contrib_training.HParams(
      batch_size=2,
      learning_rate=0.008,
      sequence_features=[
          'deltaTime', 'Observation.code', 'Observation.valueQuantity.value'
      ],
      categorical_values=['loinc:1', 'loinc:2', 'MISSING'],
      categorical_seq_feature='Observation.code',
      context_features=['sequenceLength'],
      feature_value='Observation.valueQuantity.value',
      label_key='label.in_hospital_death',
      attribution_threshold=-1.0,
      rnn_size=6,
      variational_recurrent_keep_prob=1.1,
      variational_input_keep_prob=1.1,
      variational_output_keep_prob=1.1,
      sequence_prediction=sequence_prediction,
      time_decayed=False,
      normalize=True,
      momentum=0.9,
      min_value=-1000.0,
      max_value=1000.0,
      volatility_loss_factor=volatility_loss_factor,
      attribution_max_delta_time=100000,
      input_keep_prob=1.0,
      include_sequence_prediction=sequence_prediction,
      include_gradients_attribution=include_gradients,
      include_gradients_sum_time_attribution=include_gradients_sum_time,
      include_gradients_avg_time_attribution=include_gradients_avg_time,
      include_path_integrated_gradients_attribution=(
          include_path_integrated_gradients),
      include_diff_sequence_prediction_attribution=(
          include_diff_sequence_prediction),
      use_rnn_attention=use_rnn_attention,
      attention_hidden_layer_dim=attention_hidden_layer_dim,
      path_integrated_gradients_num_steps=10,
  )
  observation_values = tf.SparseTensor(
      indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [1, 0, 0], [1, 1, 0],
               [1, 2, 0]],
      values=[100.0, 2.3, 9999999.0, 0.5, 0.0, 4.0],
      dense_shape=[2, 3, 1])
  model = osm.ObservationSequenceModel()
  model_fn = model.create_model_fn(hparams)
  features = {
      input_fn.CONTEXT_KEY_PREFIX + 'sequenceLength':
          tf.constant([[2], [3]], dtype=tf.int64),
      input_fn.SEQUENCE_KEY_PREFIX + 'Observation.code':
          tf.SparseTensor(
              indices=observation_values.indices,
              values=[
                  'loinc:2', 'loinc:1', 'loinc:2', 'loinc:1', 'MISSING',
                  'loinc:1'
              ],
              dense_shape=observation_values.dense_shape),
      input_fn.SEQUENCE_KEY_PREFIX + 'Observation.valueQuantity.value':
          observation_values,
      input_fn.SEQUENCE_KEY_PREFIX + 'deltaTime':
          tf.constant([[[1], [2], [0]], [[1], [3], [4]]], dtype=tf.int64)
  }
  label_key = 'label.in_hospital_death'
  labels = {label_key: tf.constant([[1.0], [0.0]], dtype=tf.float32)}
  with tf.variable_scope('test'):
    model_fn_ops_train = model_fn(features, labels,
                                  tf.estimator.ModeKeys.TRAIN)
  with tf.variable_scope('test', reuse=True):
    features[input_fn.CONTEXT_KEY_PREFIX +
             'label.in_hospital_death'] = tf.SparseTensor(
                 indices=[[0, 0]], values=['expired'], dense_shape=[2, 1])
    model_fn_ops_eval = model_fn(
        features, labels=None, mode=tf.estimator.ModeKeys.PREDICT)

  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    # Test train.
    for i in range(num_steps):
      loss, _ = sess.run(
          [model_fn_ops_train.loss, model_fn_ops_train.train_op])
      if i == 0:
        initial_loss = loss
    self.assertLess(loss, initial_loss)
    # Test infer.
    sess.run(model_fn_ops_eval.predictions)
def setUp(self):
  self.config = events_rnn_model.EventSequenceRnnConfig(
      None,
      magenta.music.OneHotEventSequenceEncoderDecoder(
          magenta.music.MultiDrumOneHotEncoding()),
      contrib_training.HParams())
def create_hparams(hparams_override_str=''):
  """Creates default HParams with the option of overrides.

  Args:
    hparams_override_str: String with possible overrides.

  Returns:
    Default HParams.
  """
  hparams = contrib_training.HParams(
      # Sequence features are bucketed by their age at time of prediction in:
      # [time_windows[0] - time_windows[1]),
      # [time_windows[1] - time_windows[2]),
      # ...
      time_windows=[
          5 * 365 * 24 * 60 * 60,  # 5 years
          365 * 24 * 60 * 60,  # 1 year
          30 * 24 * 60 * 60,  # 1 month
          7 * 24 * 60 * 60,  # 1 week
          1 * 24 * 60 * 60,  # 1 day
          0,  # now
      ],
      batch_size=64,
      learning_rate=0.003,
      dedup=True,
      # Currently supported optimizers are Adam and Ftrl.
      optimizer='Ftrl',
      # Note that these regularization terms are only applied for Ftrl.
      l1_regularization_strength=0.0,
      l2_regularization_strength=0.0,
      include_age=True,
      age_boundaries=[1, 5, 18, 30, 50, 70, 90],
      categorical_context_features=['Patient.gender'],
      sequence_features=[
          'Composition.section.text.div.tokenized',
          'Composition.type',
          'Condition.code',
          'Encounter.hospitalization.admitSource',
          'Encounter.reason.hcc',
          'MedicationRequest.contained.medication.code.gsn',
          'Procedure.code.cpt',
      ],
      # Number of hash buckets to map the tokens of the sequence_features
      # into.
      sequence_bucket_sizes=[
          17000,
          16,
          3052,
          10,
          62,
          1600,
          732,
      ],
      # List of strings, each of which is a ':'-separated list of features
      # that we want to concatenate over the time dimension.
      time_crossed_features=[
          '%s:%s:%s:%s' % ('Observation.code',
                           'Observation.value.quantity.value',
                           'Observation.value.quantity.unit',
                           'Observation.value.string')
      ],
      time_concat_bucket_sizes=[39571],
      context_bucket_sizes=[4],
      # Model type needs to be linear or dnn.
      model_type='linear',
      # In case of model_type of dnn we can specify the hidden layer
      # dimensions.
      dnn_hidden_units=[256],
      # In case of model_type of dnn we can specify the dropout probability.
      dnn_dropout=0.1)
  # hparams_override_str overrides any of the preceding hyperparameter values.
  if hparams_override_str:
    hparams = hparams.parse(hparams_override_str)
  return hparams
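# Example override (a sketch, not from the original source): any subset of the
# defaults above can be replaced through the comma-separated override string,
# which is parsed by HParams.parse() inside create_hparams.
hparams = create_hparams('model_type=dnn,learning_rate=0.01,dnn_dropout=0.2')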
    self.num_velocity_bins = num_velocity_bins
    self.control_signals = control_signals
    self.optional_conditioning = optional_conditioning
    self.note_performance = note_performance


default_configs = {
    'performance':
        PerformanceRnnConfig(
            magenta.music.protobuf.generator_pb2.GeneratorDetails(
                id='performance', description='Performance RNN'),
            magenta.music.OneHotEventSequenceEncoderDecoder(
                magenta.music.PerformanceOneHotEncoding()),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[512, 512, 512],
                dropout_keep_prob=1.0,
                clip_norm=3,
                learning_rate=0.001)),
    'performance_with_dynamics':
        PerformanceRnnConfig(
            magenta.music.protobuf.generator_pb2.GeneratorDetails(
                id='performance_with_dynamics',
                description='Performance RNN with dynamics'),
            magenta.music.OneHotEventSequenceEncoderDecoder(
                magenta.music.PerformanceOneHotEncoding(
                    num_velocity_bins=32)),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[512, 512, 512],
                dropout_keep_prob=1.0,
                clip_norm=3,
                learning_rate=0.001),
            num_velocity_bins=32),
def get_hparams(**kwargs):
  """Creates a set of default hyperparameters.

  Note that in addition to the hyperparameters described below, the full set
  of hyperparameters includes input_ops.get_hparams() for specifying the
  input data pipeline (see that function for input_ops hyperparameter
  descriptions).

  Model hyperparameters:
    grammar_path: String, the filename of the txt file containing the grammar
      production rules. Expressions will be parsed by this grammar.
    learning_rate: Float, learning rate.
    learning_rate_decay_rate: Float, decay rate for
      tf.train.exponential_decay.
    learning_rate_decay_steps: Integer, decay steps for
      tf.train.exponential_decay.
    optimizer: String, optimizer name. Must be one of
      tf.contrib.layers.OPTIMIZER_CLS_NAMES.
    save_checkpoints_secs: Integer, number of seconds between model
      checkpoints.
    keep_checkpoint_max: Integer, the maximum number of recent checkpoint
      files to keep. As new files are created, older files are deleted. If
      None or 0, all checkpoint files are kept.
    start_delay_secs: Integer, number of seconds to wait before starting
      evaluations.
    throttle_secs: Integer, number of seconds between evaluations.
    train_steps: Integer, maximum number of training steps. Set to None to
      train forever.
    eval_steps: Integer, number of steps for each evaluation. Set to None to
      evaluate the entire tune/test set.
    embedding_size: Integer, the size of the production rule embedding.
    symbolic_properties: List of strings, symbolic properties to concatenate
      on the embedding as conditions.
    numerical_points: List of floats, points at which to evaluate expression
      values.
    gru_hidden_sizes: List of integers, number of units for each GRU layer.
    bidirectional: Boolean, whether to use bidirectional RNN.
    generation_leading_powers_abs_sums: List of integers, the sum of the
      leading powers at 0 and at inf, defining the condition in generation.
      For example, if generation_leading_powers_abs_sums = [1, 2],
      expressions will be generated with the following conditions
      (leading_at_0, leading_at_inf):
      (0, 1), (-1, 0), (0, -1), (1, 0)
      (0, 2), (-1, 1), (-2, 0), (-1, -1), (0, -2), (1, -1), (2, 0), (1, 1)
      This is used for eval.
    num_expressions_per_condition: Integer, the number of expressions to
      generate for each condition. This is used for eval. Default 0, no
      generation in eval.
    exports_to_keep: Integer, the number of latest exported models to keep.

  Args:
    **kwargs: Dict of parameter overrides.

  Returns:
    HParams.
  """
  hparams = contrib_training.HParams(
      grammar_path=None,
      learning_rate=0.01,
      learning_rate_decay_rate=1.0,
      learning_rate_decay_steps=100000,
      optimizer='Adagrad',
      save_checkpoints_secs=600,
      keep_checkpoint_max=20,
      start_delay_secs=300,
      throttle_secs=300,
      train_steps=None,
      eval_steps=None,
      embedding_size=10,
      symbolic_properties=core.HPARAMS_EMPTY_LIST_STRING,
      numerical_points=core.HPARAMS_EMPTY_LIST_FLOAT,
      gru_hidden_sizes=[100],
      bidirectional=False,
      generation_leading_powers_abs_sums=core.HPARAMS_EMPTY_LIST_INT,
      num_expressions_per_condition=0,
      exports_to_keep=50)

  # Add hparams from input_ops.
  # Using add_hparam ensures there are no duplicated parameters.
  for key, value in six.iteritems(input_ops.get_hparams().values()):
    if key in hparams.values():
      continue  # Skip duplicated parameters.
    hparams.add_hparam(key, value)
  return hparams.override_from_dict(kwargs)
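# A usage sketch (an assumption): keyword overrides are applied last via
# override_from_dict, so they take precedence over both the model defaults
# and the input_ops defaults merged in above.
hparams = get_hparams(learning_rate=0.05, bidirectional=True)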
def test_model_integration(self):
  features, labels = input_fn.get_input_fn(
      tf.estimator.ModeKeys.TRAIN, [self.input_data_dir],
      'label.in_hospital_death.class',
      sequence_features=[
          'Observation.code', 'Observation.value.quantity.value',
          'Observation.value.quantity.unit',
          'Observation.code.harmonized:valueset-observation-name'
      ],
      dense_sequence_feature='Observation.value.quantity.value',
      required_sequence_feature='Observation.code.harmonized:valueset-observation-name',
      batch_size=2,
      shuffle=False)()
  num_steps = 2
  hparams = contrib_training.HParams(
      batch_size=2,
      learning_rate=0.008,
      sequence_features=[
          'deltaTime', 'Observation.code', 'Observation.value.quantity.value'
      ],
      categorical_values=['loinc:4', 'loinc:6', 'loinc:1'],
      categorical_seq_feature='Observation.code',
      context_features=['sequenceLength'],
      feature_value='Observation.value.quantity.value',
      label_key='label.in_hospital_death.class',
      attribution_threshold=-1.0,
      rnn_size=6,
      variational_recurrent_keep_prob=1.1,
      variational_input_keep_prob=1.1,
      variational_output_keep_prob=1.1,
      sequence_prediction=False,
      time_decayed=False,
      normalize=True,
      momentum=0.9,
      min_value=-1000.0,
      max_value=1000.0,
      volatility_loss_factor=0.0,
      attribution_max_delta_time=100000,
      input_keep_prob=1.0,
      include_sequence_prediction=False,
      include_gradients_attribution=True,
      include_gradients_sum_time_attribution=False,
      include_gradients_avg_time_attribution=False,
      include_path_integrated_gradients_attribution=True,
      include_diff_sequence_prediction_attribution=False,
      use_rnn_attention=True,
      attention_hidden_layer_dim=5,
      path_integrated_gradients_num_steps=10,
  )
  model = osm.ObservationSequenceModel()
  model_fn = model.create_model_fn(hparams)
  with tf.variable_scope('test'):
    model_fn_ops_train = model_fn(features, labels,
                                  tf.estimator.ModeKeys.TRAIN)
  with tf.variable_scope('test', reuse=True):
    model_fn_ops_eval = model_fn(
        features, labels=None, mode=tf.estimator.ModeKeys.PREDICT)

  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    # Test train.
    for i in range(num_steps):
      loss, _ = sess.run(
          [model_fn_ops_train.loss, model_fn_ops_train.train_op])
      if i == 0:
        initial_loss = loss
    self.assertLess(loss, initial_loss)
    # Test infer.
    sess.run(model_fn_ops_eval.predictions)
def pegasus_large_params(param_overrides):
  """Params for PegasusLarge."""
  hparams = contrib_training.HParams(
      train_pattern="tfds_transformed:common_crawl-train",
      dev_pattern="tfds_transformed:common_crawl-validation",
      test_pattern="tfds_transformed:common_crawl-test",
      vocab_filename="pegasus/ops/testdata/sp_test.model",
      encoder_type="sentencepiece_newline",
      parser_strategy="dynamic_rouge",
      parser_masked_sentence_ratio=0.45,
      parser_masked_words_ratio=0.0,
      # Configure the options of word masking.
      # The sum of the three probs below (mask word by MSK, random, or
      # intact) should be 1.
      # By default, following the word masking procedure of BERT:
      # 80% by <MSK>, 10% by random tokens, 10% remain unchanged.
      parser_mask_word_by_msk_token_prob=0.8,
      parser_mask_word_by_random_token_prob=0.1,
      parser_mask_word_by_intact_prob=0.1,
      # Configure the options of sentence masking.
      # The sum of the four probs below (mask sentence by MSK, random, intact
      # or remove) should be 1.
      # The four sentence masking options:
      #   1. Masking selected sentences by <MSK>. In practice, the <MSK>
      #      token for sentences is different from the <MSK> token for words
      #      in order to distinguish sentence masking and word masking.
      #   2. Masking selected sentences by other sentences which are randomly
      #      picked from the same document.
      #   3. Masking selected sentences by leaving them unchanged.
      #   4. Masking selected sentences by removing them from inputs.
      parser_mask_sentence_by_msk_token_prob=0.9,
      parser_mask_sentence_by_random_sentence_prob=0.,
      parser_mask_sentence_by_intact_prob=0.1,
      parser_mask_sentence_by_remove_prob=0.,
      # rouge_ngrams_size: a positive integer
      parser_rouge_ngrams_size=1,
      # rouge_metric_type: precision, recall, F
      parser_rouge_metric_type="F",
      # rouge_compute_option: standard, deduplicate, log
      #   standard: number of each ngram counted as it appears
      #   deduplicate: number of each ngram counted once only
      #   log: apply log(1+n) when computing the appearance of each ngram
      parser_rouge_compute_option="standard",
      parser_rouge_stopwords_filename="pegasus/ops/testdata/english_stopwords",
      parser_rouge_noise_ratio=0.20,
      parser_dynamic_mask_min_ratio=0.33,
      # If greater than zero, assign targets into buckets by
      # length // bucket_size; the bucket id is appended to the start of
      # inputs. The bucket id uses the reserved bucket ids, starting from the
      # start id, and goes up to the maximum number of reserved tokens.
      length_bucket_size=0,
      add_task_id=False,
      batch_size=16,
      max_input_len=512,
      max_target_len=256,
      max_decode_len=256,
      max_total_words=0,
      pretrain_target_filter_min=0,
      hidden_size=1024,
      filter_size=4096,
      num_heads=16,
      num_encoder_layers=16,
      num_decoder_layers=16,
      optimizer_name="adafactor",
      learning_rate=0.01,
      label_smoothing=0.0,
      dropout=0.1,
      train_steps=1500000,
      beam_size=1,
      eval_max_predictions=1000,
      use_bfloat16=False,
      model=None,
      encoder=None,
      parser=None,
      estimator_prediction_fn=None,
      eval=None,
      estimator_eval_metrics_fn=estimator_metrics.pretrain_eval_metrics_fn,
  )

  if param_overrides:
    hparams.parse(param_overrides)

  # Check values.
  if (hparams.parser_mask_word_by_msk_token_prob +
      hparams.parser_mask_word_by_random_token_prob +
      hparams.parser_mask_word_by_intact_prob) != 1.:
    raise ValueError("The sum of the rates of the three word masking options "
                     "(MSK, random, intact) does not equal 1.")
  if (hparams.parser_mask_sentence_by_msk_token_prob +
      hparams.parser_mask_sentence_by_random_sentence_prob +
      hparams.parser_mask_sentence_by_intact_prob +
      hparams.parser_mask_sentence_by_remove_prob) != 1.:
    raise ValueError("The sum of the rates of the four sentence masking "
                     "options (MSK, random, intact, remove) does not "
                     "equal 1.")

  hparams.encoder = public_parsing_ops.create_text_encoder(
      hparams.encoder_type, hparams.vocab_filename)
  hparams.parser = functools.partial(
      parsers.string_features_for_pretraining_parser,
      hparams.vocab_filename, hparams.encoder_type, hparams.max_input_len,
      hparams.max_target_len, hparams.max_total_words,
      hparams.parser_strategy, hparams.parser_masked_sentence_ratio,
      hparams.parser_masked_words_ratio, [
          hparams.parser_mask_word_by_msk_token_prob,
          hparams.parser_mask_word_by_random_token_prob,
          hparams.parser_mask_word_by_intact_prob
      ], [
          hparams.parser_mask_sentence_by_msk_token_prob,
          hparams.parser_mask_sentence_by_random_sentence_prob,
          hparams.parser_mask_sentence_by_intact_prob,
          hparams.parser_mask_sentence_by_remove_prob
      ], hparams.parser_rouge_ngrams_size, hparams.parser_rouge_metric_type,
      hparams.parser_rouge_compute_option,
      hparams.parser_rouge_stopwords_filename, NUM_RESERVED_TOKENS,
      parser_rouge_noise_ratio=hparams.parser_rouge_noise_ratio,
      parser_dynamic_mask_min_ratio=hparams.parser_dynamic_mask_min_ratio,
      input_feature="inputs",
      pretrain_target_filter_min=hparams.pretrain_target_filter_min,
      length_bucket_size=hparams.length_bucket_size,
      length_bucket_start_id=LENGTH_BUCKET_START_ID,
      length_bucket_max_id=TASK_START_ID - 1,
      add_task_id=hparams.add_task_id,
      task_start_id=TASK_START_ID)
  hparams.model = functools.partial(
      transformer.TransformerEncoderDecoderModel, hparams.encoder.vocab_size,
      hparams.hidden_size, hparams.filter_size, hparams.num_heads,
      hparams.num_encoder_layers, hparams.num_decoder_layers,
      hparams.label_smoothing, hparams.dropout)

  def decode_fn(features):
    return hparams.model().predict(features, hparams.max_decode_len,
                                   hparams.beam_size)

  hparams.estimator_prediction_fn = decode_fn
  hparams.eval = functools.partial(
      text_eval.text_eval, hparams.encoder,
      num_reserved=NUM_RESERVED_TOKENS)

  return hparams
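# A usage sketch (an assumption, mirroring how param_overrides is parsed
# above with HParams.parse; it requires the vocab file referenced in the
# defaults to exist):
hparams = pegasus_large_params('batch_size=8,learning_rate=0.005')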
        beam_size,
        branch_factor,
        steps_per_iteration,
        modify_events_callback=modify_events_callback)

  def polyphonic_sequence_log_likelihood(self, sequence):
    """Evaluate the log likelihood of a polyphonic sequence.

    Args:
      sequence: The PolyphonicSequence object for which to evaluate the log
        likelihood.

    Returns:
      The log likelihood of `sequence` under this model.
    """
    return self._evaluate_log_likelihood([sequence])[0]


default_configs = {
    'polyphony':
        events_rnn_model.EventSequenceRnnConfig(
            generator_pb2.GeneratorDetails(
                id='polyphony', description='Polyphonic RNN'),
            magenta.music.OneHotEventSequenceEncoderDecoder(
                polyphony_encoder_decoder.PolyphonyOneHotEncoding()),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[256, 256, 256],
                dropout_keep_prob=0.5,
                clip_norm=5,
                learning_rate=0.001)),
}
def transformer_params(patterns, param_overrides):
  """Params for TransformerEncoderDecoderMLModel.

  Args:
    patterns: a dict that includes train_pattern, dev_pattern, test_pattern.
    param_overrides: a string, comma-separated list of name=value.

  Returns:
    An instance of HParams.
  """
  hparams = contrib_training.HParams(
      train_pattern=patterns["train_pattern"],
      dev_pattern=patterns["dev_pattern"],
      test_pattern=patterns["test_pattern"],
      vocab_filename="pegasus/ops/testdata/sp_test.model",
      encoder_type="sentencepiece_newline",
      length_bucket_size=0,
      add_task_id=False,
      batch_size=patterns["batch_size"],
      max_input_len=patterns["max_input_len"],
      max_target_len=patterns["max_output_len"],
      max_decode_len=patterns["max_output_len"],
      hidden_size=1024,
      filter_size=4096,
      num_heads=16,
      num_encoder_layers=16,
      num_decoder_layers=16,
      beam_size=1,
      beam_start=5,
      beam_alpha=0.8,
      beam_min=0,
      beam_max=-1,
      temperature=0.0,
      top_k=0,
      top_p=0.0,
      optimizer_name="adafactor",
      train_steps=patterns["train_steps"],
      learning_rate=patterns["learning_rate"],
      label_smoothing=0.1,
      dropout=0.1,
      eval_max_predictions=patterns.get("eval_steps", 1000),
      use_bfloat16=False,
      model=None,
      parser=None,
      encoder=None,
      estimator_prediction_fn=None,
      eval=None,
      estimator_eval_metrics_fn=estimator_metrics.gen_eval_metrics_fn,
  )

  if param_overrides:
    hparams.parse(param_overrides)

  hparams.parser = functools.partial(
      parsers.supervised_strings_parser,
      hparams.vocab_filename,
      hparams.encoder_type,
      hparams.max_input_len,
      hparams.max_target_len,
      length_bucket_size=hparams.length_bucket_size,
      length_bucket_start_id=pegasus_params.LENGTH_BUCKET_START_ID,
      length_bucket_max_id=pegasus_params.TASK_START_ID - 1,
      add_task_id=hparams.add_task_id,
      task_start_id=pegasus_params.TASK_START_ID)
  hparams.encoder = public_parsing_ops.create_text_encoder(
      hparams.encoder_type, hparams.vocab_filename)
  hparams.model = functools.partial(
      transformer.TransformerEncoderDecoderModel, hparams.encoder.vocab_size,
      hparams.hidden_size, hparams.filter_size, hparams.num_heads,
      hparams.num_encoder_layers, hparams.num_decoder_layers,
      hparams.label_smoothing, hparams.dropout)

  beam_keys = ("beam_start", "beam_alpha", "beam_min", "beam_max",
               "temperature", "top_k", "top_p")
  beam_kwargs = {
      k: hparams.get(k) for k in beam_keys if k in hparams.values()
  }

  def decode_fn(features):
    return hparams.model().predict(features, hparams.max_decode_len,
                                   hparams.beam_size, **beam_kwargs)

  hparams.estimator_prediction_fn = decode_fn
  hparams.eval = functools.partial(
      text_eval.text_eval,
      hparams.encoder,
      num_reserved=pegasus_params.NUM_RESERVED_TOKENS)

  return hparams
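# A usage sketch with hypothetical values (the dict keys are the ones read
# above; the pattern strings and numbers here are made up for illustration):
hparams = transformer_params(
    {
        'train_pattern': 'tfds:cnn_dailymail/plain_text-train',
        'dev_pattern': 'tfds:cnn_dailymail/plain_text-validation',
        'test_pattern': 'tfds:cnn_dailymail/plain_text-test',
        'batch_size': 8,
        'max_input_len': 1024,
        'max_output_len': 128,
        'train_steps': 210000,
        'learning_rate': 0.001,
    }, 'beam_size=8,beam_alpha=0.8')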
def main(_):
  if FLAGS.dataset == 'cifar10':
    data_path = './cifar10_data/'
    assert FLAGS.train_size <= 50000
    validation_size = 50000 - FLAGS.train_size
  elif FLAGS.dataset == 'cifar100':
    data_path = './cifar100_data/'
    assert FLAGS.train_size <= 50000
    validation_size = 50000 - FLAGS.train_size
  elif FLAGS.dataset == 'svhn':
    data_path = './svhn_dataset/'
    assert FLAGS.train_size <= 73257
    validation_size = 73257 - FLAGS.train_size
  else:
    raise ValueError('Invalid dataset: %s' % FLAGS.dataset)

  hparams = contrib_training.HParams(
      train_size=FLAGS.train_size,
      validation_size=validation_size,
      eval_test=1,
      dataset=FLAGS.dataset,
      extra_dataset=FLAGS.extra_dataset,
      frequency=FLAGS.frequency,
      amplitude=FLAGS.amplitude,
      data_path=data_path,
      batch_size=256,
      gradient_clipping_by_global_norm=5.0,
      dummy_f=FLAGS.dummy_f,
      augment_type=FLAGS.augment_type,
      mixup_alpha=FLAGS.mixup_alpha,
      num_augmentation_layers=FLAGS.num_augmentation_layers,
      augmentation_magnitude=FLAGS.augmentation_magnitude,
      augmentation_probability=FLAGS.augmentation_probability,
      freq_augment_amplitude=FLAGS.freq_augment_amplitude,
      freq_augment_ffrac=FLAGS.freq_augment_ffrac,
      apply_cutout=FLAGS.apply_cutout,
      apply_flip_crop=FLAGS.apply_flip_crop,
      num_epochs=FLAGS.num_epochs,
      weight_decay_rate=FLAGS.weight_decay_rate,
      lr=FLAGS.lr,
      model_name=FLAGS.model_name,
      is_gan_data=FLAGS.is_gan_data,
      use_fixup=FLAGS.use_fixup,
      use_batchnorm=FLAGS.use_batchnorm,
      use_gamma_swish=FLAGS.use_gamma_swish,
      init_beta=FLAGS.init_beta,
      init_gamma=FLAGS.init_gamma,
      noise_type=FLAGS.noise_type,
      spatial_frequency=FLAGS.spatial_frequency,
      noise_seed=FLAGS.noise_seed,
      noise_class=FLAGS.noise_class,
      max_accuracy=FLAGS.max_accuracy,
      min_loss=FLAGS.min_loss,
      teacher_model=FLAGS.teacher_model,
      distillation_alpha=FLAGS.distillation_alpha,
      normalize_amplitude=FLAGS.normalize_amplitude,
      ckpt_every=FLAGS.ckpt_every,
  )
  tf.logging.info('All hparams : {}'.format(hparams))

  if FLAGS.model_name == 'wrn_32':
    setattr(hparams, 'model_name', 'wrn')
    hparams.add_hparam('wrn_size', 32)
  elif FLAGS.model_name == 'wrn_160':
    setattr(hparams, 'model_name', 'wrn')
    hparams.add_hparam('wrn_size', 160)
  elif FLAGS.model_name == 'shake_shake_32':
    setattr(hparams, 'model_name', 'shake_shake')
    hparams.add_hparam('shake_shake_widen_factor', 2)
  elif FLAGS.model_name == 'shake_shake_96':
    setattr(hparams, 'model_name', 'shake_shake')
    hparams.add_hparam('shake_shake_widen_factor', 6)
  elif FLAGS.model_name == 'shake_shake_112':
    setattr(hparams, 'model_name', 'shake_shake')
    hparams.add_hparam('shake_shake_widen_factor', 7)
  elif FLAGS.model_name == 'pyramid_net':
    setattr(hparams, 'model_name', 'pyramid_net')
    hparams.batch_size = 64
  else:
    raise ValueError('Not Valid Model Name: %s' % FLAGS.model_name)
  tf.logging.info('All hparams : {}'.format(hparams))

  cifar_trainer = CifarModelTrainer(hparams)
  cifar_trainer.run_model()
def build_hparams(params=def_params):
  return training.HParams(**params)
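# A usage sketch (an assumption: def_params is a plain dict of defaults
# defined elsewhere in the module, so any dict of names to values works):
hparams = build_hparams({'batch_size': 32, 'learning_rate': 0.001})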
Config = collections.namedtuple('Config', ('model_fn', 'hparams'))

DEFAULT_HPARAMS = tf_utils.merge_hparams(
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    contrib_training.HParams(
        eval_batch_size=1,
        predict_batch_size=1,
        onset_only_sequence_prediction=False,
        shuffle_buffer_size=64,
        sample_rate=16000,
        spec_type='mel',
        spec_mel_htk=True,
        spec_log_amplitude=True,
        spec_hop_length=512,
        spec_n_bins=229,
        spec_fmin=30.0,  # A0
        cqt_bins_per_octave=36,
        truncated_length_secs=0.0,
        max_expected_train_example_len=0,
        onset_length=32,
        offset_length=32,
        onset_mode='length_ms',
        onset_delay=0,
        min_frame_occupancy_for_label=0.0,
        jitter_amount_ms=0,
        min_duration_ms=0,
        backward_shift_amount_ms=0))

CONFIG_MAP = {}
CONFIG_MAP['onsets_frames'] = Config(
def copy_hparams(hparams):
  """Return a copy of an HParams instance."""
  return contrib_training.HParams(**hparams.values())
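# A minimal usage sketch (not from the original source): values() returns a
# dict of all hyperparameters, so the copy can be mutated independently of
# the original.
base = contrib_training.HParams(batch_size=64, learning_rate=0.001)
clone = copy_hparams(base)
clone.set_hparam('learning_rate', 0.01)
assert base.learning_rate == 0.001 and clone.learning_rate == 0.01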
def setUp(self):
  self.config = events_rnn_model.EventSequenceRnnConfig(
      None,
      magenta.music.OneHotEventSequenceEncoderDecoder(
          polyphony_encoder_decoder.PolyphonyOneHotEncoding()),
      contrib_training.HParams())
def imagenet_hparams():
  """Returns default ImageNet training params.

  These defaults are for full training. For search training, some should be
  modified to increase the speed of the search.
  """
  return contrib_training.HParams(
      ##########################################################################
      # Input pipeline params. #################################################
      ##########################################################################
      image_size=299,
      num_train_images=1281167,
      num_eval_images=50000,
      num_label_classes=1001,
      ##########################################################################
      # Architectural params. ##################################################
      ##########################################################################
      # The total number of regular cells (summed across all stacks).
      # Reduction cells are not included.
      num_cells=18,
      reduction_size=256,
      stem_reduction_size=32,
      # How many reduction cells to use between the stacks of regular cells.
      num_reduction_layers=2,
      # Stem.
      stem_type='imagenet',  # 'imagenet' or others
      num_stem_cells=2,  # 2 if stem_type == 'imagenet' else 0
      # Implementation details.
      data_format='NCHW',  # 'NHWC' or 'NCHW'.
      ##########################################################################
      # Training params. #######################################################
      ##########################################################################
      # Summed across all TPU cores training a model.
      train_batch_size=32,
      num_epochs=100.,
      # Auxiliary head.
      use_aux_head=True,
      aux_scaling=0.4,
      # Regularization.
      l1_decay_rate=0.0,
      label_smoothing=0.1,
      drop_connect_keep_prob=0.7,
      # `drop_connect_version` determines how the drop_connect probabilities
      # are set/increased over time:
      # -v1: increase dropout probability over training,
      # -v2: increase dropout probability as you increase the number of
      #      cells, so the top cell has the highest dropout and the lowest
      #      cell has the lowest dropout,
      # -v3: do both v1 and v2.
      drop_connect_version='v1',
      drop_path_burn_in_steps=0,
      # `drop_connect_condition` determines under what conditions
      # drop_connect is used:
      # -identity: dropout all paths except identity connections,
      # -all: dropout all paths,
      # -separable: dropout only paths containing a separable conv operation.
      dense_dropout_keep_prob=0.5,
      batch_norm_epsilon=0.001,
      batch_norm_decay=0.9997,
      shuffle_buffer=20000,
      # Any value <= 0 means it is unused.
      gradient_clipping_by_global_norm=10.0,
      # Learning rate schedule.
      lr=0.015,
      lr_decay_method='exponential',
      lr_decay_value=0.97,
      lr_num_epochs_per_decay=2.4,
      lr_warmup_epochs=3.0,
      weight_decay=4e-05,
      # Optimizer.
      optimizer='rmsprop',  # 'sgd', 'mom', 'adam' or 'rmsprop'
      rmsprop_decay=0.9,
      rmsprop_momentum_rate=0.9,
      rmsprop_epsilon=1.0,
      momentum_rate=0.9,
      use_nesterov=1,
      ##########################################################################
      # Eval and reporting params. #############################################
      ##########################################################################
      # This number should be a multiple of the number of TPU shards
      # used for eval (e.g., 2 for a 1x1 or 8 for a 2x2).
      eval_batch_size=40,
      # How many different crops are fed into one model. Also affects
      # training.
      num_input_images=1,
      moving_average_decay=0.9999,
      write_summaries=0,
      ##########################################################################
      # Other params. ##########################################################
      ##########################################################################
      num_shards=None,
      distributed_group_size=1,
      use_tpu=False)
default_configs = {
    'basic_improv':
        ImprovRnnConfig(
            magenta.music.protobuf.generator_pb2.GeneratorDetails(
                id='basic_improv',
                description='Basic melody-given-chords RNN with one-hot '
                            'triad encoding for chords.'),
            magenta.music.ConditionalEventSequenceEncoderDecoder(
                magenta.music.OneHotEventSequenceEncoderDecoder(
                    magenta.music.TriadChordOneHotEncoding()),
                magenta.music.OneHotEventSequenceEncoderDecoder(
                    magenta.music.MelodyOneHotEncoding(
                        min_note=DEFAULT_MIN_NOTE,
                        max_note=DEFAULT_MAX_NOTE))),
            contrib_training.HParams(
                batch_size=128,
                rnn_layer_sizes=[64, 64],
                dropout_keep_prob=0.5,
                clip_norm=5,
                learning_rate=0.001)),
    'attention_improv':
        ImprovRnnConfig(
            magenta.music.protobuf.generator_pb2.GeneratorDetails(
                id='attention_improv',
                description='Melody-given-chords RNN with one-hot triad '
                            'encoding for chords, attention, and binary '
                            'counters.'),
            magenta.music.ConditionalEventSequenceEncoderDecoder(
                magenta.music.OneHotEventSequenceEncoderDecoder(
                    magenta.music.TriadChordOneHotEncoding()),
                magenta.music.KeyMelodyEncoderDecoder(
                    min_note=DEFAULT_MIN_NOTE, max_note=DEFAULT_MAX_NOTE)),
            contrib_training.HParams(
                batch_size=128,
def get_hparams(**kwargs):
  """Get the hyperparameters for the model from a json object.

  Args:
    **kwargs: Dict of parameter overrides.

  Possible keyword arguments:
    atom_types: Dict. The possible atom types in the molecule.
    max_steps_per_episode: Integer. The maximum number of steps for one
      episode.
    allow_removal: Boolean. Whether to allow removal of a bond.
    allow_no_modification: Boolean. If true, the valid action set will
      include doing nothing to the current molecule, i.e., the current
      molecule itself will be added to the action set.
    replay_buffer_size: Integer. The size of the replay buffer.
    learning_rate: Float. Learning rate.
    learning_rate_decay_steps: Integer. The number of steps between each
      learning rate decay.
    learning_rate_decay_rate: Float. The rate of learning rate decay.
    num_episodes: Integer. Number of episodes to run.
    batch_size: Integer. The batch size.
    learning_frequency: Integer. The number of steps between each training
      operation.
    update_frequency: Integer. The number of steps between each update of the
      target Q network.
    grad_clipping: Integer. The maximum value of the gradient norm.
    gamma: Float. The discount factor for the reward.
    double_q: Boolean. Whether to use double Q learning. See
      https://arxiv.org/abs/1509.06461 for details.
    bootstrap: Integer. The number of bootstrap heads. See
      https://arxiv.org/abs/1703.07608 for details.
    prioritized: Boolean. Whether to use prioritized replay. See
      https://arxiv.org/abs/1511.05952 for details.
    prioritized_alpha: Float. The parameter alpha in prioritized replay.
    prioritized_beta: Float. The parameter beta in prioritized replay.
    prioritized_epsilon: Float. The parameter epsilon in prioritized replay.
    fingerprint_radius: Integer. The radius of the Morgan fingerprint.
    fingerprint_length: Integer. The length of the Morgan fingerprint.
    dense_layers: List of integers. The hidden units in the dense layers.
    activation: String. The activation function to use.
    optimizer: String. The optimizer to use.
    batch_norm: Boolean. Whether to use batch normalization.
    save_frequency: Integer. The number of episodes between each saving.

  Returns:
    A HParams object containing all the hyperparameters.
  """
  hparams = contrib_training.HParams(
      atom_types=['C', 'O', 'N'],
      max_steps_per_episode=40,
      allow_removal=True,
      allow_no_modification=True,
      allow_bonds_between_rings=False,
      allowed_ring_sizes=[3, 4, 5, 6],
      replay_buffer_size=1000000,
      learning_rate=1e-4,
      learning_rate_decay_steps=10000,
      learning_rate_decay_rate=0.8,
      num_episodes=5000,
      batch_size=64,
      learning_frequency=4,
      update_frequency=20,
      grad_clipping=10.0,
      gamma=0.9,
      double_q=True,
      num_bootstrap_heads=12,
      prioritized=False,
      prioritized_alpha=0.6,
      prioritized_beta=0.4,
      prioritized_epsilon=1e-6,
      fingerprint_radius=3,
      fingerprint_length=2048,
      dense_layers=[1024, 512, 128, 32],
      activation='relu',
      optimizer='Adam',
      batch_norm=False,
      save_frequency=1000,
      max_num_checkpoints=100,
      discount_factor=0.7)
  return hparams.override_from_dict(kwargs)
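# A usage sketch (an assumption): since the docstring mentions a json object,
# overrides could be loaded from a file and applied through the **kwargs
# path. The file name here is hypothetical and its keys must match hparams
# defined above, because override_from_dict rejects unknown names.
import json
with open('hparams_override.json') as f:  # hypothetical override file
  hparams = get_hparams(**json.load(f))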
def run_trial(trial_idx, delta, algo_names):
  """Runs a trial of the wheel bandit problem instance for a set of algorithms."""
  filename = os.path.join(FLAGS.datasetdir,
                          str(delta) + '_' + str(trial_idx) + '.npz')
  with gfile.GFile(filename, 'r') as f:
    sampled_vals = np.load(f)
    dataset = sampled_vals['dataset']
    opt_rewards = sampled_vals['opt_rewards']

  x_hidden_size = 100
  x_encoder_sizes = [x_hidden_size] * 2

  algos = []
  for algo_name in algo_names:
    if algo_name == 'uniform':
      hparams = contrib_training.HParams(num_actions=num_actions)
      algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
    elif algo_name == 'neurolinear':
      hparams = contrib_training.HParams(
          num_actions=num_actions,
          context_dim=context_dim,
          init_scale=0.3,
          activation=tf.nn.relu,
          output_activation=tf.nn.relu,
          layer_sizes=x_encoder_sizes,
          batch_size=512,
          activate_decay=True,
          initial_lr=0.1,
          max_grad_norm=5.0,
          show_training=False,
          freq_summary=1000,
          buffer_s=-1,
          initial_pulls=2,
          reset_lr=True,
          lr_decay_rate=0.5,
          training_freq=1,
          training_freq_network=20,
          training_epochs=50,
          a0=12,
          b0=30,
          lambda_prior=23)
      algos.append(
          neural_linear_sampling.NeuralLinearPosteriorSampling(
              algo_name, hparams))
    elif algo_name == 'multitaskgp':
      hparams_gp = contrib_training.HParams(
          num_actions=num_actions,
          num_outputs=num_actions,
          context_dim=context_dim,
          reset_lr=False,
          learn_embeddings=True,
          max_num_points=1000,
          show_training=False,
          freq_summary=1000,
          batch_size=512,
          keep_fixed_after_max_obs=True,
          training_freq=20,
          initial_pulls=2,
          training_epochs=50,
          lr=0.01,
          buffer_s=-1,
          initial_lr=0.001,
          lr_decay_rate=0.0,
          optimizer='RMS',
          task_latent_dim=5,
          activate_decay=False)
      algos.append(
          posterior_bnn_sampling.PosteriorBNNSampling(
              algo_name, hparams_gp, 'GP'))
    elif algo_name[:3] == 'snp' or algo_name[:3] == 'anp':
      hidden_size = 64
      latent_units = 32
      global_latent_net_sizes = [hidden_size] * 2 + [2 * latent_units]
      local_latent_net_sizes = [hidden_size] * 3 + [2]
      x_y_encoder_sizes = [hidden_size] * 3
      heteroskedastic_net_sizes = None
      mean_att_type = attention.laplace_attention
      scale_att_type_1 = attention.laplace_attention
      scale_att_type_2 = attention.laplace_attention
      att_type = 'multihead'
      att_heads = 8
      data_uncertainty = False
      is_anp = False

      config = algo_name.split('_')
      mfile = FLAGS.prefix + config[1] + '_' + config[2] + FLAGS.suffix
      if algo_name[:3] == 'anp':
        mfile = 'anp_' + mfile
        local_latent_net_sizes = [hidden_size] * 3 + [2 * 5]
        is_anp = True
      mpath = os.path.join(FLAGS.modeldir, mfile)

      hparams = contrib_training.HParams(
          num_actions=num_actions,
          context_dim=context_dim,
          init_scale=0.3,
          activation=tf.nn.relu,
          output_activation=tf.nn.relu,
          x_encoder_sizes=x_encoder_sizes,
          x_y_encoder_sizes=x_y_encoder_sizes,
          global_latent_net_sizes=global_latent_net_sizes,
          local_latent_net_sizes=local_latent_net_sizes,
          heteroskedastic_net_sizes=heteroskedastic_net_sizes,
          att_type=att_type,
          att_heads=att_heads,
          mean_att_type=mean_att_type,
          scale_att_type_1=scale_att_type_1,
          scale_att_type_2=scale_att_type_2,
          data_uncertainty=data_uncertainty,
          batch_size=512,
          activate_decay=True,
          initial_lr=0.1,
          max_grad_norm=5.0,
          show_training=False,
          freq_summary=1000,
          buffer_s=-1,
          initial_pulls=2,
          reset_lr=True,
          lr_decay_rate=0.5,
          training_freq=10,
          training_freq_network=20,
          training_epochs=50,
          uncertainty_type='attentive_freeform',
          local_variational=True,
          model_path=mpath,
          is_anp=is_anp)

      if config[1] == 'prior':
        hparams.set_hparam('local_variational', False)
      if config[2] == 'gp':
        hparams.set_hparam('uncertainty_type', 'attentive_gp')

      algos.append(
offline_contextual_bandits.OfflineContextualBandits( algo_name, hparams)) t_init = time.time() _, h_rewards = contextual_bandit.run_contextual_bandit( context_dim, num_actions, dataset, algos, num_contexts=FLAGS.num_contexts) t_final = time.time() return h_rewards, t_final - t_init, opt_rewards[:FLAGS.num_contexts]
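# Hedged driver sketch: the algorithm names mirror the branches handled in
# run_trial above, and the FLAGS it reads are assumed to be set. This also
# assumes the usual bandits convention that h_rewards has shape
# (num_contexts, num_algos) and opt_rewards has shape (num_contexts,).
h_rewards, elapsed_secs, opt_rewards = run_trial(
    trial_idx=0, delta=0.5, algo_names=['uniform', 'multitaskgp'])
cumulative_regret = np.sum(opt_rewards[:, None] - h_rewards, axis=0)
print('regret per algorithm:', cumulative_regret, 'in', elapsed_secs, 's')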
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  random.seed(FLAGS.random_seed)

  params = contrib_training.HParams(
      embedding=FLAGS.embedding,
      num_steps=FLAGS.num_steps,
      val_freq=FLAGS.val_freq,
      seq_len=FLAGS.seq_len,
      batch_size=FLAGS.batch_size,
      emb_size=FLAGS.emb_size,
      vocab_size=4,
      hidden_lstm_size=FLAGS.hidden_lstm_size,
      hidden_dense_size=FLAGS.hidden_dense_size,
      dropout_rate=FLAGS.dropout_rate,
      learning_rate=FLAGS.learning_rate,
      num_motifs=FLAGS.num_motifs,
      len_motifs=FLAGS.len_motifs,
      temperature=FLAGS.temperature,
      reweight_sample=FLAGS.reweight_sample,
      l2_reg=FLAGS.l2_reg,
      out_dir=FLAGS.out_dir,
      in_tr_data_dir=FLAGS.in_tr_data_dir,
      in_val_data_dir=FLAGS.in_val_data_dir,
      ood_val_data_dir=FLAGS.ood_val_data_dir,
      master=FLAGS.master,
      save_meta=FLAGS.save_meta,
      label_dict_file=FLAGS.label_dict_file,
      mutation_rate=FLAGS.mutation_rate,
      epsilon=FLAGS.epsilon,
  )

  # create output directories
  create_out_dir(params)

  # load datasets and labels for training
  params.add_hparam('in_tr_file_pattern', 'in_tr')
  params.add_hparam('in_val_file_pattern', 'in_val')
  params.add_hparam('ood_val_file_pattern', 'ood_val')
  label_sample_size, in_tr_dataset, in_val_dataset, ood_val_dataset = (
      load_datasets_and_labels(params))
  params.add_hparam('n_class', len(label_sample_size))
  tf.logging.info('label_sample_size=%s', label_sample_size)

  # compute weights for labels.
  # load the dictionary for class labels.
  # Key: class name (string), value: encoded class label (int)
  with tf.gfile.GFile(params.label_dict_file, 'rb') as f_label_code:
    params.add_hparam('label_dict', yaml.safe_load(f_label_code))
  tf.logging.info('# of label_dict=%s', len(params.label_dict))

  label_weights = utils.compute_label_weights_using_sample_size(
      params.label_dict, label_sample_size)
  params.add_hparam('label_weights', label_weights)

  # print parameter settings
  tf.logging.info(params)
  # HParams.to_json() already returns a serialized JSON string, so write it
  # directly rather than double-encoding it with json.dumps.
  with tf.gfile.GFile(
      os.path.join(params.model_dir, 'params.json'), mode='w') as f:
    f.write(params.to_json(sort_keys=True))

  # construct model
  tf.logging.info('create model')
  model = SeqPredModel(params)
  model.reset()

  ## if a previous model ckpt exists, restore the model from there
  tf.logging.info('model dir=%s',
                  os.path.join(params.model_dir, '*.ckpt.index'))
  prev_steps, ckpt_file = utils.get_latest_ckpt(params.model_dir)
  if ckpt_file:
    tf.logging.info('previous ckpt exists, prev_steps=%s', prev_steps)
    model.restore_from_ckpt(ckpt_file)

  # training
  tf.logging.info('start training')
  model.train(in_tr_dataset, in_val_dataset, ood_val_dataset, prev_steps)
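# Minimal sketch of the HParams mutation pattern used in main() above (names
# here are illustrative only): add_hparam registers a brand-new key and raises
# if it already exists, while set_hparam updates a key that must already exist.
p = contrib_training.HParams(learning_rate=1e-3)
p.add_hparam('n_class', 10)           # new key: must not exist yet
p.set_hparam('learning_rate', 5e-4)   # existing key: must already exist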
def _assertOptimizerWithNewLearningRate(self, optimizer_name):
  """Asserts successful updating of all learning rate schemes."""
  original_learning_rate = 0.7
  learning_rate_scaling = 0.1
  warmup_learning_rate = 0.07
  hparams = contrib_training.HParams(learning_rate=0.15)
  pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")

  # Constant learning rate.
  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
  optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
  _update_optimizer_with_constant_learning_rate(optimizer,
                                                original_learning_rate)
  _write_config(pipeline_config, pipeline_config_path)

  configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
  configs = config_util.merge_external_params_with_configs(configs, hparams)
  optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
  constant_lr = optimizer.learning_rate.constant_learning_rate
  self.assertAlmostEqual(hparams.learning_rate, constant_lr.learning_rate)

  # Exponential decay learning rate.
  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
  optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
  _update_optimizer_with_exponential_decay_learning_rate(
      optimizer, original_learning_rate)
  _write_config(pipeline_config, pipeline_config_path)

  configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
  configs = config_util.merge_external_params_with_configs(configs, hparams)
  optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
  exponential_lr = optimizer.learning_rate.exponential_decay_learning_rate
  self.assertAlmostEqual(hparams.learning_rate,
                         exponential_lr.initial_learning_rate)

  # Manual step learning rate.
  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
  optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
  _update_optimizer_with_manual_step_learning_rate(
      optimizer, original_learning_rate, learning_rate_scaling)
  _write_config(pipeline_config, pipeline_config_path)

  configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
  configs = config_util.merge_external_params_with_configs(configs, hparams)
  optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
  manual_lr = optimizer.learning_rate.manual_step_learning_rate
  self.assertAlmostEqual(hparams.learning_rate,
                         manual_lr.initial_learning_rate)
  for i, schedule in enumerate(manual_lr.schedule):
    self.assertAlmostEqual(hparams.learning_rate * learning_rate_scaling**i,
                           schedule.learning_rate)

  # Cosine decay learning rate.
  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
  optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
  _update_optimizer_with_cosine_decay_learning_rate(optimizer,
                                                    original_learning_rate,
                                                    warmup_learning_rate)
  _write_config(pipeline_config, pipeline_config_path)

  configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
  configs = config_util.merge_external_params_with_configs(configs, hparams)
  optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
  cosine_lr = optimizer.learning_rate.cosine_decay_learning_rate
  self.assertAlmostEqual(hparams.learning_rate, cosine_lr.learning_rate_base)
  warmup_scale_factor = warmup_learning_rate / original_learning_rate
  self.assertAlmostEqual(hparams.learning_rate * warmup_scale_factor,
                         cosine_lr.warmup_learning_rate)
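# Hedged illustration of the override pattern the test above exercises:
# merging an HParams object whose key is 'learning_rate' rewrites the initial
# learning rate of whichever schedule the pipeline config uses. The config
# path is a placeholder.
hparams = contrib_training.HParams(learning_rate=0.15)
configs = config_util.get_configs_from_pipeline_file('pipeline.config')
configs = config_util.merge_external_params_with_configs(configs, hparams)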