Example #1
def eval_function(hparams_dict):
    """
    This function takes a hyperparameter configuration, trains the
    corresponding model on the training data set, creates the predictions,
    and returns the evaluated MAPE on the evaluation data set.
    """
    # set the data directory
    file_dir = os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe())))
    data_dir = os.path.join(file_dir, data_relative_dir)

    hparams_dict = dict(hparams_dict)
    for key in LIST_HYPERPARAMETER:
        hparams_dict[key] = [hparams_dict[key]]

    # add the values of other hyperparameters which are not tuned
    hparams_dict["encoder_rnn_layers"] = 1
    hparams_dict["decoder_rnn_layers"] = 1
    hparams_dict["decoder_variational_dropout"] = [False]
    hparams_dict["asgd_decay"] = None

    hparams = training.HParams(**hparams_dict)
    # use round 1 training data for hyperparameter tuning to avoid data leakage for later rounds
    submission_round = 1
    make_features_flag = False
    train_model_flag = True
    train_back_offset = 3  # equal to predict_window
    predict_cut_mode = "eval"

    # get prediction
    pred_o, train_mape = create_round_prediction(
        data_dir,
        submission_round,
        hparams,
        make_features_flag=make_features_flag,
        train_model_flag=train_model_flag,
        train_back_offset=train_back_offset,
        predict_cut_mode=predict_cut_mode,
    )
    # get rid of prediction at horizon 1
    pred_sub = pred_o[:, 1:].reshape((-1))

    # evaluate the prediction on the last two days of the first round training data
    # TODO: get train error and evaluation error for different parameters
    train_file = os.path.join(
        data_dir, "train/train_round_{}.csv".format(submission_round))
    train = pd.read_csv(train_file, index_col=False)
    train_last_week = bs.TRAIN_END_WEEK_LIST[submission_round - 1]
    # filter the train set to contain only the last two days' data
    train = train.loc[train["week"] >= train_last_week - 1]

    # create the data frame without missing dates
    store_list = train["store"].unique()
    brand_list = train["brand"].unique()
    week_list = range(train_last_week - 1, train_last_week + 1)
    item_list = list(itertools.product(store_list, brand_list, week_list))
    item_df = pd.DataFrame.from_records(item_list,
                                        columns=["store", "brand", "week"])

    train = item_df.merge(train, how="left", on=["store", "brand", "week"])
    result = train.sort_values(by=["store", "brand", "week"], ascending=True)
    result["prediction"] = pred_sub
    result["sales"] = result["logmove"].apply(lambda x: round(np.exp(x)))

    # calculate MAPE on the evaluation set
    result = result.loc[result["sales"].notnull()]
    eval_mape = MAPE(result["prediction"], result["sales"])
    return eval_mape
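
The MAPE helper called on the last line is not shown in this snippet. A minimal sketch, assuming the benchmark's helper computes mean absolute percentage error in percent (an assumption, not the original implementation):

import numpy as np

def MAPE(predictions, actuals):
    # Mean absolute percentage error, returned in percent (assumed convention).
    predictions = np.asarray(predictions, dtype=float)
    actuals = np.asarray(actuals, dtype=float)
    return np.mean(np.abs(predictions - actuals) / actuals) * 100.0
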
Example #2
__author__ = 'KKishore'

import tensorflow as tf
from tensorflow.contrib import training

from model.cnn_model import model_fn, input_fn, serving_fn

tf.logging.set_verbosity(tf.logging.INFO)

N_WORDS = 0

with open('data/nwords.csv', 'r') as f:
    N_WORDS = int(f.read()) + 2

hparams = training.HParams(N_WORDS=N_WORDS)

print(N_WORDS)

estimator = tf.estimator.Estimator(model_fn=model_fn,
                                   params=hparams,
                                   model_dir='build/')

estimator.train(
    input_fn=lambda: input_fn('data/train.tsv', shuffle=True, repeat_count=5))

evaluated_results = estimator.evaluate(
    input_fn=lambda: input_fn('data/dev.tsv', shuffle=False, repeat_count=1))

print("# Evaluated Results: {}".format(evaluated_results))

estimator.export_savedmodel(export_dir_base='serving',
                            serving_input_receiver_fn=serving_fn)
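
For context, a minimal sketch of how the HParams object passed via params is read inside an Estimator model function. The model function here is hypothetical, not the model_fn imported above:

import tensorflow as tf
from tensorflow.contrib import training

def toy_model_fn(features, labels, mode, params):
    # Hyperparameters arrive as attributes on the HParams instance.
    logits = tf.layers.dense(features['x'], params.n_classes)
    predictions = {'classes': tf.argmax(logits, axis=-1)}
    # Prediction-only spec; enough for this sketch.
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

hparams = training.HParams(n_classes=3)
estimator = tf.estimator.Estimator(model_fn=toy_model_fn, params=hparams)
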
Example #3
def run_trial(trial_idx, delta, algo_names):
    """Runs a trial of wheel bandit problem instance for a set of algorithms."""

    filename = os.path.join(FLAGS.datasetdir,
                            str(delta) + '_' + str(trial_idx) + '.npz')
    with gfile.GFile(filename, 'rb') as f:
        sampled_vals = np.load(f)
        dataset = sampled_vals['dataset']
        opt_rewards = sampled_vals['opt_rewards']

    x_hidden_size = 100
    x_encoder_sizes = [x_hidden_size] * 2

    algos = []
    for algo_name in algo_names:
        if algo_name == 'uniform':
            hparams = contrib_training.HParams(num_actions=num_actions)
            algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
        elif algo_name == 'neurolinear':
            hparams = contrib_training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               init_scale=0.3,
                                               activation=tf.nn.relu,
                                               output_activation=tf.nn.relu,
                                               layer_sizes=x_encoder_sizes,
                                               batch_size=512,
                                               activate_decay=True,
                                               initial_lr=0.1,
                                               max_grad_norm=5.0,
                                               show_training=False,
                                               freq_summary=1000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               reset_lr=True,
                                               lr_decay_rate=0.5,
                                               training_freq=1,
                                               training_freq_network=20,
                                               training_epochs=50,
                                               a0=12,
                                               b0=30,
                                               lambda_prior=23)
            algos.append(
                neural_linear_sampling.NeuralLinearPosteriorSampling(
                    algo_name, hparams))
        elif algo_name == 'multitaskgp':
            hparams_gp = contrib_training.HParams(
                num_actions=num_actions,
                num_outputs=num_actions,
                context_dim=context_dim,
                reset_lr=False,
                learn_embeddings=True,
                max_num_points=1000,
                show_training=False,
                freq_summary=1000,
                batch_size=512,
                keep_fixed_after_max_obs=True,
                training_freq=20,
                initial_pulls=2,
                training_epochs=50,
                lr=0.01,
                buffer_s=-1,
                initial_lr=0.001,
                lr_decay_rate=0.0,
                optimizer='RMS',
                task_latent_dim=5,
                activate_decay=False)
            algos.append(
                posterior_bnn_sampling.PosteriorBNNSampling(
                    algo_name, hparams_gp, 'GP'))
        elif algo_name[:3] == 'gnp':
            hidden_size = 64
            x_encoder_net_sizes = None
            decoder_net_sizes = [hidden_size] * 3 + [2 * num_actions]
            heteroskedastic_net_sizes = None
            att_type = 'multihead'
            att_heads = 8
            data_uncertainty = False
            config = algo_name.split('_')
            model_type = config[1]
            if algo_name[:len('gnp_anp_beta_')] == 'gnp_anp_beta_':
                mfile = algo_name + FLAGS.suffix
                x_y_encoder_net_sizes = [hidden_size] * 3
                global_latent_net_sizes = [hidden_size] * 2
                local_latent_net_sizes = None
                beta = float(config[3])
                temperature = float(config[5])
            else:
                mfile = FLAGS.prefix + config[1] + FLAGS.suffix
                if model_type == 'cnp':
                    x_y_encoder_net_sizes = [hidden_size] * 4
                    global_latent_net_sizes = None
                    local_latent_net_sizes = None
                elif model_type == 'np':
                    x_y_encoder_net_sizes = [hidden_size] * 2
                    global_latent_net_sizes = [hidden_size] * 2
                    local_latent_net_sizes = None
                elif model_type == 'anp':
                    x_y_encoder_net_sizes = [hidden_size] * 2
                    global_latent_net_sizes = [hidden_size] * 2
                    local_latent_net_sizes = None
                elif model_type == 'acnp':
                    x_y_encoder_net_sizes = [hidden_size] * 4
                    global_latent_net_sizes = None
                    local_latent_net_sizes = None
                elif model_type == 'acns':
                    x_y_encoder_net_sizes = [hidden_size] * 2
                    global_latent_net_sizes = [hidden_size] * 2
                    local_latent_net_sizes = [hidden_size] * 2

                beta = 1.
                temperature = 1.

            mpath = os.path.join(FLAGS.modeldir, mfile)

            hparams = contrib_training.HParams(
                num_actions=num_actions,
                context_dim=context_dim,
                init_scale=0.3,
                activation=tf.nn.relu,
                output_activation=tf.nn.relu,
                x_encoder_net_sizes=x_encoder_net_sizes,
                x_y_encoder_net_sizes=x_y_encoder_net_sizes,
                global_latent_net_sizes=global_latent_net_sizes,
                local_latent_net_sizes=local_latent_net_sizes,
                decoder_net_sizes=decoder_net_sizes,
                heteroskedastic_net_sizes=heteroskedastic_net_sizes,
                att_type=att_type,
                att_heads=att_heads,
                model_type=model_type,
                data_uncertainty=data_uncertainty,
                beta=beta,
                temperature=temperature,
                model_path=mpath,
                batch_size=512,
                activate_decay=True,
                initial_lr=0.1,
                max_grad_norm=5.0,
                show_training=False,
                freq_summary=1000,
                buffer_s=-1,
                initial_pulls=2,
                reset_lr=True,
                lr_decay_rate=0.5,
                training_freq=10,
                training_freq_network=20,
                training_epochs=50)

            algos.append(
                offline_contextual_bandits_gnp.OfflineContextualBandits(
                    algo_name, hparams))

    t_init = time.time()
    _, h_rewards = contextual_bandit.run_contextual_bandit(
        context_dim,
        num_actions,
        dataset,
        algos,
        num_contexts=FLAGS.num_contexts)  # pytype: disable=wrong-keyword-args
    t_final = time.time()

    return h_rewards, t_final - t_init, opt_rewards[:FLAGS.num_contexts]
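
The 'gnp' branch above encodes its configuration in the algorithm name itself. A short sketch of that parsing, using a hypothetical name that follows the 'gnp_anp_beta_<beta>_<tag>_<temperature>' pattern handled above:

algo_name = 'gnp_anp_beta_0.5_temp_1.0'   # hypothetical example name
config = algo_name.split('_')
model_type = config[1]          # 'anp'
beta = float(config[3])         # 0.5
temperature = float(config[5])  # 1.0
print(model_type, beta, temperature)
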
Example #4
    return self._generate_events(
        num_steps=num_steps, primer_events=primer_sequence, temperature=None,
        beam_size=beam_size, branch_factor=branch_factor,
        steps_per_iteration=steps_per_iteration)


default_configs = {
    'rnn-nade':
        events_rnn_model.EventSequenceRnnConfig(
            magenta.protobuf.generator_pb2.GeneratorDetails(
                id='rnn-nade', description='RNN-NADE'),
            mm.PianorollEncoderDecoder(),
            contrib_training.HParams(
                batch_size=64,
                rnn_layer_sizes=[128, 128, 128],
                nade_hidden_units=128,
                dropout_keep_prob=0.5,
                clip_norm=5,
                learning_rate=0.001)),
    'rnn-nade_attn':
        events_rnn_model.EventSequenceRnnConfig(
            magenta.protobuf.generator_pb2.GeneratorDetails(
                id='rnn-nade_attn', description='RNN-NADE with attention.'),
            mm.PianorollEncoderDecoder(),
            contrib_training.HParams(
                batch_size=48,
                rnn_layer_sizes=[128, 128],
                attn_length=32,
                nade_hidden_units=128,
                dropout_keep_prob=0.5,
                clip_norm=5,
                learning_rate=0.001)),
}
Example #5
def get_pruning_hparams():
    """Get a tf.HParams object with the default values for the hyperparameters.

    name: string
      name of the pruning specification. Used for adding summaries and ops under
      a common tensorflow name_scope
    begin_pruning_step: integer
      the global step at which to begin pruning
    end_pruning_step: integer
      the global step at which to terminate pruning. Defaults to -1 implying
      that pruning continues till the training stops
    weight_sparsity_map: list of strings
       comma separated list of {weight_variable_name:target sparsity} or
       {regex:target sparsity} pairs.
       For layers/weights not in this list, sparsity as specified by the
       target_sparsity hyperparameter is used.
       Eg. [conv1:0.9,conv2/kernel:0.8]
    block_dims_map: list of strings
       comma separated list of {weight variable name:block_height x block_width}
       or {regex:block_height x block_width} pairs. For layers/weights not in
       this list, the block dims specified by the block_height and block_width
       hyperparameters are used. Eg. [dense1:4x4,dense2:1x16,dense3:1x1]
    threshold_decay: float
      the decay factor to use for exponential decay of the thresholds
    pruning_frequency: integer
      How often should the masks be updated? (in # of global_steps)
    nbins: integer
      number of bins to use for histogram computation
    block_height: integer
      number of rows in a block (defaults to 1), can be -1 in which
      case it is set to the size of the corresponding weight tensor.
    block_width: integer
      number of cols in a block (defaults to 1), can be -1 in which
      case it is set to the size of the corresponding weight tensor.
    block_pooling_function: string
      Whether to perform average (AVG) or max (MAX) pooling in the block
      (default: AVG)
    initial_sparsity: float
      initial sparsity value
    target_sparsity: float
      target sparsity value
    sparsity_function_begin_step: integer
      the global step at which the gradual sparsity function begins to
      take effect
    sparsity_function_end_step: integer
      the global step used as the end point for the gradual sparsity function
    sparsity_function_exponent: float
      exponent = 1 is linearly varying sparsity between initial and final.
      exponent > 1 varies more slowly towards the end than the beginning
    use_tpu: boolean
      Indicates whether to use TPU
    gradient_decay_rate: float
      when prune_option is gradient based pruning, decay factor for gradient
      decay
    prune_option: string
      option = 'weight' means using |weight| for pruning.
      option = 'first_order_gradient' means using |weight| * |first order
      gradient| for pruning.
      option = 'second_order_gradient' means using |weight| * |second order
      gradient| for pruning.
        second order gradient is approximated by |weight + old_old_weight -
        2*old_weight|.
      option = 'compression' means using compression.
    alpha_decrement_value: only effective when prune_option is 'compression',
      see graph_compression/compression_lib/compression_op.py. The following
      arguments are all only effective when prune_option == 'compression', see
      graph_compression/compression_lib/compression_op.py for details.
    begin_compression_step: only effective when prune_option is 'compression',
                           see graph_compression/compression_op.py.
    end_compresson_step: only effective when prune_option is 'compression',
                           see graph_compression/compression_op.py.
    compression_frequency: only effective when prune_option is 'compression',
                           see graph_compression/compression_op.py.
    compression_option: only effective when prune_option is 'compression',
                        see graph_compression/compression_op.py.
    rank: only effective when prune_option is 'compression',
          see graph_compression/compression_op.py.
    update_option: only effective when prune_option is 'compression',
                   see graph_compression/compression_op.py.
    run_update_interval_check: only effective when prune_option is 'compression'
                               see graph_compression/compression_op.py.
    pruning_fraction: only effective when prune_option is 'compression',
                      see graph_compression/compression_op.py.

    We use the following sparsity function:

    num_steps = (sparsity_function_end_step -
                 sparsity_function_begin_step)/pruning_frequency
    sparsity(step) = (initial_sparsity - target_sparsity)*
                     [1-step/(num_steps -1)]**exponent + target_sparsity

  Args: None

  Returns:
    tf.HParams object initialized to default values

  """
    return contrib_training.HParams(name='model_pruning',
                                    begin_pruning_step=0,
                                    end_pruning_step=-1,
                                    weight_sparsity_map=[''],
                                    block_dims_map=[''],
                                    threshold_decay=0.0,
                                    pruning_frequency=10,
                                    nbins=256,
                                    block_height=1,
                                    block_width=1,
                                    block_pooling_function='AVG',
                                    initial_sparsity=0.0,
                                    target_sparsity=0.5,
                                    sparsity_function_begin_step=0,
                                    sparsity_function_end_step=100,
                                    sparsity_function_exponent=3.0,
                                    use_tpu=False,
                                    gradient_decay_rate=0.99,
                                    prune_option='weight',
                                    alpha_decrement_value=0.01,
                                    begin_compression_step=0,
                                    end_compresson_step=-1,
                                    compression_frequency=10,
                                    compression_option=0,
                                    rank=7,
                                    update_option=0,
                                    run_update_interval_check=1,
                                    pruning_fraction=0.4)
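
The docstring above gives the gradual sparsity schedule as a formula; a standalone transcription of it, with the defaults from the returned HParams plugged in (here step is the mask-update index):

def scheduled_sparsity(step,
                       initial_sparsity=0.0,
                       target_sparsity=0.5,
                       sparsity_function_begin_step=0,
                       sparsity_function_end_step=100,
                       pruning_frequency=10,
                       sparsity_function_exponent=3.0):
    # Literal transcription of the docstring formula.
    num_steps = (sparsity_function_end_step -
                 sparsity_function_begin_step) / pruning_frequency
    frac = 1.0 - float(step) / (num_steps - 1)
    return ((initial_sparsity - target_sparsity) *
            frac ** sparsity_function_exponent + target_sparsity)

# With the defaults, sparsity ramps from 0.0 at update 0 to 0.5 at update 9.
print(scheduled_sparsity(0), scheduled_sparsity(9))
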
Example #6
def run_trial(trial_idx, delta, algo_names):
    """Runs a trial of wheel bandit problem instance for a set of algorithms."""

    all_algo_names = '_'.join(algo_names)
    runfile = str(delta) + '_' + str(trial_idx) + '_' + all_algo_names + '.pkl'
    savefile = os.path.join(FLAGS.savedir, runfile)
    if gfile.Exists(savefile):
        print('File exists...terminating')
        with gfile.Open(savefile, 'rb') as infile:
            saved_state = pickle.load(infile, encoding='latin-1')
        return saved_state['h_rewards'], saved_state['time']

    filename = os.path.join(FLAGS.datasetdir,
                            str(delta) + '_' + str(trial_idx) + '.npz')
    with gfile.GFile(filename, 'rb') as f:
        sampled_vals = np.load(f)
        dataset = sampled_vals['dataset']

    x_hidden_size = 100
    x_encoder_sizes = [x_hidden_size] * 2

    algos = []
    ckptfile = None
    save_once = False
    for algo_name in algo_names:
        if algo_name == 'uniform':
            hparams = contrib_training.HParams(num_actions=num_actions)
            algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
        elif algo_name == 'neurolinear':
            hparams = contrib_training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               init_scale=0.3,
                                               activation=tf.nn.relu,
                                               output_activation=tf.nn.relu,
                                               layer_sizes=x_encoder_sizes,
                                               batch_size=512,
                                               activate_decay=True,
                                               initial_lr=0.1,
                                               max_grad_norm=5.0,
                                               show_training=False,
                                               freq_summary=1000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               reset_lr=True,
                                               lr_decay_rate=0.5,
                                               training_freq=1,
                                               training_freq_network=20,
                                               training_epochs=50,
                                               a0=12,
                                               b0=30,
                                               lambda_prior=23)
            algos.append(
                neural_linear_sampling.NeuralLinearPosteriorSampling(
                    algo_name, hparams))
        elif algo_name == 'multitaskgp':
            hparams_gp = contrib_training.HParams(
                num_actions=num_actions,
                num_outputs=num_actions,
                context_dim=context_dim,
                reset_lr=False,
                learn_embeddings=True,
                max_num_points=1000,
                show_training=False,
                freq_summary=1000,
                batch_size=512,
                keep_fixed_after_max_obs=True,
                training_freq=20,
                initial_pulls=2,
                training_epochs=50,
                lr=0.01,
                buffer_s=-1,
                initial_lr=0.001,
                lr_decay_rate=0.0,
                optimizer='RMS',
                task_latent_dim=5,
                activate_decay=False)
            algos.append(
                posterior_bnn_sampling.PosteriorBNNSampling(
                    algo_name, hparams_gp, 'GP'))
        elif algo_name[:3] == 'snp' or algo_name[:3] == 'anp':
            hidden_size = 64
            latent_units = 32
            global_latent_net_sizes = [hidden_size] * 2 + [2 * latent_units]
            if algo_name[:3] == 'snp':
                local_latent_net_sizes = [hidden_size] * 3 + [2]
            else:
                local_latent_net_sizes = [hidden_size] * 3 + [2 * 5]
            x_y_encoder_sizes = [hidden_size] * 3
            heteroskedastic_net_sizes = None
            mean_att_type = attention.laplace_attention
            scale_att_type_1 = attention.laplace_attention
            scale_att_type_2 = attention.laplace_attention
            att_type = 'multihead'
            att_heads = 8
            data_uncertainty = False
            is_anp = True if algo_name[:3] == 'anp' else False

            hparams = contrib_training.HParams(
                num_actions=num_actions,
                context_dim=context_dim,
                init_scale=0.3,
                activation=tf.nn.relu,
                output_activation=tf.nn.relu,
                x_encoder_sizes=x_encoder_sizes,
                x_y_encoder_sizes=x_y_encoder_sizes,
                global_latent_net_sizes=global_latent_net_sizes,
                local_latent_net_sizes=local_latent_net_sizes,
                heteroskedastic_net_sizes=heteroskedastic_net_sizes,
                att_type=att_type,
                att_heads=att_heads,
                mean_att_type=mean_att_type,
                scale_att_type_1=scale_att_type_1,
                scale_att_type_2=scale_att_type_2,
                data_uncertainty=data_uncertainty,
                batch_size=512,
                activate_decay=True,
                initial_lr=0.1,
                max_grad_norm=5.0,
                show_training=False,
                freq_summary=1000,
                buffer_s=-1,
                initial_pulls=2,
                reset_lr=True,
                lr_decay_rate=0.5,
                training_freq=10,
                training_freq_network=20,
                training_epochs=50,
                uncertainty_type='attentive_freeform',
                local_variational=True,
                model_path=None,
                is_anp=is_anp)

            config = algo_name.split('_')
            if config[1] == 'prior':
                hparams.set_hparam('local_variational', False)

            if config[2] == 'gp':
                hparams.set_hparam('uncertainty_type', 'attentive_gp')

            if config[3] == 'warmstart' or config[3] == 'offline':
                mfile = FLAGS.prefix + config[1] + '_' + config[
                    2] + FLAGS.suffix
                if algo_name[:3] == 'anp':
                    mfile = 'anp_' + mfile
                mpath = os.path.join(FLAGS.modeldir, mfile)
                hparams.set_hparam('model_path', mpath)

            if config[3] == 'online' or config[3] == 'warmstart':
                algos.append(
                    online_contextual_bandits.OnlineContextualBandits(
                        algo_name, hparams))
            else:
                algos.append(
                    offline_contextual_bandits.OfflineContextualBandits(
                        algo_name, hparams))
                ckptfile = os.path.join(FLAGS.ckptdir, runfile)
                if gfile.Exists(ckptfile):
                    save_once = True

    t_init = time.time()
    print('started')
    _, h_rewards = run_contextual_bandit(dataset,
                                         algos,
                                         save_once=save_once,
                                         pkl_file=ckptfile)
    t_final = time.time()

    savedict = {'h_rewards': h_rewards, 'time': t_final - t_init}
    with gfile.Open(savefile, 'wb') as outfile:
        pickle.dump(savedict, outfile)
    return h_rewards, t_final - t_init
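
A standalone sketch of set_hparam as used above: it replaces the value of an already-registered hyperparameter (contrast with add_hparam, which registers a new one). The parameter names are taken from the snippet; the rest is illustrative:

from tensorflow.contrib import training

hparams = training.HParams(uncertainty_type='attentive_freeform',
                           local_variational=True)
hparams.set_hparam('local_variational', False)
hparams.set_hparam('uncertainty_type', 'attentive_gp')
print(hparams.values())
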
Example #7
    }),

    # Reverb (for now just single-parameter).
    ('reverb', {
        'reverberance': (0.0, 70.0, 'linear'),
    }),
]

# Default hyperparameter values from the above pipeline. Note the additional
# `transform_audio` hparam that defaults to False, i.e. by default no audio
# transformation will be performed.
DEFAULT_AUDIO_TRANSFORM_HPARAMS = contrib_training.HParams(
    transform_audio=False,
    audio_transform_noise_type='pinknoise',
    audio_transform_min_noise_vol=0.0,
    audio_transform_max_noise_vol=0.04,
    **dict(('audio_transform_%s_%s_%s' % (m, stage_name, param_name), value)
           for stage_name, params_dict in AUDIO_TRANSFORM_PIPELINE
           for param_name, (min_value, max_value, _) in params_dict.items()
           for m, value in [('min', min_value), ('max', max_value)]))


class AudioTransformParameter(object):
    """An audio transform parameter with min and max value."""
    def __init__(self, name, min_value, max_value, scale):
        """Initialize an AudioTransformParameter.

    Args:
      name: The name of the parameter. Should be the same as the name of the
          parameter passed to sox.
      min_value: The minimum value of the parameter, a float.
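
The dict comprehension in DEFAULT_AUDIO_TRANSFORM_HPARAMS above expands each pipeline stage into min/max hparams. A standalone sketch of the names it produces for the 'reverb' stage shown in the snippet:

pipeline = [('reverb', {'reverberance': (0.0, 70.0, 'linear')})]
generated = dict(
    ('audio_transform_%s_%s_%s' % (m, stage_name, param_name), value)
    for stage_name, params_dict in pipeline
    for param_name, (min_value, max_value, _) in params_dict.items()
    for m, value in [('min', min_value), ('max', max_value)])
print(generated)
# {'audio_transform_min_reverb_reverberance': 0.0,
#  'audio_transform_max_reverb_reverberance': 70.0}
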
Example #8
    def testIntegratedGradientAttribution(self):
        # Due to the complexity of the indicator we cannot easily extend this
        # test to > 1 lab test.
        obs_values = tf.constant([[[10000.0], [15000.0], [2.0]],
                                  [[0.0], [100.0], [2000.0]]])

        # We compare these values to a linear interpolation between the
        # second-to-last and the last value of the test.
        obs_values_base = tf.constant([[[10000.0], [15000.0], [15000.0]],
                                       [[0.0], [100.0], [100.0]]])
        # For this test we need to select all attributions in order for consistency
        # to hold.
        indicator = tf.ones(shape=[2, 3, 1], dtype=tf.float32)
        delta_time = tf.constant([[[1000], [999], [2]], [[1001], [500], [20]]],
                                 dtype=tf.float32)
        # Selected so that the attribution is only over the third time step in both
        # batch entries.
        attribution_max_delta_time = 100
        num_classes = 1

        diff_delta_time = tf.constant(
            [[[1000], [1], [997]], [[1001], [501], [480]]], dtype=tf.float32)
        # This is also important to not lose any time steps in the attribution.
        sequence_length = tf.constant([3, 3])

        # TODO(milah): Not clear why this test doesn't work for the RNN.
        def construct_logits_fn(unused_diff_delta_time, obs_values,
                                unused_indicator, unused_sequence_length,
                                unused_seq_mask, unused_hparams, reuse):
            result = tf.layers.dense(obs_values,
                                     num_classes,
                                     name='test1',
                                     reuse=reuse,
                                     activation=None) * (tf.expand_dims(
                                         obs_values[:, 0, :], axis=1) + 0.5)
            return result, None

        # First setup the weights of the RNN.
        logits, _ = construct_logits_fn(diff_delta_time, obs_values, indicator,
                                        sequence_length, None, None, False)
        # To verify the correctness of the attribution we compute the prediction at
        # the obs_values_base.
        base_logits, _ = construct_logits_fn(diff_delta_time, obs_values_base,
                                             indicator, sequence_length, None,
                                             None, True)

        # Set high for increased precision of the approximation.
        num_steps = 100
        hparams = contrib_training.HParams(
            sequence_prediction=True,
            use_rnn_attention=False,
            path_integrated_gradients_num_steps=num_steps,
            attribution_max_delta_time=attribution_max_delta_time)
        gradients = osm.compute_path_integrated_gradient_attribution(
            obs_values, indicator, diff_delta_time, delta_time,
            sequence_length, None, hparams, construct_logits_fn)
        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            actual_logits = sess.run(logits)
            actual_base_logits = sess.run(base_logits)
            actual_gradients = sess.run(gradients)
            self.assertAllClose(actual_logits - actual_base_logits,
                                actual_gradients,
                                atol=0.001)
Example #9
def main():
    args = _parse_arguments(sys.argv[1:])
    hparams = training.HParams(**args.__dict__)
    tf.logging.set_verbosity(tf.logging.INFO)
    run_experiment(hparams)
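
A minimal sketch of the args-to-HParams pattern above, with a hypothetical argument parser standing in for the _parse_arguments helper that is not shown:

import argparse
from tensorflow.contrib import training

parser = argparse.ArgumentParser()
parser.add_argument('--learning_rate', type=float, default=0.01)
parser.add_argument('--batch_size', type=int, default=32)
args = parser.parse_args(['--learning_rate', '0.001'])

hparams = training.HParams(**args.__dict__)
print(hparams.learning_rate, hparams.batch_size)  # 0.001 32
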
Example #10
DEFAULT_HPARAMS = tf_utils.merge_hparams(
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    contrib_training.HParams(
        eval_batch_size=1,
        predict_batch_size=1,
        shuffle_buffer_size=64,
        sample_rate=16000,
        spec_type='mel',
        spec_mel_htk=True,
        spec_log_amplitude=True,
        spec_hop_length=512,
        spec_n_bins=229,
        spec_fmin=30.0,  # A0
        cqt_bins_per_octave=36,
        truncated_length_secs=0.0,
        max_expected_train_example_len=0,
        onset_length=32,
        offset_length=32,
        onset_mode='length_ms',
        onset_delay=0,
        min_frame_occupancy_for_label=0.0,
        jitter_amount_ms=0,
        min_duration_ms=0,
        backward_shift_amount_ms=0,
        velocity_scale=80.0,
        velocity_bias=10.0,
        drum_data_map='',
        drum_prediction_map='',
        velocity_loss_weight=1.0,
        splice_n_examples=0))

CONFIG_MAP = {}
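
tf_utils.merge_hparams is a Magenta helper that is not shown here. A hedged sketch of what such a merge can reduce to with the HParams API (values() plus add_hparam), assuming the second object's entries are simply appended:

from tensorflow.contrib import training

def merge_hparams_sketch(hparams_1, hparams_2):
    # Copy the first HParams, then register each entry of the second.
    merged = training.HParams(**hparams_1.values())
    for name, value in hparams_2.values().items():
        merged.add_hparam(name, value)
    return merged

a = training.HParams(eval_batch_size=1, sample_rate=16000)
b = training.HParams(transform_audio=False)
print(merge_hparams_sketch(a, b).values())
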
Example #11
    def testBasicModelFn(self, sequence_prediction, include_gradients,
                         include_gradients_sum_time,
                         include_gradients_avg_time,
                         include_path_integrated_gradients,
                         include_diff_sequence_prediction, use_rnn_attention,
                         attention_hidden_layer_dim, volatility_loss_factor):
        """This high-level tests ensures there are no errors during training.

    It also checks that the loss is decreasing.

    Args:
      sequence_prediction: Whether to consider the recent predictions in the
        loss or only the last prediction.
      include_gradients: Whether to generate attribution with the
        gradients of the last predictions.
      include_gradients_sum_time: Whether to generate attribution
        with the gradients of the sum of the predictions over time.
      include_gradients_avg_time: Whether to generate attribution
        with the gradients of the average of the predictions over time.
      include_path_integrated_gradients: Whether to generate
        attribution with the integrated gradients of last predictions compared
        to their most recent values before attribution_max_delta_time.
      include_diff_sequence_prediction: Whether to
        generate attribution from the difference of consecutive predictions.
      use_rnn_attention: Whether to use attention for the RNN.
      attention_hidden_layer_dim: If use_rnn_attention what the dimensionality
        of a hidden layer should be (or 0 if none) of last output and
        intermediates before multiplying to obtain a weight.
      volatility_loss_factor: Include the sum of the changes in predictions
        across the sequence in the loss multiplied by this factor.
    """
        num_steps = 2
        hparams = contrib_training.HParams(
            batch_size=2,
            learning_rate=0.008,
            sequence_features=[
                'deltaTime', 'Observation.code',
                'Observation.valueQuantity.value'
            ],
            categorical_values=['loinc:1', 'loinc:2', 'MISSING'],
            categorical_seq_feature='Observation.code',
            context_features=['sequenceLength'],
            feature_value='Observation.valueQuantity.value',
            label_key='label.in_hospital_death',
            attribution_threshold=-1.0,
            rnn_size=6,
            variational_recurrent_keep_prob=1.1,
            variational_input_keep_prob=1.1,
            variational_output_keep_prob=1.1,
            sequence_prediction=sequence_prediction,
            time_decayed=False,
            normalize=True,
            momentum=0.9,
            min_value=-1000.0,
            max_value=1000.0,
            volatility_loss_factor=volatility_loss_factor,
            attribution_max_delta_time=100000,
            input_keep_prob=1.0,
            include_sequence_prediction=sequence_prediction,
            include_gradients_attribution=include_gradients,
            include_gradients_sum_time_attribution=include_gradients_sum_time,
            include_gradients_avg_time_attribution=include_gradients_avg_time,
            include_path_integrated_gradients_attribution=(
                include_path_integrated_gradients),
            include_diff_sequence_prediction_attribution=(
                include_diff_sequence_prediction),
            use_rnn_attention=use_rnn_attention,
            attention_hidden_layer_dim=attention_hidden_layer_dim,
            path_integrated_gradients_num_steps=10,
        )
        observation_values = tf.SparseTensor(
            indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [1, 0, 0], [1, 1, 0],
                     [1, 2, 0]],
            values=[100.0, 2.3, 9999999.0, 0.5, 0.0, 4.0],
            dense_shape=[2, 3, 1])
        model = osm.ObservationSequenceModel()
        model_fn = model.create_model_fn(hparams)
        features = {
            input_fn.CONTEXT_KEY_PREFIX + 'sequenceLength':
            tf.constant([[2], [3]], dtype=tf.int64),
            input_fn.SEQUENCE_KEY_PREFIX + 'Observation.code':
            tf.SparseTensor(indices=observation_values.indices,
                            values=[
                                'loinc:2', 'loinc:1', 'loinc:2', 'loinc:1',
                                'MISSING', 'loinc:1'
                            ],
                            dense_shape=observation_values.dense_shape),
            input_fn.SEQUENCE_KEY_PREFIX + 'Observation.valueQuantity.value':
            observation_values,
            input_fn.SEQUENCE_KEY_PREFIX + 'deltaTime':
            tf.constant([[[1], [2], [0]], [[1], [3], [4]]], dtype=tf.int64)
        }
        label_key = 'label.in_hospital_death'
        labels = {label_key: tf.constant([[1.0], [0.0]], dtype=tf.float32)}
        with tf.variable_scope('test'):
            model_fn_ops_train = model_fn(features, labels,
                                          tf.estimator.ModeKeys.TRAIN)
        with tf.variable_scope('test', reuse=True):
            features[input_fn.CONTEXT_KEY_PREFIX +
                     'label.in_hospital_death'] = tf.SparseTensor(
                         indices=[[0, 0]],
                         values=['expired'],
                         dense_shape=[2, 1])
            model_fn_ops_eval = model_fn(features,
                                         labels=None,
                                         mode=tf.estimator.ModeKeys.PREDICT)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.tables_initializer())
            # Test train.
            for i in range(num_steps):
                loss, _ = sess.run(
                    [model_fn_ops_train.loss, model_fn_ops_train.train_op])
                if i == 0:
                    initial_loss = loss
            self.assertLess(loss, initial_loss)
            # Test infer.
            sess.run(model_fn_ops_eval.predictions)
Example #12
    def setUp(self):
        self.config = events_rnn_model.EventSequenceRnnConfig(
            None,
            magenta.music.OneHotEventSequenceEncoderDecoder(
                magenta.music.MultiDrumOneHotEncoding()),
            contrib_training.HParams())
Example #13
def create_hparams(hparams_override_str=''):
    """Creates default HParams with the option of overrides.

  Args:
    hparams_override_str: String with possible overrides.

  Returns:
    Default HParams.
  """
    hparams = contrib_training.HParams(
        # Sequence features are bucketed by their age at time of prediction in:
        # [time_windows[0], time_windows[1]),
        # [time_windows[1], time_windows[2]),
        # ...
        time_windows=[
            5 * 365 * 24 * 60 * 60,  # 5 years
            365 * 24 * 60 * 60,  # 1 year
            30 * 24 * 60 * 60,  # 1 month
            7 * 24 * 60 * 60,  # 1 week
            1 * 24 * 60 * 60,  # 1 day
            0,  # now
        ],
        batch_size=64,
        learning_rate=0.003,
        dedup=True,
        # Currently supported optimizers are Adam and Ftrl.
        optimizer='Ftrl',
        # Note that these regularization terms are only applied for Ftrl.
        l1_regularization_strength=0.0,
        l2_regularization_strength=0.0,
        include_age=True,
        age_boundaries=[1, 5, 18, 30, 50, 70, 90],
        categorical_context_features=['Patient.gender'],
        sequence_features=[
            'Composition.section.text.div.tokenized',
            'Composition.type',
            'Condition.code',
            'Encounter.hospitalization.admitSource',
            'Encounter.reason.hcc',
            'MedicationRequest.contained.medication.code.gsn',
            'Procedure.code.cpt',
        ],
        # Number of hash buckets to map the tokens of the sequence_features into.
        sequence_bucket_sizes=[
            17000,
            16,
            3052,
            10,
            62,
            1600,
            732,
        ],
        # List of strings, each of which is a ':'-separated list of features that
        # we want to concatenate over the time dimension.
        time_crossed_features=[
            '%s:%s:%s:%s' %
            ('Observation.code', 'Observation.value.quantity.value',
             'Observation.value.quantity.unit', 'Observation.value.string')
        ],
        time_concat_bucket_sizes=[39571],
        context_bucket_sizes=[4],
        # Model type needs to be linear or dnn.
        model_type='linear',
        # In case of model_type of dnn we can specify the hidden layer dimension.
        dnn_hidden_units=[256],
        # In case of model_type of dnn we can specify the dropout probability.
        dnn_dropout=0.1)
    # hparams_override_str override any of the preceding hyperparameter values.
    if hparams_override_str:
        hparams = hparams.parse(hparams_override_str)
    return hparams
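
The override string accepted by create_hparams above is the standard HParams.parse format, a comma-separated list of name=value pairs. A standalone sketch with illustrative values:

from tensorflow.contrib import training

hparams = training.HParams(batch_size=64, learning_rate=0.003,
                           model_type='linear')
hparams.parse('batch_size=32,learning_rate=0.001,model_type=dnn')
print(hparams.batch_size, hparams.learning_rate, hparams.model_type)
# 32 0.001 dnn
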
Example #14
        self.num_velocity_bins = num_velocity_bins
        self.control_signals = control_signals
        self.optional_conditioning = optional_conditioning
        self.note_performance = note_performance


default_configs = {
    'performance':
    PerformanceRnnConfig(
        magenta.music.protobuf.generator_pb2.GeneratorDetails(
            id='performance', description='Performance RNN'),
        magenta.music.OneHotEventSequenceEncoderDecoder(
            magenta.music.PerformanceOneHotEncoding()),
        contrib_training.HParams(batch_size=64,
                                 rnn_layer_sizes=[512, 512, 512],
                                 dropout_keep_prob=1.0,
                                 clip_norm=3,
                                 learning_rate=0.001)),
    'performance_with_dynamics':
    PerformanceRnnConfig(
        magenta.music.protobuf.generator_pb2.GeneratorDetails(
            id='performance_with_dynamics',
            description='Performance RNN with dynamics'),
        magenta.music.OneHotEventSequenceEncoderDecoder(
            magenta.music.PerformanceOneHotEncoding(num_velocity_bins=32)),
        contrib_training.HParams(batch_size=64,
                                 rnn_layer_sizes=[512, 512, 512],
                                 dropout_keep_prob=1.0,
                                 clip_norm=3,
                                 learning_rate=0.001),
        num_velocity_bins=32),
Example #15
def get_hparams(**kwargs):
    """Creates a set of default hyperparameters.

  Note that in addition to the hyperparameters described below, the full set of
  hyperparameters includes input_ops.get_hparams() for specifying the input data
  pipeline (see that function for input_ops hyperparameter descriptions).

  Model hyperparameters:
    grammar_path: String, the filename of txt file containing the grammar
        production rules. Expressions will be parsed by this grammar.
    learning_rate: Float, learning rate.
    learning_rate_decay_rate: Float, decay rate for tf.train.exponential_decay.
    learning_rate_decay_steps: Integer, decay steps for
        tf.train.exponential_decay.
    optimizer: String, optimizer name. Must be one of
        tf.contrib.layers.OPTIMIZER_CLS_NAMES.
    save_checkpoints_secs: Integer, number of seconds between model checkpoints.
    keep_checkpoint_max: Integer, the maximum number of recent checkpoint files
        to keep. As new files are created, older files are deleted.
        If None or 0, all checkpoint files are kept.
    start_delay_secs: Integer, number of seconds to wait before starting
        evaluations.
    throttle_secs: Integer, number of seconds between evaluations.
    train_steps: Integer, maximum number of training steps. Set to None to train
        forever.
    eval_steps: Integer, number of steps for each evaluation. Set to None to
        evaluate the entire tune/test set.
    embedding_size: Integer, the size of production rule embedding.
    symbolic_properties: List of strings, symbolic properties to concatenate on
        embedding as conditions.
    numerical_points: List of floats, points to evaluate expression values.
    gru_hidden_sizes: List of integers, number of units for each GRU layer.
    bidirectional: Boolean, whether to use bidirectional RNN.
    generation_leading_powers_abs_sums: List of integers, the sum of leading
        power at 0 and at inf, defining the condition in generation.
        For example, if generation_leading_powers_abs_sums = [1, 2],
        expressions will be generated with
        the following conditions (leading_at_0, leading_at_inf):
        (0, 1), (-1, 0), (0, -1), (1, 0)
        (0, 2), (-1, 1), (-2, 0), (-1, -1), (0, -2), (1, -1), (2, 0), (1, 1)
        This is used for eval.
    num_expressions_per_condition: Integer, the number of expressions to
        generate for each condition. This is used for eval. Default 0, no
        generation in eval.
    exports_to_keep: Integer, the number of latest exported model to keep.

  Args:
    **kwargs: Dict of parameter overrides.

  Returns:
    HParams.
  """
    hparams = contrib_training.HParams(
        grammar_path=None,
        learning_rate=0.01,
        learning_rate_decay_rate=1.0,
        learning_rate_decay_steps=100000,
        optimizer='Adagrad',
        save_checkpoints_secs=600,
        keep_checkpoint_max=20,
        start_delay_secs=300,
        throttle_secs=300,
        train_steps=None,
        eval_steps=None,
        embedding_size=10,
        symbolic_properties=core.HPARAMS_EMPTY_LIST_STRING,
        numerical_points=core.HPARAMS_EMPTY_LIST_FLOAT,
        gru_hidden_sizes=[100],
        bidirectional=False,
        generation_leading_powers_abs_sums=core.HPARAMS_EMPTY_LIST_INT,
        num_expressions_per_condition=0,
        exports_to_keep=50)

    # Add hparams from input_ops.
    # Using add_hparam ensures there are no duplicated parameters.
    for key, value in six.iteritems(input_ops.get_hparams().values()):
        if key in hparams.values():
            continue  # Skip duplicated parameters.
        hparams.add_hparam(key, value)
    return hparams.override_from_dict(kwargs)
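
A standalone sketch of the two HParams calls used above: add_hparam registers a parameter that does not exist yet, while override_from_dict overrides existing values from a kwargs dict. The parameter names below are illustrative:

from tensorflow.contrib import training

hparams = training.HParams(learning_rate=0.01, embedding_size=10)
hparams.add_hparam('batch_size', 64)                          # new parameter
hparams = hparams.override_from_dict({'learning_rate': 0.1})  # existing one
print(hparams.values())
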
Example #16
    def test_model_integration(self):
        features, labels = input_fn.get_input_fn(
            tf.estimator.ModeKeys.TRAIN, [self.input_data_dir],
            'label.in_hospital_death.class',
            sequence_features=[
                'Observation.code', 'Observation.value.quantity.value',
                'Observation.value.quantity.unit',
                'Observation.code.harmonized:valueset-observation-name'
            ],
            dense_sequence_feature='Observation.value.quantity.value',
            required_sequence_feature=
            'Observation.code.harmonized:valueset-observation-name',
            batch_size=2,
            shuffle=False)()
        num_steps = 2
        hparams = contrib_training.HParams(
            batch_size=2,
            learning_rate=0.008,
            sequence_features=[
                'deltaTime', 'Observation.code',
                'Observation.value.quantity.value'
            ],
            categorical_values=['loinc:4', 'loinc:6', 'loinc:1'],
            categorical_seq_feature='Observation.code',
            context_features=['sequenceLength'],
            feature_value='Observation.value.quantity.value',
            label_key='label.in_hospital_death.class',
            attribution_threshold=-1.0,
            rnn_size=6,
            variational_recurrent_keep_prob=1.1,
            variational_input_keep_prob=1.1,
            variational_output_keep_prob=1.1,
            sequence_prediction=False,
            time_decayed=False,
            normalize=True,
            momentum=0.9,
            min_value=-1000.0,
            max_value=1000.0,
            volatility_loss_factor=0.0,
            attribution_max_delta_time=100000,
            input_keep_prob=1.0,
            include_sequence_prediction=False,
            include_gradients_attribution=True,
            include_gradients_sum_time_attribution=False,
            include_gradients_avg_time_attribution=False,
            include_path_integrated_gradients_attribution=True,
            include_diff_sequence_prediction_attribution=False,
            use_rnn_attention=True,
            attention_hidden_layer_dim=5,
            path_integrated_gradients_num_steps=10,
        )
        model = osm.ObservationSequenceModel()
        model_fn = model.create_model_fn(hparams)
        with tf.variable_scope('test'):
            model_fn_ops_train = model_fn(features, labels,
                                          tf.estimator.ModeKeys.TRAIN)
        with tf.variable_scope('test', reuse=True):
            model_fn_ops_eval = model_fn(features,
                                         labels=None,
                                         mode=tf.estimator.ModeKeys.PREDICT)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.tables_initializer())
            # Test train.
            for i in range(num_steps):
                loss, _ = sess.run(
                    [model_fn_ops_train.loss, model_fn_ops_train.train_op])
                if i == 0:
                    initial_loss = loss
            self.assertLess(loss, initial_loss)
            # Test infer.
            sess.run(model_fn_ops_eval.predictions)
Example #17
def pegasus_large_params(param_overrides):
    """Params for PegasusLarge."""

    hparams = contrib_training.HParams(
        train_pattern="tfds_transformed:common_crawl-train",
        dev_pattern="tfds_transformed:common_crawl-validation",
        test_pattern="tfds_transformed:common_crawl-test",
        vocab_filename="pegasus/ops/testdata/sp_test.model",
        encoder_type="sentencepiece_newline",
        parser_strategy="dynamic_rouge",
        parser_masked_sentence_ratio=0.45,
        parser_masked_words_ratio=0.0,
        # Configure the options of word masking
        # The sum of the three probs below (mask word by MSK, random, or intact)
        # should be 1.
        # By default, following the word masking procedure of BERT, which is
        # 80% by <MSK>, 10% by random tokens, 10% remain unchanged.
        parser_mask_word_by_msk_token_prob=0.8,
        parser_mask_word_by_random_token_prob=0.1,
        parser_mask_word_by_intact_prob=0.1,
        # Configure the options of sentence masking.
        # The sum of the four probs below (mask sentence by MSK, random, intact
        # or remove) should be 1.
        # The four sentence masking options:
        #   1. Masking selected sentences by <MSK>. In practice, the <MSK> token
        #      for sentences is different from the <MSK> token for words in order
        #      to distinguish sentence masking and word masking.
        #   2. Masking selected sentences by other sentences which are randomly
        #      picked from the same document.
        #   3. Masking selected sentences by leaving them unchanged.
        #   4. Masking selected sentences by removing them from inputs.
        parser_mask_sentence_by_msk_token_prob=0.9,
        parser_mask_sentence_by_random_sentence_prob=0.,
        parser_mask_sentence_by_intact_prob=0.1,
        parser_mask_sentence_by_remove_prob=0.,
        # rouge_ngrams_size: a positive integer
        parser_rouge_ngrams_size=1,
        # rouge_metric_type: precision, recall, F
        parser_rouge_metric_type="F",
        # rouge_compute_option: standard, deduplicate, log
        #   standard: number of each ngram counted as it appears
        #   deduplicate: number of each ngram counted once only
        #   log: apply log(1+n) when computing the appearance of each ngram
        parser_rouge_compute_option="standard",
        parser_rouge_stopwords_filename=
        "pegasus/ops/testdata/english_stopwords",
        parser_rouge_noise_ratio=0.20,
        parser_dynamic_mask_min_ratio=0.33,
        # if greater than zero, assign target into buckets by
        # length // bucket_size, the bucket id is appended to the start of inputs.
        # the bucket id uses the reserved bucket ids, starting from the start id,
        # goes up to the maximum number of reserved tokens.
        length_bucket_size=0,
        add_task_id=False,
        batch_size=16,
        max_input_len=512,
        max_target_len=256,
        max_decode_len=256,
        max_total_words=0,
        pretrain_target_filter_min=0,
        hidden_size=1024,
        filter_size=4096,
        num_heads=16,
        num_encoder_layers=16,
        num_decoder_layers=16,
        optimizer_name="adafactor",
        learning_rate=0.01,
        label_smoothing=0.0,
        dropout=0.1,
        train_steps=1500000,
        beam_size=1,
        eval_max_predictions=1000,
        use_bfloat16=False,
        model=None,
        encoder=None,
        parser=None,
        estimator_prediction_fn=None,
        eval=None,
        estimator_eval_metrics_fn=estimator_metrics.pretrain_eval_metrics_fn,
    )

    if param_overrides:
        hparams.parse(param_overrides)

    # Check values
    if (hparams.parser_mask_word_by_msk_token_prob +
            hparams.parser_mask_word_by_random_token_prob +
            hparams.parser_mask_word_by_intact_prob) != 1.:
        raise ValueError("The sum of rates of the three word masking options "
                         "(MSK, random, intact) does not equal to 1.")
    if (hparams.parser_mask_sentence_by_msk_token_prob +
            hparams.parser_mask_sentence_by_random_sentence_prob +
            hparams.parser_mask_sentence_by_intact_prob +
            hparams.parser_mask_sentence_by_remove_prob) != 1.:
        raise ValueError(
            "The sum of rates of the four sentence masking options "
            "(MSK, random, intact, skip) does not equal to 1.")
    hparams.encoder = public_parsing_ops.create_text_encoder(
        hparams.encoder_type, hparams.vocab_filename)
    hparams.parser = functools.partial(
        parsers.string_features_for_pretraining_parser,
        hparams.vocab_filename,
        hparams.encoder_type,
        hparams.max_input_len,
        hparams.max_target_len,
        hparams.max_total_words,
        hparams.parser_strategy,
        hparams.parser_masked_sentence_ratio,
        hparams.parser_masked_words_ratio, [
            hparams.parser_mask_word_by_msk_token_prob,
            hparams.parser_mask_word_by_random_token_prob,
            hparams.parser_mask_word_by_intact_prob
        ], [
            hparams.parser_mask_sentence_by_msk_token_prob,
            hparams.parser_mask_sentence_by_random_sentence_prob,
            hparams.parser_mask_sentence_by_intact_prob,
            hparams.parser_mask_sentence_by_remove_prob
        ],
        hparams.parser_rouge_ngrams_size,
        hparams.parser_rouge_metric_type,
        hparams.parser_rouge_compute_option,
        hparams.parser_rouge_stopwords_filename,
        NUM_RESERVED_TOKENS,
        parser_rouge_noise_ratio=hparams.parser_rouge_noise_ratio,
        parser_dynamic_mask_min_ratio=hparams.parser_dynamic_mask_min_ratio,
        input_feature="inputs",
        pretrain_target_filter_min=hparams.pretrain_target_filter_min,
        length_bucket_size=hparams.length_bucket_size,
        length_bucket_start_id=LENGTH_BUCKET_START_ID,
        length_bucket_max_id=TASK_START_ID - 1,
        add_task_id=hparams.add_task_id,
        task_start_id=TASK_START_ID)
    hparams.model = functools.partial(
        transformer.TransformerEncoderDecoderModel, hparams.encoder.vocab_size,
        hparams.hidden_size, hparams.filter_size, hparams.num_heads,
        hparams.num_encoder_layers, hparams.num_decoder_layers,
        hparams.label_smoothing, hparams.dropout)

    def decode_fn(features):
        return hparams.model().predict(features, hparams.max_decode_len,
                                       hparams.beam_size)

    hparams.estimator_prediction_fn = decode_fn
    hparams.eval = functools.partial(text_eval.text_eval,
                                     hparams.encoder,
                                     num_reserved=NUM_RESERVED_TOKENS)
    return hparams
Example #18
                                 beam_size, branch_factor, steps_per_iteration,
                                 modify_events_callback=modify_events_callback)

  def polyphonic_sequence_log_likelihood(self, sequence):
    """Evaluate the log likelihood of a polyphonic sequence.

    Args:
      sequence: The PolyphonicSequence object for which to evaluate the log
          likelihood.

    Returns:
      The log likelihood of `sequence` under this model.
    """
    return self._evaluate_log_likelihood([sequence])[0]


default_configs = {
    'polyphony': events_rnn_model.EventSequenceRnnConfig(
        generator_pb2.GeneratorDetails(
            id='polyphony',
            description='Polyphonic RNN'),
        magenta.music.OneHotEventSequenceEncoderDecoder(
            polyphony_encoder_decoder.PolyphonyOneHotEncoding()),
        contrib_training.HParams(
            batch_size=64,
            rnn_layer_sizes=[256, 256, 256],
            dropout_keep_prob=0.5,
            clip_norm=5,
            learning_rate=0.001)),
}
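The HParams carried by a config can still be adjusted after the dict is built: HParams.parse accepts a comma-separated override string and set_hparam changes a single value. A short sketch, assuming EventSequenceRnnConfig exposes its hyperparameters as a .hparams attribute:

# Assumes default_configs from above is in scope.
config = default_configs['polyphony']

# Override a couple of training hyperparameters before building the model.
config.hparams.parse('batch_size=128,learning_rate=0.0005')
config.hparams.set_hparam('dropout_keep_prob', 0.75)

print(config.hparams.batch_size)  # 128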
Beispiel #19
0
def transformer_params(patterns, param_overrides):
    """Params for TransformerEncoderDecoderMLModel.

    Args:
      patterns: a dict that includes train_pattern, dev_pattern, test_pattern,
        plus the batch-size, length, and training-schedule values read below.
      param_overrides: a string, a comma-separated list of name=value overrides.

    Returns:
      An instance of HParams (see the usage sketch after this function).
    """

    hparams = contrib_training.HParams(
        train_pattern=patterns["train_pattern"],
        dev_pattern=patterns["dev_pattern"],
        test_pattern=patterns["test_pattern"],
        vocab_filename="pegasus/ops/testdata/sp_test.model",
        encoder_type="sentencepiece_newline",
        length_bucket_size=0,
        add_task_id=False,
        batch_size=patterns["batch_size"],
        max_input_len=patterns["max_input_len"],
        max_target_len=patterns["max_output_len"],
        max_decode_len=patterns["max_output_len"],
        hidden_size=1024,
        filter_size=4096,
        num_heads=16,
        num_encoder_layers=16,
        num_decoder_layers=16,
        beam_size=1,
        beam_start=5,
        beam_alpha=0.8,
        beam_min=0,
        beam_max=-1,
        temperature=0.0,
        top_k=0,
        top_p=0.0,
        optimizer_name="adafactor",
        train_steps=patterns["train_steps"],
        learning_rate=patterns["learning_rate"],
        label_smoothing=0.1,
        dropout=0.1,
        eval_max_predictions=patterns.get("eval_steps", 1000),
        use_bfloat16=False,
        model=None,
        parser=None,
        encoder=None,
        estimator_prediction_fn=None,
        eval=None,
        estimator_eval_metrics_fn=estimator_metrics.gen_eval_metrics_fn,
    )

    if param_overrides:
        hparams.parse(param_overrides)

    hparams.parser = functools.partial(
        parsers.supervised_strings_parser,
        hparams.vocab_filename,
        hparams.encoder_type,
        hparams.max_input_len,
        hparams.max_target_len,
        length_bucket_size=hparams.length_bucket_size,
        length_bucket_start_id=pegasus_params.LENGTH_BUCKET_START_ID,
        length_bucket_max_id=pegasus_params.TASK_START_ID - 1,
        add_task_id=hparams.add_task_id,
        task_start_id=pegasus_params.TASK_START_ID)

    hparams.encoder = public_parsing_ops.create_text_encoder(
        hparams.encoder_type, hparams.vocab_filename)

    hparams.model = functools.partial(
        transformer.TransformerEncoderDecoderModel, hparams.encoder.vocab_size,
        hparams.hidden_size, hparams.filter_size, hparams.num_heads,
        hparams.num_encoder_layers, hparams.num_decoder_layers,
        hparams.label_smoothing, hparams.dropout)

    beam_keys = ("beam_start", "beam_alpha", "beam_min", "beam_max",
                 "temperature", "top_k", "top_p")
    beam_kwargs = {
        k: hparams.get(k)
        for k in beam_keys if k in hparams.values()
    }

    def decode_fn(features):
        return hparams.model().predict(features, hparams.max_decode_len,
                                       hparams.beam_size, **beam_kwargs)

    hparams.estimator_prediction_fn = decode_fn
    hparams.eval = functools.partial(
        text_eval.text_eval,
        hparams.encoder,
        num_reserved=pegasus_params.NUM_RESERVED_TOKENS)

    return hparams
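A hedged usage sketch for transformer_params; the pattern strings and sizes below are placeholders, and running it end to end also requires the sentencepiece vocab file referenced by the defaults:

# Placeholder values; replace with real dataset patterns and a tuned schedule.
patterns = {
    'train_pattern': 'tfds:some_dataset-train',
    'dev_pattern': 'tfds:some_dataset-validation',
    'test_pattern': 'tfds:some_dataset-test',
    'batch_size': 8,
    'max_input_len': 512,
    'max_output_len': 256,
    'train_steps': 10000,
    'learning_rate': 0.01,
}

hparams = transformer_params(patterns, 'beam_size=4,beam_alpha=0.6')
print(hparams.beam_size)  # 4, taken from the override string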
Beispiel #20
0
def main(_):
    if FLAGS.dataset == 'cifar10':
        data_path = './cifar10_data/'
        assert FLAGS.train_size <= 50000
        validation_size = 50000 - FLAGS.train_size
    elif FLAGS.dataset == 'cifar100':
        data_path = './cifar100_data/'
        assert FLAGS.train_size <= 50000
        validation_size = 50000 - FLAGS.train_size
    elif FLAGS.dataset == 'svhn':
        data_path = './svhn_dataset/'
        assert FLAGS.train_size <= 73257
        validation_size = 73257 - FLAGS.train_size
    else:
        raise ValueError('Invalid dataset: %s' % FLAGS.dataset)

    hparams = contrib_training.HParams(
        train_size=FLAGS.train_size,
        validation_size=validation_size,
        eval_test=1,
        dataset=FLAGS.dataset,
        extra_dataset=FLAGS.extra_dataset,
        frequency=FLAGS.frequency,
        amplitude=FLAGS.amplitude,
        data_path=data_path,
        batch_size=256,
        gradient_clipping_by_global_norm=5.0,
        dummy_f=FLAGS.dummy_f,
        augment_type=FLAGS.augment_type,
        mixup_alpha=FLAGS.mixup_alpha,
        num_augmentation_layers=FLAGS.num_augmentation_layers,
        augmentation_magnitude=FLAGS.augmentation_magnitude,
        augmentation_probability=FLAGS.augmentation_probability,
        freq_augment_amplitude=FLAGS.freq_augment_amplitude,
        freq_augment_ffrac=FLAGS.freq_augment_ffrac,
        apply_cutout=FLAGS.apply_cutout,
        apply_flip_crop=FLAGS.apply_flip_crop,
        num_epochs=FLAGS.num_epochs,
        weight_decay_rate=FLAGS.weight_decay_rate,
        lr=FLAGS.lr,
        model_name=FLAGS.model_name,
        is_gan_data=FLAGS.is_gan_data,
        use_fixup=FLAGS.use_fixup,
        use_batchnorm=FLAGS.use_batchnorm,
        use_gamma_swish=FLAGS.use_gamma_swish,
        init_beta=FLAGS.init_beta,
        init_gamma=FLAGS.init_gamma,
        noise_type=FLAGS.noise_type,
        spatial_frequency=FLAGS.spatial_frequency,
        noise_seed=FLAGS.noise_seed,
        noise_class=FLAGS.noise_class,
        max_accuracy=FLAGS.max_accuracy,
        min_loss=FLAGS.min_loss,
        teacher_model=FLAGS.teacher_model,
        distillation_alpha=FLAGS.distillation_alpha,
        normalize_amplitude=FLAGS.normalize_amplitude,
        ckpt_every=FLAGS.ckpt_every,
    )
    tf.logging.info('All hparams : {}'.format(hparams))

    if FLAGS.model_name == 'wrn_32':
        setattr(hparams, 'model_name', 'wrn')
        hparams.add_hparam('wrn_size', 32)
    elif FLAGS.model_name == 'wrn_160':
        setattr(hparams, 'model_name', 'wrn')
        hparams.add_hparam('wrn_size', 160)
    elif FLAGS.model_name == 'shake_shake_32':
        setattr(hparams, 'model_name', 'shake_shake')
        hparams.add_hparam('shake_shake_widen_factor', 2)
    elif FLAGS.model_name == 'shake_shake_96':
        setattr(hparams, 'model_name', 'shake_shake')
        hparams.add_hparam('shake_shake_widen_factor', 6)
    elif FLAGS.model_name == 'shake_shake_112':
        setattr(hparams, 'model_name', 'shake_shake')
        hparams.add_hparam('shake_shake_widen_factor', 7)
    elif FLAGS.model_name == 'pyramid_net':
        setattr(hparams, 'model_name', 'pyramid_net')
        hparams.batch_size = 64
    else:
        raise ValueError('Not Valid Model Name: %s' % FLAGS.model_name)
    tf.logging.info('All hparams : {}'.format(hparams))

    cifar_trainer = CifarModelTrainer(hparams)
    cifar_trainer.run_model()
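The branch above mixes setattr and add_hparam. With contrib_training.HParams, add_hparam refuses names that already exist, while set_hparam only updates names that do; setattr bypasses both checks. A small sketch of the distinction (illustrative values only):

from tensorflow.contrib import training as contrib_training

hparams = contrib_training.HParams(model_name='wrn_32', batch_size=256)

hparams.set_hparam('batch_size', 64)     # ok: batch_size already exists
hparams.add_hparam('wrn_size', 32)       # ok: wrn_size is a new name
# hparams.add_hparam('batch_size', 128)  # raises ValueError: name already exists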
Beispiel #21
0
def build_hparams(params=def_params):
    return training.HParams(**params)
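A small usage sketch, assuming def_params is a plain dict of hyperparameter names and values as the signature implies:

# Any dict of name/value pairs works; these values are illustrative.
hparams = build_hparams({'learning_rate': 0.001, 'batch_size': 32})
print(hparams.learning_rate)  # 0.001

# Values can still be adjusted on the returned object.
hparams.set_hparam('batch_size', 64)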
Beispiel #22
0
Config = collections.namedtuple('Config', ('model_fn', 'hparams'))

DEFAULT_HPARAMS = tf_utils.merge_hparams(
    audio_transform.DEFAULT_AUDIO_TRANSFORM_HPARAMS,
    contrib_training.HParams(
        eval_batch_size=1,
        predict_batch_size=1,
        onset_only_sequence_prediction=False,
        shuffle_buffer_size=64,
        sample_rate=16000,
        spec_type='mel',
        spec_mel_htk=True,
        spec_log_amplitude=True,
        spec_hop_length=512,
        spec_n_bins=229,
        spec_fmin=30.0,  # A0
        cqt_bins_per_octave=36,
        truncated_length_secs=0.0,
        max_expected_train_example_len=0,
        onset_length=32,
        offset_length=32,
        onset_mode='length_ms',
        onset_delay=0,
        min_frame_occupancy_for_label=0.0,
        jitter_amount_ms=0,
        min_duration_ms=0,
        backward_shift_amount_ms=0))
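tf_utils.merge_hparams combines the audio-transform defaults with the model-specific settings above into one HParams object. A hand-rolled sketch of that kind of merge, assuming values from the second set win on name collisions (the real helper may differ in details):

from tensorflow.contrib import training as contrib_training


def merge_hparams_sketch(first, second):
    """Combine two HParams objects into a new one; `second` wins on conflicts."""
    merged = dict(first.values())
    merged.update(second.values())
    return contrib_training.HParams(**merged)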

CONFIG_MAP = {}

CONFIG_MAP['onsets_frames'] = Config(
Beispiel #23
0
def copy_hparams(hparams):
    """Return a copy of an HParams instance."""
    return contrib_training.HParams(**hparams.values())
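Because HParams.values() returns a plain dict of the current values, the copy above is independent of the original for scalar hyperparameters (list-valued ones are likely only copied shallowly). A quick sketch:

from tensorflow.contrib import training as contrib_training

original = contrib_training.HParams(batch_size=64, learning_rate=0.001)
clone = copy_hparams(original)

clone.set_hparam('batch_size', 128)
print(original.batch_size, clone.batch_size)  # 64 128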
Beispiel #24
0
  def setUp(self):
    self.config = events_rnn_model.EventSequenceRnnConfig(
        None,
        magenta.music.OneHotEventSequenceEncoderDecoder(
            polyphony_encoder_decoder.PolyphonyOneHotEncoding()),
        contrib_training.HParams())
Beispiel #25
0
def imagenet_hparams():
    """Returns default ImageNet training params.

    These defaults are for full training. For search training, some values
    should be modified to increase the speed of the search (see the sketch
    after this function).
    """
    return contrib_training.HParams(
        ##########################################################################
        # Input pipeline params. #################################################
        ##########################################################################
        image_size=299,
        num_train_images=1281167,
        num_eval_images=50000,
        num_label_classes=1001,
        ##########################################################################
        # Architectural params. ##################################################
        ##########################################################################

        # The total number of regular cells (summed across all stacks). Reduction
        # cells are not included.
        num_cells=18,
        reduction_size=256,
        stem_reduction_size=32,

        # How many reduction cells to use between the stacks of regular cells.
        num_reduction_layers=2,

        # Stem.
        stem_type='imagenet',  # 'imagenet' or others
        num_stem_cells=2,  # 2 if stem_type == 'imagenet' else 0

        # Implementation details.
        data_format='NCHW',  # 'NHWC' or 'NCHW'.

        ##########################################################################
        # Training params. #######################################################
        ##########################################################################

        # Summed across all TPU cores training a model.
        train_batch_size=32,
        num_epochs=100.,

        # Auxiliary head.
        use_aux_head=True,
        aux_scaling=0.4,

        # Regularization.
        l1_decay_rate=0.0,
        label_smoothing=0.1,
        drop_connect_keep_prob=0.7,
        # `drop_connect_version` determines how the drop_connect probabilites are
        # set/increased over time:
        # -v1: increase dropout probability over training,
        # -v2: increase dropout probability as you increase the number of cells,
        #      so the top cell has the highest dropout and the lowest cell has the
        #      lowest dropout,
        # -v3: Do both v1 and v2.
        drop_connect_version='v1',
        drop_path_burn_in_steps=0,
        # `drop_connect_condition` determines under what conditions drop_connect
        # is used:
        # -identity: Dropout all paths except identity connections,
        # -all: Dropout all paths,
        # -separable: Dropout only paths containing a separable conv operation.
        dense_dropout_keep_prob=0.5,
        batch_norm_epsilon=0.001,
        batch_norm_decay=0.9997,
        shuffle_buffer=20000,

        # Any value <= 0 means it is unused
        gradient_clipping_by_global_norm=10.0,

        # Learning rate schedule.
        lr=0.015,
        lr_decay_method='exponential',
        lr_decay_value=0.97,
        lr_num_epochs_per_decay=2.4,
        lr_warmup_epochs=3.0,
        weight_decay=4e-05,

        # Optimizer.
        optimizer='rmsprop',  # 'sgd', 'mom', 'adam' or 'rmsprop'
        rmsprop_decay=0.9,
        rmsprop_momentum_rate=0.9,
        rmsprop_epsilon=1.0,
        momentum_rate=0.9,
        use_nesterov=1,

        ##########################################################################
        # Eval and reporting params. #############################################
        ##########################################################################

        # This number should be a multiple of the number of TPU shards
        # used for eval (e.g., 2 for a 1x1 or 8 for a 2x2).
        eval_batch_size=40,

        # How many different crops are fed into one model. Also affects training.
        num_input_images=1,
        moving_average_decay=0.9999,
        write_summaries=0,

        ##########################################################################
        # Other params. ##########################################################
        ##########################################################################
        num_shards=None,
        distributed_group_size=1,
        use_tpu=False)
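As the docstring notes, several of these defaults are meant to be dialed down when the hparams are used for architecture search. A hedged sketch of such an override (the smaller values are illustrative, not tuned):

hparams = imagenet_hparams()

# Shrink the network and shorten training to speed up a search run.
hparams.set_hparam('num_cells', 6)
hparams.set_hparam('reduction_size', 64)
hparams.set_hparam('num_epochs', 10.)
hparams.set_hparam('train_batch_size', 16)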
Beispiel #26
0
default_configs = {
    'basic_improv':
        ImprovRnnConfig(
            magenta.music.protobuf.generator_pb2.GeneratorDetails(
                id='basic_improv',
                description='Basic melody-given-chords RNN with one-hot triad '
                            'encoding for chords.'),
            magenta.music.ConditionalEventSequenceEncoderDecoder(
                magenta.music.OneHotEventSequenceEncoderDecoder(
                    magenta.music.TriadChordOneHotEncoding()),
                magenta.music.OneHotEventSequenceEncoderDecoder(
                    magenta.music.MelodyOneHotEncoding(
                        min_note=DEFAULT_MIN_NOTE, max_note=DEFAULT_MAX_NOTE))),
            contrib_training.HParams(
                batch_size=128,
                rnn_layer_sizes=[64, 64],
                dropout_keep_prob=0.5,
                clip_norm=5,
                learning_rate=0.001)),
    'attention_improv':
        ImprovRnnConfig(
            magenta.music.protobuf.generator_pb2.GeneratorDetails(
                id='attention_improv',
                description='Melody-given-chords RNN with one-hot triad encoding '
                            'for chords, attention, and binary counters.'),
            magenta.music.ConditionalEventSequenceEncoderDecoder(
                magenta.music.OneHotEventSequenceEncoderDecoder(
                    magenta.music.TriadChordOneHotEncoding()),
                magenta.music.KeyMelodyEncoderDecoder(
                    min_note=DEFAULT_MIN_NOTE, max_note=DEFAULT_MAX_NOTE)),
            contrib_training.HParams(
                batch_size=128,
Beispiel #27
0
def get_hparams(**kwargs):
  """Get the hyperparameters for the model from a json object.

  Args:
    **kwargs: Dict of parameter overrides.
  Possible keyword arguments:
    atom_types: Dict. The possible atom types in the molecule.
    max_steps_per_episode: Integer. The maximum number of steps for one episode.
    allow_removal: Boolean. Whether to allow removal of a bond.
    allow_no_modification: Boolean. If true, the valid action set will include
      doing nothing to the current molecule, i.e., the current molecule itself
      will be added to the action set.
    replay_buffer_size: Integer. The size of the replay buffer.
    learning_rate: Float. Learning rate.
    learning_rate_decay_steps: Integer. The number of steps between each
      learning rate decay.
    learning_rate_decay_rate: Float. The rate of learning rate decay.
    num_episodes: Integer. Number of episodes to run.
    batch_size: Integer. The batch size.
    learning_frequency: Integer. The number of steps between each training
      operation.
    update_frequency: Integer. The number of steps between each update of the
      target Q network.
    grad_clipping: Integer. Maximum value of the gradient norm.
    gamma: Float. The discount factor for the reward.
    double_q: Boolean. Whether to use double Q learning.
      See https://arxiv.org/abs/1509.06461 for details.
    num_bootstrap_heads: Integer. The number of bootstrap heads. See
      https://arxiv.org/abs/1703.07608 for details.
    prioritized: Boolean. Whether to use prioritized replay. See
      https://arxiv.org/abs/1511.05952 for details.
    prioritized_alpha: Float. The parameter alpha in the prioritized replay.
    prioritized_beta: Float. The parameter beta in the prioritized replay.
    prioritized_epsilon: Float. The parameter epsilon in the prioritized replay.
    fingerprint_radius: Integer. The radius of the Morgan fingerprint.
    fingerprint_length: Integer. The length of the Morgan fingerprint.
    dense_layers: List of integers. The hidden units in the dense layers.
    activation: String. The activation function to use.
    optimizer: String. The optimizer to use.
    batch_norm: Boolean. Whether to use batch normalization.
    save_frequency: Integer. The number of episodes between each saving.

  Returns:
    An HParams object containing all the hyperparameters (a brief usage sketch
    follows this function).
  """
  hparams = contrib_training.HParams(
      atom_types=['C', 'O', 'N'],
      max_steps_per_episode=40,
      allow_removal=True,
      allow_no_modification=True,
      allow_bonds_between_rings=False,
      allowed_ring_sizes=[3, 4, 5, 6],
      replay_buffer_size=1000000,
      learning_rate=1e-4,
      learning_rate_decay_steps=10000,
      learning_rate_decay_rate=0.8,
      num_episodes=5000,
      batch_size=64,
      learning_frequency=4,
      update_frequency=20,
      grad_clipping=10.0,
      gamma=0.9,
      double_q=True,
      num_bootstrap_heads=12,
      prioritized=False,
      prioritized_alpha=0.6,
      prioritized_beta=0.4,
      prioritized_epsilon=1e-6,
      fingerprint_radius=3,
      fingerprint_length=2048,
      dense_layers=[1024, 512, 128, 32],
      activation='relu',
      optimizer='Adam',
      batch_norm=False,
      save_frequency=1000,
      max_num_checkpoints=100,
      discount_factor=0.7)
  return hparams.override_from_dict(kwargs)
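A brief usage sketch for get_hparams; any keyword listed in the docstring can be overridden, and everything else keeps its default:

hparams = get_hparams(learning_rate=1e-3, batch_size=128, num_episodes=2000)
print(hparams.learning_rate)  # 0.001
print(hparams.gamma)          # 0.9 (default)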
Beispiel #28
0
def run_trial(trial_idx, delta, algo_names):
    """Runs a trial of wheel bandit problem instance for a set of algorithms."""

    filename = os.path.join(FLAGS.datasetdir,
                            str(delta) + '_' + str(trial_idx) + '.npz')
    with gfile.GFile(filename, 'r') as f:
        sampled_vals = np.load(f)
        dataset = sampled_vals['dataset']
        opt_rewards = sampled_vals['opt_rewards']

    x_hidden_size = 100
    x_encoder_sizes = [x_hidden_size] * 2

    algos = []
    for algo_name in algo_names:
        if algo_name == 'uniform':
            hparams = contrib_training.HParams(num_actions=num_actions)
            algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
        elif algo_name == 'neurolinear':
            hparams = contrib_training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               init_scale=0.3,
                                               activation=tf.nn.relu,
                                               output_activation=tf.nn.relu,
                                               layer_sizes=x_encoder_sizes,
                                               batch_size=512,
                                               activate_decay=True,
                                               initial_lr=0.1,
                                               max_grad_norm=5.0,
                                               show_training=False,
                                               freq_summary=1000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               reset_lr=True,
                                               lr_decay_rate=0.5,
                                               training_freq=1,
                                               training_freq_network=20,
                                               training_epochs=50,
                                               a0=12,
                                               b0=30,
                                               lambda_prior=23)
            algos.append(
                neural_linear_sampling.NeuralLinearPosteriorSampling(
                    algo_name, hparams))
        elif algo_name == 'multitaskgp':
            hparams_gp = contrib_training.HParams(
                num_actions=num_actions,
                num_outputs=num_actions,
                context_dim=context_dim,
                reset_lr=False,
                learn_embeddings=True,
                max_num_points=1000,
                show_training=False,
                freq_summary=1000,
                batch_size=512,
                keep_fixed_after_max_obs=True,
                training_freq=20,
                initial_pulls=2,
                training_epochs=50,
                lr=0.01,
                buffer_s=-1,
                initial_lr=0.001,
                lr_decay_rate=0.0,
                optimizer='RMS',
                task_latent_dim=5,
                activate_decay=False)
            algos.append(
                posterior_bnn_sampling.PosteriorBNNSampling(
                    algo_name, hparams_gp, 'GP'))
        elif algo_name[:3] == 'snp' or algo_name[:3] == 'anp':
            hidden_size = 64
            latent_units = 32
            global_latent_net_sizes = [hidden_size] * 2 + [2 * latent_units]
            local_latent_net_sizes = [hidden_size] * 3 + [2]
            x_y_encoder_sizes = [hidden_size] * 3
            heteroskedastic_net_sizes = None
            mean_att_type = attention.laplace_attention
            scale_att_type_1 = attention.laplace_attention
            scale_att_type_2 = attention.laplace_attention
            att_type = 'multihead'
            att_heads = 8
            data_uncertainty = False
            is_anp = False

            config = algo_name.split('_')
            mfile = FLAGS.prefix + config[1] + '_' + config[2] + FLAGS.suffix
            if algo_name[:3] == 'anp':
                mfile = 'anp_' + mfile
                local_latent_net_sizes = [hidden_size] * 3 + [2 * 5]
                is_anp = True
            mpath = os.path.join(FLAGS.modeldir, mfile)

            hparams = contrib_training.HParams(
                num_actions=num_actions,
                context_dim=context_dim,
                init_scale=0.3,
                activation=tf.nn.relu,
                output_activation=tf.nn.relu,
                x_encoder_sizes=x_encoder_sizes,
                x_y_encoder_sizes=x_y_encoder_sizes,
                global_latent_net_sizes=global_latent_net_sizes,
                local_latent_net_sizes=local_latent_net_sizes,
                heteroskedastic_net_sizes=heteroskedastic_net_sizes,
                att_type=att_type,
                att_heads=att_heads,
                mean_att_type=mean_att_type,
                scale_att_type_1=scale_att_type_1,
                scale_att_type_2=scale_att_type_2,
                data_uncertainty=data_uncertainty,
                batch_size=512,
                activate_decay=True,
                initial_lr=0.1,
                max_grad_norm=5.0,
                show_training=False,
                freq_summary=1000,
                buffer_s=-1,
                initial_pulls=2,
                reset_lr=True,
                lr_decay_rate=0.5,
                training_freq=10,
                training_freq_network=20,
                training_epochs=50,
                uncertainty_type='attentive_freeform',
                local_variational=True,
                model_path=mpath,
                is_anp=is_anp)

            if config[1] == 'prior':
                hparams.set_hparam('local_variational', False)

            if config[2] == 'gp':
                hparams.set_hparam('uncertainty_type', 'attentive_gp')

            algos.append(
                offline_contextual_bandits.OfflineContextualBandits(
                    algo_name, hparams))

    t_init = time.time()
    _, h_rewards = contextual_bandit.run_contextual_bandit(
        context_dim,
        num_actions,
        dataset,
        algos,
        num_contexts=FLAGS.num_contexts)
    t_final = time.time()

    return h_rewards, t_final - t_init, opt_rewards[:FLAGS.num_contexts]
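A hedged sketch of how run_trial might be invoked; it assumes the module-level FLAGS, num_actions, and context_dim referenced inside the function are already configured, and the delta value and algorithm list are illustrative:

algo_names = ['uniform', 'neurolinear', 'multitaskgp']
h_rewards, elapsed_secs, opt_rewards = run_trial(
    trial_idx=0, delta=0.5, algo_names=algo_names)
print('trial took %.1f seconds' % elapsed_secs)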
Beispiel #29
0
def main(_):

    tf.logging.set_verbosity(tf.logging.INFO)
    random.seed(FLAGS.random_seed)

    params = contrib_training.HParams(
        embedding=FLAGS.embedding,
        num_steps=FLAGS.num_steps,
        val_freq=FLAGS.val_freq,
        seq_len=FLAGS.seq_len,
        batch_size=FLAGS.batch_size,
        emb_size=FLAGS.emb_size,
        vocab_size=4,
        hidden_lstm_size=FLAGS.hidden_lstm_size,
        hidden_dense_size=FLAGS.hidden_dense_size,
        dropout_rate=FLAGS.dropout_rate,
        learning_rate=FLAGS.learning_rate,
        num_motifs=FLAGS.num_motifs,
        len_motifs=FLAGS.len_motifs,
        temperature=FLAGS.temperature,
        reweight_sample=FLAGS.reweight_sample,
        l2_reg=FLAGS.l2_reg,
        out_dir=FLAGS.out_dir,
        in_tr_data_dir=FLAGS.in_tr_data_dir,
        in_val_data_dir=FLAGS.in_val_data_dir,
        ood_val_data_dir=FLAGS.ood_val_data_dir,
        master=FLAGS.master,
        save_meta=FLAGS.save_meta,
        label_dict_file=FLAGS.label_dict_file,
        mutation_rate=FLAGS.mutation_rate,
        epsilon=FLAGS.epsilon,
    )

    # create output directories
    create_out_dir(params)

    # load datasets and labels for training
    params.add_hparam('in_tr_file_pattern', 'in_tr')
    params.add_hparam('in_val_file_pattern', 'in_val')
    params.add_hparam('ood_val_file_pattern', 'ood_val')
    label_sample_size, in_tr_dataset, in_val_dataset, ood_val_dataset = load_datasets_and_labels(
        params)
    params.add_hparam('n_class', len(label_sample_size))
    tf.logging.info('label_sample_size=%s', label_sample_size)

    # compute weights for labels
    # load the dictionary for class labels.
    # Key: class name (string), values: encoded class label (int)
    with tf.gfile.GFile(os.path.join(params.label_dict_file),
                        'rb') as f_label_code:
        # label_dict_after_2016_new_species0 = json.load(f)
        params.add_hparam('label_dict', yaml.safe_load(f_label_code))
        tf.logging.info('# of label_dict=%s', len(params.label_dict))

    label_weights = utils.compute_label_weights_using_sample_size(
        params.label_dict, label_sample_size)
    params.add_hparam('label_weights', label_weights)

    # print parameter settings
    tf.logging.info(params)
    with tf.gfile.GFile(os.path.join(params.model_dir, 'params.json'),
                        mode='w') as f:
        f.write(json.dumps(params.to_json(), sort_keys=True))

    # construct model
    tf.logging.info('create model')
    model = SeqPredModel(params)
    model.reset()

    ## if previous model ckpt exists, restore the model from there
    tf.logging.info('model dir=%s',
                    os.path.join(params.model_dir, '*.ckpt.index'))
    prev_steps, ckpt_file = utils.get_latest_ckpt(params.model_dir)
    if ckpt_file:
        tf.logging.info('previous ckpt exist, prev_steps=%s', prev_steps)
        model.restore_from_ckpt(ckpt_file)

    # training
    tf.logging.info('start training')
    model.train(in_tr_dataset, in_val_dataset, ood_val_dataset, prev_steps)
Beispiel #30
0
  def _assertOptimizerWithNewLearningRate(self, optimizer_name):
    """Asserts successful updating of all learning rate schemes."""
    original_learning_rate = 0.7
    learning_rate_scaling = 0.1
    warmup_learning_rate = 0.07
    hparams = contrib_training.HParams(learning_rate=0.15)
    pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")

    # Constant learning rate.
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
    _update_optimizer_with_constant_learning_rate(optimizer,
                                                  original_learning_rate)
    _write_config(pipeline_config, pipeline_config_path)

    configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
    configs = config_util.merge_external_params_with_configs(configs, hparams)
    optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
    constant_lr = optimizer.learning_rate.constant_learning_rate
    self.assertAlmostEqual(hparams.learning_rate, constant_lr.learning_rate)

    # Exponential decay learning rate.
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
    _update_optimizer_with_exponential_decay_learning_rate(
        optimizer, original_learning_rate)
    _write_config(pipeline_config, pipeline_config_path)

    configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
    configs = config_util.merge_external_params_with_configs(configs, hparams)
    optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
    exponential_lr = optimizer.learning_rate.exponential_decay_learning_rate
    self.assertAlmostEqual(hparams.learning_rate,
                           exponential_lr.initial_learning_rate)

    # Manual step learning rate.
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
    _update_optimizer_with_manual_step_learning_rate(
        optimizer, original_learning_rate, learning_rate_scaling)
    _write_config(pipeline_config, pipeline_config_path)

    configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
    configs = config_util.merge_external_params_with_configs(configs, hparams)
    optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
    manual_lr = optimizer.learning_rate.manual_step_learning_rate
    self.assertAlmostEqual(hparams.learning_rate,
                           manual_lr.initial_learning_rate)
    for i, schedule in enumerate(manual_lr.schedule):
      self.assertAlmostEqual(hparams.learning_rate * learning_rate_scaling**i,
                             schedule.learning_rate)

    # Cosine decay learning rate.
    pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    optimizer = getattr(pipeline_config.train_config.optimizer, optimizer_name)
    _update_optimizer_with_cosine_decay_learning_rate(optimizer,
                                                      original_learning_rate,
                                                      warmup_learning_rate)
    _write_config(pipeline_config, pipeline_config_path)

    configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
    configs = config_util.merge_external_params_with_configs(configs, hparams)
    optimizer = getattr(configs["train_config"].optimizer, optimizer_name)
    cosine_lr = optimizer.learning_rate.cosine_decay_learning_rate

    self.assertAlmostEqual(hparams.learning_rate, cosine_lr.learning_rate_base)
    warmup_scale_factor = warmup_learning_rate / original_learning_rate
    self.assertAlmostEqual(hparams.learning_rate * warmup_scale_factor,
                           cosine_lr.warmup_learning_rate)