Example 1
def create_hparams(default_params, hparams_file=None, hparams_str=None):
    hparams = HParams(**default_params)
    if hparams_file is not None:
        with open(hparams_file) as f:
            hparams.parse_json(f.read())
    if hparams_str is not None:
        hparams.parse(hparams_str)
    return hparams
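For reference, assuming HParams is tf.contrib.training.HParams (or an API-compatible class), overrides can be supplied either as a JSON document (parse_json) or as a comma-separated name=value string (parse), and the parsed values become attributes. A minimal usage sketch of the function above; the parameter names, values, and file name are made up:

defaults = {"learning_rate": 1e-3, "batch_size": 32}

# Override from a JSON file, from a name=value string, or both (each is optional).
hparams = create_hparams(defaults, hparams_file="hparams.json")
hparams = create_hparams(defaults, hparams_str="learning_rate=3e-4,batch_size=64")
print(hparams.learning_rate)  # 0.0003, coerced to float to match the default's type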
Example 2
def create_hparams(hparam_string=None):
    """Create model hyperparameters. Parse nondefault from given string."""
    hparams = HParams(
        # The name of the architecture to use.
        final_endpoint='Mixed_7c',
        min_depth=16,
        depth_multiplier=1.0)

    if hparam_string:
        tf_logging.info('Parsing command line hparams: %s', hparam_string)
        hparams.parse(hparam_string)

    return hparams
Example 3
def main(_argv):
    # Pass command-line arguments to RunConfig
    run_config = RunConfig(
        model_dir=tf.flags.FLAGS.model_dir,
        save_checkpoints_steps=tf.flags.FLAGS.save_checkpoints_steps)
    # Default hyperparameters
    hparams = HParams(l2=1e-3, lr=1e-3, hidden_layers=3, hidden_units=200)
    # Parse the hparams command-line argument
    hparams.parse(tf.flags.FLAGS.hparams)
    # Run the experiment
    run(
        experiment_fn=experiment_fn,
        run_config=run_config,
        schedule=tf.flags.FLAGS.schedule,
        hparams=hparams)
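The experiment_fn handed to run (the old tf.contrib.learn learn_runner API) is defined elsewhere in the project. As a rough sketch under that assumption, it receives the RunConfig and HParams and returns an Experiment; the model_fn and input functions below are placeholders, not the original code:

import tensorflow as tf

def experiment_fn(run_config, hparams):
    # Build an Estimator from the project's (assumed) model_fn and wrap it so
    # learn_runner.run can drive training and evaluation on the given schedule.
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,                 # placeholder: defined in the project
        params=hparams,
        config=run_config)
    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,     # placeholder: defined in the project
        eval_input_fn=eval_input_fn)       # placeholder: defined in the project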
Example 4
def main():
    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument('--train-dir', type=str, default=None)
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--hparams', type=str, default=None)
    parser.add_argument('--max-steps', type=int, default=100000)
    parser.add_argument('--dataset-shards', type=int, default=None)

    args = parser.parse_args()

    tf.logging.set_verbosity(tf.logging.INFO)

    hparams = HParams(num_edges=800,
                      window_size=10,
                      num_negative=5,
                      batch_size=args.batch_size,
                      embedding_dim=128,
                      seed=42)

    if args.hparams is not None:
        hparams.parse(args.hparams)

    wikipedia_data = load_data_wikipedia_hyperlink()
    packed_labels = get_packed_labels(wikipedia_data['labels_sparse'])
    num_labels = np.max(packed_labels.labels) + 1

    input_fn = make_input_fn(wikipedia_data['adjacency_list'], packed_labels,
                             args.dataset_shards)

    estimator = tf.estimator.Estimator(
        label_clustering.make_label_clustering(num_labels),
        model_dir=args.train_dir,
        params=hparams,
        config=tf.estimator.RunConfig(tf_random_seed=hparams.seed))

    hooks = [
        tf.train.LoggingTensorHook(
            {'kappa_edges': 'kappa_edges_in_batch/value'}, every_n_secs=30)
    ]

    if args.profile:
        hooks.append(tf.train.ProfilerHook(save_secs=10))

    estimator.train(input_fn, max_steps=args.max_steps, hooks=hooks)
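Because the HParams object is passed to the Estimator through params, the model function receives it as its params argument and reads each setting as an attribute. A purely illustrative model_fn follows (not the label_clustering implementation; the feature key "x" and layer shapes are made up, and only the TRAIN path is handled):

import tensorflow as tf

def toy_model_fn(features, labels, mode, params):
    # params is the HParams instance given to tf.estimator.Estimator(params=...).
    hidden = tf.layers.dense(features["x"], params.embedding_dim, activation=tf.nn.relu)
    logits = tf.layers.dense(hidden, 1)
    loss = tf.losses.sigmoid_cross_entropy(labels, logits)
    train_op = tf.train.AdamOptimizer().minimize(
        loss, global_step=tf.train.get_or_create_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)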
Example 5
    def create_hparams(hparams_string=None, verbose=False):
        """Create model hyperparameters. Parse nondefault from given string."""

        hparams = HParams(
            ################################
            # General Parameters           #
            ################################

            logging_batch_index_perc=10,  # Percentage of samples used from the full dataset between logging the loss for training and testing.
            start_with_test=True,  # Determines if the model is tested first before any training loops.
                                   # The computed loss is also used to identify the best model so far.
                                   # Therefore, if this is False and use_best_as_final_model is True
                                   # the best model of the current training will be saved, which possibly
                                   # overrides an older better model.
            log_memory_consumption=True,
            epochs_per_test=1,  # Number of training epochs before testing (NOTE that this includes the scheduler_type with epoch scheduling).

            networks_dir="nn",
            checkpoints_dir="checkpoints",
            epochs_per_checkpoint=1,  # Number of epochs between checkpoints, 0 for no checkpoints at all.
            save_final_model=True,  # Determines if the model is saved after training.
            use_best_as_final_model=True,  # Substitutes the saved final model with the best of the current run.

            ################################
            # Experiment Parameters        #
            ################################
            epochs=0,
            test_set_perc=0.05,  # Percentage of samples taken from the given id_list in __init__ for testing.
                                 # Ignored when self.id_list_train is already set. Note that self.id_list_test must be set then as well.
            val_set_perc=0.05,   # Percentage of samples taken from the given id_list in __init__ for validation.
                                 # Ignored when self.id_list_train is already set. Note that self.id_list_val should be set then as well.
            seed=1234,  # Used to initialize torch, numpy, and random. If None, the id_list is not shuffled before taking test and validation set from it.
            fp16_run=False,  # TODO: Not implemented.
            # distributed_run=False,  # TODO: Find out how distributed run works.
            # dist_url="file://distributed.dpt",
            # cudnn_enabled=True,
            # cudnn_benchmark=False,
            use_gpu=False,
            num_gpus=1,  # TODO: Change to num_devices.
            batch_first=False,  # Note: This might not be implemented properly everywhere.
            variable_sequence_length_train=None,  # Whether samples within a training mini-batch have variable length.
            variable_sequence_length_test=None,  # Whether samples within a test mini-batch have variable length.
            shuffle_train_set=True,  # Shuffle in dataset to get mini batches.
            shuffle_val_set=False,  # Shuffle in dataset to get mini batches.
            batch_size_train=1,
            batch_size_test=48,
            # batch_size_val=1,  # TODO: Add again after finding all My* classes where it is missing.
            batch_size_benchmark=48,
            batch_size_synth=48,
            batch_size_gen_figure=48,
            dataset_num_workers_gpu=4,  # Number of workers used in dataset when running on GPU(s).
            dataset_num_workers_cpu=0,  # Number of workers used in dataset when running on CPU(s).
            dataset_pin_memory=True,
            dataset_load_async=True,
            teacher_forcing_in_test=False,  # If True, the targets are also given to the model when running the test (needed for WaveNet).
            preload_next_batch_to_gpu=False,  # If True loads the next batch to GPU while processing the current one.
                                              # This enhances GPU usage at the cost of memory, because two batches are loaded to the GPU.
                                              # TODO: This does not work yet, because cuda async does lazy loading.

            ################################
            # Data Parameters             #
            ################################
            len_in_out_multiplier=1,
            out_dir=None,

            ################################
            # Audio Parameters             #
            ################################
            # sampling_frequency=16000,  # TODO: Unused?
            frame_size=5,
            # max_wav_value=32768.0,

            ################################
            # Model Parameters             #
            ################################
            model_type=None,
            model_name=None,
            model_dir=None,  # Explicitly set directory where model is stored, otherwise dir_out/networks_dir/.
            dropout=0.0,
            hidden_init=0.0,  # Hidden state init value
            train_hidden_init=False,  # Is the hidden state init value trainable  # TODO: Unused?

            ################################
            # Optimization Hyperparameters #
            ################################
            loss_per_sample=False,  # If True the loss is first averaged on each sample and then over the batch.
                                    # If False the loss is averaged over each frame in the whole batch (default).
            backward_retain_graph=False,  # Determines if the gradient computation should do aggressive memory freeing.
                                          # Only needed when gradient computational graph is reused.
            optimiser_type="Adam",  # "Adam", "SGD"  TODO: more
            optimiser_args=dict(),  # Set optimiser arguments. Preferred way to set learning rate: optimiser_args["lr"]=...
            use_saved_learning_rate=True,  # Use the learning rate saved with a model after loading it.
            replace_inf_grads_by_zero=False,  # Automatically substitute +/- inf gradients with zero during training.
            # dynamic_loss_scaling=True,
            exponential_moving_average=False,  # TODO: Not implemented properly.
            exponential_moving_average_decay=0.9999,  # Ignored when exponential_moving_average is False.

            scheduler_type="default",  # "None", "Plateau", "Exponential","Noam",  TODO: "Step", "Cyclic_cosine"
            scheduler_args=dict(),
            iterations_per_scheduler_step=None,  # Number of training iterations after which the scheduler step function
                                                 # is called with the current loss and total number of iterations as parameter.
                                                 # If None the scheduler is not called.
            epochs_per_scheduler_step=None,  # Number of training epochs after which the scheduler step function is
                                             # called with the current validation loss and total number of epochs.
                                             # When a model is loaded the epoch number continues from the epoch number stored in the model.

            grad_clip_norm_type=None,  # If None no gradient clipping otherwise uses grad_clip_max_norm (small bias).
            grad_clip_max_norm=None,  # Ignored if grad_clip_norm_type is None.
            grad_clip_thresh=None,  # Clip absolute value of gradient (big bias).

            # Set optimiser or scheduler_type to ignore type configuration above. Used to try new implementations.
            optimiser=None,  # Will be called with model parameters only. Set other parameters with partial. Example: partial(torch.optim.Adam, **args).
            scheduler=None,  # Will be called with optimiser only. Set other parameters with partial. Example: partial(ReduceLROnPlateau, **args).

            ################################
            # Synthesis Parameters         #
            ################################
            synth_vocoder="WORLD",  # "WORLD", "r9y9wavenet_quantized_16k_world_feats"
            synth_ext="wav",  # Extension of the output audio.
            synth_fs=16000,
            num_coded_sps=60,  # Number of spectral features, currently always MGC.
            synth_dir=None,
            synth_acoustic_model_path=None,
            synth_file_suffix='',
            # do_post_filtering = False,  # TODO: Merlin does some filtering before calling its vocoder. Possible implementation: https://github.com/r9y9/nnmnkwii/blob/master/nnmnkwii/postfilters/__init__.py
            synth_gen_figure=False,
            gen_figure_ext=".pdf",
            epochs_per_plot=0,  # No plots per epoch with <= 0. # TODO: plot in run method each ... epochs.
            plot_per_epoch_id_list=None,  # TODO: Id(s) in the dictionary which are plotted.
        )

        if hparams_string:
            logging.info('Parsing command line hparams: %s', hparams_string)
            hparams.parse(hparams_string)

        if verbose:
            logging.info('Final parsed hparams: %s', hparams.values())

        return hparams
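Given how many fields this configuration has, it can be useful to dump the fully parsed values for reproducibility. A small sketch, assuming the HParams implementation used here exposes values() like tf.contrib.training.HParams and that create_hparams is reachable from your code (the override string and file name are made up):

import json

hparams = create_hparams("use_gpu=True,batch_size_train=4,seed=42", verbose=True)
# values() returns a plain dict of every setting after overrides were applied.
with open("hparams_used.json", "w") as f:
    json.dump(hparams.values(), f, indent=2, default=str)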
Example 6
def main(model_dir, train_data, eval_data, vocab_file, hparams):
    tf.logging.set_verbosity(tf.logging.INFO)

    hparams_ = HParams(num_epochs=10,
                       batch_size=16,
                       max_steps=10000,
                       units=150,
                       layers=3,
                       dropout=0.0,
                       question_max_words=30,
                       passage_max_words=150,
                       predict_passage_max_words=800,
                       answer_max_words=50,
                       vocab_size=30000,
                       emb_size=300,
                       r=0.8,
                       cudnn=False,
                       grad_clip=5.0,
                       tgt_sos_id=1,
                       tgt_eos_id=2,
                       word_vocab_file=vocab_file)
    hparams_.parse(hparams)
    hparams = hparams_

    config = tf.ConfigProto()
    # config.intra_op_parallelism_threads = 32
    # config.inter_op_parallelism_threads = 32

    run_config = tf.estimator.RunConfig(log_step_count_steps=1,
                                        tf_random_seed=19830610,
                                        model_dir=model_dir,
                                        save_summary_steps=1,
                                        session_config=config)

    # Debug: materialize one eval batch from the input pipeline and print it.
    with tf.Session() as sess:
        test = input_fn([train_data],
                        hparams=hparams,
                        mode=tf.estimator.ModeKeys.EVAL,
                        batch_size=hparams.batch_size)

        print(sess.run([test]))

    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       params=hparams,
                                       config=run_config)

    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn([train_data],
                                  hparams=hparams,
                                  mode=tf.estimator.ModeKeys.TRAIN,
                                  num_epochs=hparams.num_epochs,
                                  batch_size=hparams.batch_size),
        max_steps=hparams.max_steps,
        hooks=None)

    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: input_fn([eval_data],
                                  hparams=hparams,
                                  mode=tf.estimator.ModeKeys.EVAL,
                                  batch_size=hparams.batch_size),
        exporters=[
            tf.estimator.LatestExporter(
                name="predict",  # name of the folder, under "export/", that the model is exported to
                serving_input_receiver_fn=partial(serving_input_fn,
                                                  params=hparams),
                exports_to_keep=1,
                as_text=True)
        ],
        steps=10,
        throttle_secs=1200)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
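The LatestExporter above needs a serving_input_receiver_fn, but the project's serving_input_fn is not shown. A rough sketch of what such a function could look like with the partial(serving_input_fn, params=hparams) wiring; the feature names, dtypes, and shapes are invented, not the original code:

import tensorflow as tf

def serving_input_fn(params):
    # The exporter calls this with no arguments; params is bound via functools.partial.
    serialized = tf.placeholder(tf.string, shape=[None], name="input_example")
    features = tf.parse_example(serialized, {
        "question": tf.FixedLenFeature([params.question_max_words], tf.int64),
        "passage": tf.FixedLenFeature([params.passage_max_words], tf.int64),
    })
    return tf.estimator.export.ServingInputReceiver(features, {"examples": serialized})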
Example 7
def main(argv):
    # hparams = HParams(minibatch_size=8, bond_dim=8, delta_t=1/FLAGS.sample_rate, sigma=0.000001,
    #                   h_reg=200/(np.pi * FLAGS.sample_rate)**2, r_reg=2000/(np.pi * FLAGS.sample_rate),
    #                   initial_rank=None, A=100., learning_rate=0.001)

    hparams = HParams(minibatch_size=8,
                      bond_dim=8,
                      delta_t=1 / FLAGS.sample_rate,
                      sigma=0.0001,
                      h_reg=200 / (np.pi * FLAGS.sample_rate)**2,
                      r_reg=0.1,
                      initial_rank=None,
                      A=100.,
                      learning_rate=0.001)
    hparams.parse(FLAGS.hparams)

    with tf.variable_scope("data"):
        data = get_audio(datadir=FLAGS.datadir,
                         dataset=FLAGS.dataset,
                         hps=hparams)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        if FLAGS.mps_model == 'rho_mps':
            model = RhoCMPS(hparams=hparams, data_iterator=data)
        else:
            model = PsiCMPS(hparams=hparams, data_iterator=data)

        h_l2sqnorm = tf.reduce_sum(tf.square(model.freqs))
        r_l2sqnorm = tf.real(tf.reduce_sum(tf.conj(model.R) * model.R))

    with tf.variable_scope("total_loss"):
        total_loss = model.loss + hparams.h_reg * h_l2sqnorm \
                                    + hparams.r_reg * r_l2sqnorm

    with tf.variable_scope("summaries"):
        tf.summary.scalar("A", tf.cast(model.A, dtype=tf.float32))
        tf.summary.scalar("sigma", tf.cast(model.sigma, dtype=tf.float32))
        tf.summary.scalar("h_l2norm", tf.sqrt(h_l2sqnorm))
        tf.summary.scalar("r_l2norm", tf.sqrt(r_l2sqnorm))

        gr_rate = 2 * np.pi * hparams.sigma**2 * r_l2sqnorm / hparams.bond_dim
        tf.summary.scalar("gr_decay_time", 1 / gr_rate)

        tf.summary.scalar("model_loss", tf.reshape(model.loss, []))
        tf.summary.scalar("total_loss", tf.reshape(total_loss, []))

        tf.summary.audio("data",
                         data,
                         sample_rate=FLAGS.sample_rate,
                         max_outputs=5)
        tf.summary.histogram("frequencies", model.freqs / (2 * np.pi))

        if FLAGS.visualize:
            # Doesn't work for Datasets where batch size can't be inferred
            data_waveform_op = tfplot.autowrap(waveform_plot, batch=True)(
                data, hparams.minibatch_size * [hparams.delta_t])
            tf.summary.image("data_waveform", data_waveform_op)

            if FLAGS.num_samples != 0:
                samples = model.sample(FLAGS.num_samples,
                                       FLAGS.sample_duration)
                sample_waveform_op = tfplot.autowrap(
                    waveform_plot,
                    batch=True)(samples, FLAGS.num_samples * [hparams.delta_t])
                tf.summary.image("sample_waveform", sample_waveform_op)

    step = tf.get_variable("global_step", [],
                           tf.int64,
                           tf.zeros_initializer(),
                           trainable=False)
    train_op = tf.train.AdamOptimizer(
        learning_rate=hparams.learning_rate).minimize(total_loss,
                                                      global_step=step)

    # TODO Unrolling in time?

    tf.contrib.training.train(
        train_op,
        save_checkpoint_secs=60,
        logdir=
        f"{FLAGS.logdir}/{hparams.bond_dim}_{hparams.delta_t}_{hparams.minibatch_size}"
    )
Example 8
def main(model_dir, train_data, eval_data, word_embeddings, char_embeddings,
         hparams, log_devices):
    tf.logging.set_verbosity(tf.logging.INFO)

    char_embeddings_np = load_embeddings(char_embeddings)

    if os.path.isfile(word_embeddings + '.npy'):
        word_embeddings_np = np.load(word_embeddings + '.npy')
    else:
        word_embeddings_np = load_embeddings(word_embeddings)
        np.save(word_embeddings, word_embeddings_np)

    hparams_ = HParams(num_epochs=10,
                       batch_size=16,
                       max_steps=100,
                       units=50,
                       layers=3,
                       dropout=0.0,
                       learning_rate=0.5,
                       question_max_words=30,
                       question_max_chars=16,
                       passage_max_words=800,
                       train_passage_max_words=400,
                       passage_max_chars=16,
                       vocab_size=word_embeddings_np.shape[0],
                       emb_size=300,
                       char_vocab_size=char_embeddings_np.shape[0],
                       char_emb_size=300,
                       word_vocab_file=word_embeddings,
                       char_vocab_file=char_embeddings,
                       passage_count=10,
                       train_passage_count=5,
                       passage_max_len=120,
                       r=0.8,
                       grad_clip=5.0,
                       attention='luong')
    hparams = hparams_.parse(hparams)
    print(hparams)

    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.log_device_placement = log_devices
    # config.intra_op_parallelism_threads = 32
    # config.inter_op_parallelism_threads = 32

    run_config = tf.estimator.RunConfig(log_step_count_steps=1,
                                        tf_random_seed=19830610,
                                        model_dir=model_dir,
                                        save_summary_steps=1,
                                        session_config=config)

    # with tf.Session() as sess:
    #     test = input_fn(
    #         [train_data],
    #         hparams=hparams,
    #         mode=tf.estimator.ModeKeys.EVAL,
    #         batch_size=hparams.batch_size
    #     )
    #
    #     print(sess.run([test]))

    estimator = tf.estimator.Estimator(model_fn=partial(
        model_fn,
        word_embeddings_np=word_embeddings_np,
        char_embeddings_np=char_embeddings_np),
                                       params=hparams,
                                       config=run_config)

    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn([train_data],
                                  hparams=hparams,
                                  mode=tf.estimator.ModeKeys.TRAIN,
                                  num_epochs=hparams.num_epochs,
                                  batch_size=hparams.batch_size),
        max_steps=hparams.max_steps,
        hooks=None)

    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: input_fn([eval_data],
                                  hparams=hparams,
                                  mode=tf.estimator.ModeKeys.EVAL,
                                  batch_size=hparams.batch_size),
        exporters=[
            tf.estimator.LatestExporter(
                name="predict",  # name of the folder, under "export/", that the model is exported to
                serving_input_receiver_fn=partial(serving_input_fn,
                                                  params=hparams),
                exports_to_keep=1,
                as_text=True)
        ],
        steps=100,
        throttle_secs=1200)

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)