Example #1
def run_bot(batch_sampling_method,
			window_size,
			stride,
			batch_size,
			num_training_steps,
			learning_rate,
			geometric_decay,
			conv_layers_separable,
			len_conv1_filters,
			num_conv1_features,
			num_conv2_features,
			num_fc1_neurons,
			model_ending,
			dropout_keep_prob) -> float:
	# TODO: remove hardcoding of args (metaclass? couldn't get args to unpack)
	# getargvalues(currentframe())[3] is the frame's locals dict, i.e. all of the arguments above.
	hparams = HParams(**inspect.getargvalues(inspect.currentframe())[3])
	hparam_dict = hparams.values()
	print(hparam_dict)
	bot = crypto_bot.CryptoBot(hparams, set_params(), test=TEST, tuning=True)
	cost = -bot.get_value()
	master_entry = {}
	lowest_cost = 1.0
	#with open('hparams_master.pickle', 'rb') as handle:
	#	master_entry = pickle.load(handle)
	#master_entry[cost] = hparam_dict
	#with open('hparams_master.pickle'.format(basedir), 'wb') as handle:
	#	pickle.dump(master_entry, handle, protocol=pickle.HIGHEST_PROTOCOL)
	# TODO: figure out another method of logging because this can't be a part of the class
#	if cost < self.lowest_cost:
#		self.lowest_cost = cost
	return cost
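
A side note on the TODO above: the frame-inspection trick can usually be avoided by accepting the tuning parameters as keyword arguments and forwarding them to HParams directly. A minimal sketch, assuming the TF 1.x tensorflow.contrib.training.HParams class; the crypto_bot call from the example is kept only as a comment because that module is project-specific:

from tensorflow.contrib.training import HParams  # assumed TF 1.x import path

def run_bot(**tuning_kwargs) -> float:
    # Forward the keyword arguments straight into HParams instead of
    # recovering them from the call frame with inspect.getargvalues().
    hparams = HParams(**tuning_kwargs)
    print(hparams.values())  # name -> value dict, as printed in the example
    # bot = crypto_bot.CryptoBot(hparams, set_params(), test=TEST, tuning=True)
    # return -bot.get_value()
    return 0.0
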
Example #2
        def _load_hparams(path):
            with open(os.path.join(path, 'hparams.json'), 'rb') as json_file:
                hparams_dict = {
                    k.encode('utf-8'): v.encode('utf-8') if type(v) == unicode else v
                    for k, v in json.load(json_file).iteritems()
                }

                hparams = HParams(**hparams_dict)
                hparams.set_hparam('data_dir', path)

            trainer_lib.add_problem_hparams(hparams, 'translate_mmt')

            # Remove dropout from HParams even in TRAIN mode.
            # hparams.values() returns a name -> value dict, so iterating over
            # it yields the hyperparameter names.
            for name in hparams.values():
                if name.endswith("dropout"):
                    setattr(hparams, name, 0.0)

            return hparams
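
Note that the example above is Python 2 code (unicode, iteritems, byte-string keys). A minimal Python 3 sketch of the same load, assuming the same hparams.json layout and the TF 1.x HParams class; the tensor2tensor-specific trainer_lib.add_problem_hparams call is omitted:

import json
import os

from tensorflow.contrib.training import HParams  # assumed TF 1.x import path

def load_hparams(path):
    # json.load already yields str keys and values under Python 3,
    # so the encode('utf-8') step is unnecessary.
    with open(os.path.join(path, 'hparams.json')) as json_file:
        hparams = HParams(**json.load(json_file))
    hparams.set_hparam('data_dir', path)  # assumes 'data_dir' is present in the JSON

    # Zero out every *dropout hyperparameter, as in the example above.
    for name in hparams.values():
        if name.endswith('dropout'):
            setattr(hparams, name, 0.0)
    return hparams
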
Example #3
def run_experiment(unused_argv):
    """Run the training experiment."""
    hyperparameters_dict = FLAGS.__flags

    # Build the hyperparameters object
    params = HParams(**hyperparameters_dict)

    # Set the seeds
    np.random.seed(params.random_seed)
    tf.set_random_seed(params.random_seed)

    # Initialise the run config
    run_config = tf.contrib.learn.RunConfig()

    # Use JIT XLA
    session_config = tf.ConfigProto()
    if params.use_jit_xla:
        session_config.graph_options.optimizer_options.global_jit_level = (
            tf.OptimizerOptions.ON_1)

    # Clean the model directory
    if os.path.exists(params.model_dir) and params.clean_model_dir:
        shutil.rmtree(params.model_dir)

    # Update the run config
    run_config = run_config.replace(tf_random_seed=params.random_seed)
    run_config = run_config.replace(model_dir=params.model_dir)
    run_config = run_config.replace(session_config=session_config)
    run_config = run_config.replace(
        save_checkpoints_steps=params.min_eval_frequency)

    # Output relevant info for inference
    ex.save_dict_json(d=params.values(),
                      path=os.path.join(params.model_dir, 'params.dict'),
                      verbose=True)
    ex.save_obj(obj=params,
                path=os.path.join(params.model_dir, 'params.pkl'),
                verbose=True)

    learn_runner.run(experiment_fn=ex.experiment_fn,
                     run_config=run_config,
                     schedule='train_and_evaluate',
                     hparams=params)
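
In newer TensorFlow/absl releases FLAGS.__flags maps flag names to Flag objects rather than to their parsed values, so the dict built above would break there. A minimal alternative sketch using absl's public flag_values_dict() accessor (an assumption about the flag setup, not the author's code):

from absl import flags
from tensorflow.contrib.training import HParams  # assumed TF 1.x import path

FLAGS = flags.FLAGS

def flags_to_hparams():
    # Call this only after the flags have been parsed (e.g. inside app.run's main).
    # flag_values_dict() returns {flag_name: parsed_value}, which is exactly
    # what the HParams constructor expects as keyword arguments.
    return HParams(**FLAGS.flag_values_dict())
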
Example #4
    def create_hparams(hparams_string=None, verbose=False):
        """Create model hyperparameters. Parse nondefault from given string."""

        hparams = HParams(
            ################################
            # General Parameters           #
            ################################

            logging_batch_index_perc=10,  # Percentage of the full dataset processed between consecutive logs of the training and testing loss.
            start_with_test=True,  # Determines if the model is tested before any training loop.
                                   # The computed loss is also used to identify the best model so far.
                                   # Therefore, if this is False and use_best_as_final_model is True,
                                   # the best model of the current training run is saved, which may
                                   # override an older, better model.
            log_memory_consumption=True,
            epochs_per_test=1,  # Number of training epochs before testing (NOTE that this includes the scheduler_type with epoch scheduling).

            networks_dir="nn",
            checkpoints_dir="checkpoints",
            epochs_per_checkpoint=1,  # Number of epochs between checkpoints, 0 for no checkpoints at all.
            save_final_model=True,  # Determines if the model is saved after training.
            use_best_as_final_model=True,  # Substitutes the saved final model with the best of the current run.

            ################################
            # Experiment Parameters        #
            ################################
            epochs=0,
            test_set_perc=0.05,  # Percentage of samples taken from the given id_list in __init__ for testing.
                                 # Ignored when self.id_list_train is already set. Note that self.id_list_test must be set then as well.
            val_set_perc=0.05,   # Percentage of samples taken from the given id_list in __init__ for validation.
                                 # Ignored when self.id_list_train is already set. Note that self.id_list_val should be set then as well.
            seed=1234,  # Used to initialize torch, numpy, and random. If None, the id_list is not shuffled before taking test and validation set from it.
            fp16_run=False,  # TODO: Not implemented.
            # distributed_run=False,  # TODO: Find out how distributed run works.
            # dist_url="file://distributed.dpt",
            # cudnn_enabled=True,
            # cudnn_benchmark=False,
            use_gpu=False,
            num_gpus=1,  # TODO: Change to num_devices.
            batch_first=False,  # Note: This might not be implemented properly everywhere.
            variable_sequence_length_train=None,  # Whether samples within a mini-batch have variable length during training.
            variable_sequence_length_test=None,  # Whether samples within a mini-batch have variable length during testing.
            shuffle_train_set=True,  # Shuffle the training set when building mini-batches.
            shuffle_val_set=False,  # Shuffle the validation set when building mini-batches.
            batch_size_train=1,
            batch_size_test=48,
            # batch_size_val=1,  # TODO: Add again after finding all My* classes where it is missing.
            batch_size_benchmark=48,
            batch_size_synth=48,
            batch_size_gen_figure=48,
            dataset_num_workers_gpu=4,  # Number of workers used in dataset when running on GPU(s).
            dataset_num_workers_cpu=0,  # Number of workers used in dataset when running on CPU(s).
            dataset_pin_memory=True,
            dataset_load_async=True,
            teacher_forcing_in_test=False,  # If True, the targets are also given to the model when running the test (needed for WaveNet).
            preload_next_batch_to_gpu=False,  # If True loads the next batch to GPU while processing the current one.
                                              # This enhances GPU usage for the cost of memory, because two batches are loaded to the GPU.
                                              # TODO: This does not work yet, because cuda async does lazy loading.

            ################################
            # Data Parameters              #
            ################################
            len_in_out_multiplier=1,
            out_dir=None,

            ################################
            # Audio Parameters             #
            ################################
            # sampling_frequency=16000,  # TODO: Unused?
            frame_size=5,
            # max_wav_value=32768.0,

            ################################
            # Model Parameters             #
            ################################
            model_type=None,
            model_name=None,
            model_dir=None,  # Explicitly set directory where model is stored, otherwise dir_out/networks_dir/.
            dropout=0.0,
            hidden_init=0.0,  # Hidden state init value.
            train_hidden_init=False,  # Whether the hidden state init value is trainable.  # TODO: Unused?

            ################################
            # Optimization Hyperparameters #
            ################################
            loss_per_sample=False,  # If True the loss is first averaged on each sample and then over the batch.
                                    # If False the loss is averaged over each frame in the whole batch (default).
            backward_retain_graph=False,  # If True, the computational graph is kept after the backward pass.
                                          # Only needed when the gradient computational graph is reused;
                                          # keeping it False lets memory be freed aggressively.
            optimiser_type="Adam",  # "Adam", "SGD"  TODO: more
            optimiser_args=dict(),  # Set optimiser arguments. Preferred way to set learning rate: optimiser_args["lr"]=...
            use_saved_learning_rate=True,  # Use the learning rate saved with a model after loading it.
            replace_inf_grads_by_zero=False,  # Automatically substitute +/- inf gradients with zero during training.
            # dynamic_loss_scaling=True,
            exponential_moving_average=False,  # TODO: Not implemented properly.
            exponential_moving_average_decay=0.9999,  # Ignored when exponential_moving_average is False.

            scheduler_type="default",  # "None", "Plateau", "Exponential","Noam",  TODO: "Step", "Cyclic_cosine"
            scheduler_args=dict(),
            iterations_per_scheduler_step=None,  # Number of training iterations after which the scheduler step function
                                                 # is called with the current loss and total number of iterations as parameter.
                                                 # If None the scheduler is not called.
            epochs_per_scheduler_step=None,  # Number of training epochs after which the scheduler step function is
                                             # called with the current validation loss and total number of epochs.
                                             # When a model is loaded the epoch number continues from the epoch number stored in the model.

            grad_clip_norm_type=None,  # If None, no gradient clipping; otherwise grad_clip_max_norm is used (small bias).
            grad_clip_max_norm=None,  # Ignored if grad_clip_norm_type is None.
            grad_clip_thresh=None,  # Clip absolute value of gradient (big bias).

            # Set optimiser or scheduler_type to ignore type configuration above. Used to try new implementations.
            optimiser=None,  # Will be called with the model parameters only. Set other parameters with partial. Example: partial(torch.optim.Adam, **args).
            scheduler=None,  # Will be called with the optimiser only. Set other parameters with partial. Example: partial(ReduceLROnPlateau, **args).

            ################################
            # Synthesis Parameters         #
            ################################
            synth_vocoder="WORLD",  # "WORLD", "r9y9wavenet_quantized_16k_world_feats"
            synth_ext="wav",  # Extension of the output audio.
            synth_fs=16000,
            num_coded_sps=60,  # Number of spectral features, currently always MGC.
            synth_dir=None,
            synth_acoustic_model_path=None,
            synth_file_suffix='',
            # do_post_filtering = False,  # TODO: Merlin does some filtering before calling its vocoder. Possible implementation: https://github.com/r9y9/nnmnkwii/blob/master/nnmnkwii/postfilters/__init__.py
            synth_gen_figure=False,
            gen_figure_ext=".pdf",
            epochs_per_plot=0,  # No plots per epoch with <= 0. # TODO: plot in run method each ... epochs.
            plot_per_epoch_id_list=None,  # TODO: Id(s) in the dictionary which are plotted.
        )

        if hparams_string:
            logging.info('Parsing command line hparams: %s', hparams_string)
            hparams.parse(hparams_string)

        if verbose:
            logging.info('Final parsed hparams: %s', hparams.values())

        return hparams
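
For reference, HParams.parse accepts a comma-separated list of name=value pairs, so a hypothetical override string for create_hparams (assuming it is exposed as a static method, as the indentation suggests) could look like this:

# Hypothetical usage of the method above: override a few defaults from a string.
hparams = create_hparams("use_gpu=True,batch_size_train=16,dropout=0.1", verbose=True)
assert hparams.batch_size_train == 16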