def run_experiment(dataset_path, model_architecture, model_params=None, num_epochs=5000, batch_size=100,
                   chunk_size=0, verbose=False, reshape_to=None, update_func_name='nesterov_momentum',
                   learning_rate=0.01, update_func_kwargs=None, adapt_learning_rate=False, subtract_mean=True,
                   labels_to_keep=None, snapshot_every=0, snapshot_prefix='model', start_from_snapshot=None,
                   snapshot_final_model=True, num_crops=0, crop_shape=None, mirror_crops=True, test_only=False):
    """Run a deep learning experiment, reporting results to standard output.

    The function loads the dataset, builds the requested architecture (optionally starting from a saved snapshot),
    and then either evaluates the model on the testing subset (test_only) or runs the training loop. It has no return
    value; all results are printed.

    Command line or in-process arguments:
     * dataset_path (str) - path of dataset pickle zip (see data.create_datasets)
     * model_architecture (str) - the name of the architecture to use (subclass of architectures.AbstractModelBuilder)
     * model_params (str) - colon-separated list of equals-separated key-value pairs to pass to the model builder.
                            All keys are assumed to be strings, while values are evaluated as Python literals
     * num_epochs (int) - number of training epochs to run
     * batch_size (int) - number of examples to feed to the network in each batch
     * chunk_size (int) - number of examples to copy to the GPU in each chunk. If it's zero, the chunk size is set to
                          the number of training examples, which results in faster training. However, it's impossible
                          when the size of the example set is larger than the GPU's memory
     * verbose (bool) - if True, extra debugging information will be printed
     * reshape_to (str) - if given, the data will be reshaped to match this string, which should evaluate to a Python
                          tuple of ints (e.g., may be required to make the dataset fit into a convnet input layer)
     * update_func_name (str) - update function to use to train the network. See functions with signature
                                lasagne.updates.<update_func_name>(loss_or_grads, params, learning_rate, **kwargs)
     * learning_rate (float) - learning rate to use with the update function
     * update_func_kwargs (str) - keyword arguments to pass to the update function in addition to learning_rate. This
                                  string has the same format as model_params
     * adapt_learning_rate (bool) - if True, the learning rate will be reduced by a factor of 10 when the validation
                                    loss hasn't decreased within _LEARNING_RATE_GRACE_PERIOD, down to a minimum of
                                    _MIN_LEARNING_RATE
     * subtract_mean (bool) - if True, the mean RGB value in the training set will be subtracted from all subsets
                              of the dataset
     * labels_to_keep (str) - comma-separated list of labels to keep -- all other labels will be dropped
     * snapshot_every (int) - if nonzero, a model snapshot will be saved every snapshot_every number of epochs
     * snapshot_prefix (str) - prefix for saved snapshot files
     * start_from_snapshot (str) - path of model snapshot to start training from. Note: currently, the snapshot doesn't
                                   contain all the original hyperparameters, so running this command with
                                   start_from_snapshot still requires passing all the original command arguments
     * snapshot_final_model (bool) - if True, the final model snapshot will be saved
     * num_crops (int) - if non-zero, this number of random crops of the images will be used
     * crop_shape (str) - if given, specifies the shape of the crops to be created (converted to tuple like reshape_to)
     * mirror_crops (bool) - if True, every random crop will be mirrored horizontally, making the effective number of
                             crops 2 * num_crops
     * test_only (bool) - if True, no training will be performed, and results on the testing subset will be reported

    Raises:
     * AssertionError - if theano.config.floatX is not 'float32'
     * ValueError - if model_architecture is not a key of ARCHITECTURE_NAME_TO_CLASS

    An OverflowError raised by the training loop is treated as divergence: it is reported on standard output and the
    function returns normally.
    """
    # pylint: disable=too-many-locals,too-many-arguments
    assert theano.config.floatX == 'float32', 'Theano floatX must be float32 to ensure consistency with pickled dataset'
    if model_architecture not in ARCHITECTURE_NAME_TO_CLASS:
        raise ValueError('Unknown architecture %s (valid values: %s)' % (model_architecture,
                                                                         sorted(ARCHITECTURE_NAME_TO_CLASS)))
    # Set a static random seed for reproducibility
    np.random.seed(572893204)
    dataset, label_to_index = _load_data(dataset_path, reshape_to, subtract_mean, labels_to_keep=labels_to_keep)
    # The learning rate lives in a shared variable, which is handed to both the model builder and the training loop
    learning_rate_var = theano.shared(lasagne.utils.floatX(learning_rate))
    model_builder = ARCHITECTURE_NAME_TO_CLASS[model_architecture](
        dataset, output_dim=len(label_to_index), batch_size=batch_size, chunk_size=chunk_size, verbose=verbose,
        update_func_name=update_func_name, learning_rate=learning_rate_var,
        update_func_kwargs=parse_param_str(update_func_kwargs), num_crops=num_crops,
        crop_shape=literal_eval(crop_shape) if crop_shape else None, mirror_crops=mirror_crops
    )
    # Without a snapshot, start at epoch 0 and let the builder create the output layer from scratch
    start_epoch, output_layer = _load_model_snapshot(start_from_snapshot) if start_from_snapshot else (0, None)
    output_layer, training_iter, validation_eval = model_builder.build(
        output_layer=output_layer, **parse_param_str(model_params)
    )

    if test_only:
        testing_loss, testing_accuracy = model_builder.create_eval_function('testing', output_layer)()
        print('Testing loss & accuracy:\t %.6f\t%.2f%%' % (testing_loss, testing_accuracy * 100))
        return

    _print_network_info(output_layer)
    try:
        _run_training_loop(output_layer, training_iter, validation_eval, num_epochs, snapshot_every, snapshot_prefix,
                           snapshot_final_model, start_epoch, learning_rate_var, adapt_learning_rate)
    # "except X as e" is valid on Python 2.6+ and Python 3, unlike the Python 2-only "except X, e" form
    except OverflowError as e:
        print('Divergence detected (OverflowError: %s). Stopping now.' % e)
def run_experiment(dataset_path,
                   model_architecture,
                   model_params=None,
                   num_epochs=5000,
                   batch_size=100,
                   chunk_size=0,
                   verbose=False,
                   reshape_to=None,
                   update_func_name='nesterov_momentum',
                   learning_rate=0.01,
                   update_func_kwargs=None,
                   adapt_learning_rate=False,
                   subtract_mean=True,
                   labels_to_keep=None,
                   snapshot_every=0,
                   snapshot_prefix='model',
                   start_from_snapshot=None,
                   snapshot_final_model=True,
                   num_crops=0,
                   crop_shape=None,
                   mirror_crops=True,
                   test_only=False):
    """Run a deep learning experiment, reporting results to standard output.

    The function loads the dataset, builds the requested architecture (optionally starting from a saved snapshot),
    and then either evaluates the model on the testing subset (test_only) or runs the training loop. It has no return
    value; all results are printed.

    Command line or in-process arguments:
     * dataset_path (str) - path of dataset pickle zip (see data.create_datasets)
     * model_architecture (str) - the name of the architecture to use (subclass of architectures.AbstractModelBuilder)
     * model_params (str) - colon-separated list of equals-separated key-value pairs to pass to the model builder.
                            All keys are assumed to be strings, while values are evaluated as Python literals
     * num_epochs (int) - number of training epochs to run
     * batch_size (int) - number of examples to feed to the network in each batch
     * chunk_size (int) - number of examples to copy to the GPU in each chunk. If it's zero, the chunk size is set to
                          the number of training examples, which results in faster training. However, it's impossible
                          when the size of the example set is larger than the GPU's memory
     * verbose (bool) - if True, extra debugging information will be printed
     * reshape_to (str) - if given, the data will be reshaped to match this string, which should evaluate to a Python
                          tuple of ints (e.g., may be required to make the dataset fit into a convnet input layer)
     * update_func_name (str) - update function to use to train the network. See functions with signature
                                lasagne.updates.<update_func_name>(loss_or_grads, params, learning_rate, **kwargs)
     * learning_rate (float) - learning rate to use with the update function
     * update_func_kwargs (str) - keyword arguments to pass to the update function in addition to learning_rate. This
                                  string has the same format as model_params
     * adapt_learning_rate (bool) - if True, the learning rate will be reduced by a factor of 10 when the validation
                                    loss hasn't decreased within _LEARNING_RATE_GRACE_PERIOD, down to a minimum of
                                    _MIN_LEARNING_RATE
     * subtract_mean (bool) - if True, the mean RGB value in the training set will be subtracted from all subsets
                              of the dataset
     * labels_to_keep (str) - comma-separated list of labels to keep -- all other labels will be dropped
     * snapshot_every (int) - if nonzero, a model snapshot will be saved every snapshot_every number of epochs
     * snapshot_prefix (str) - prefix for saved snapshot files
     * start_from_snapshot (str) - path of model snapshot to start training from. Note: currently, the snapshot doesn't
                                   contain all the original hyperparameters, so running this command with
                                   start_from_snapshot still requires passing all the original command arguments
     * snapshot_final_model (bool) - if True, the final model snapshot will be saved
     * num_crops (int) - if non-zero, this number of random crops of the images will be used
     * crop_shape (str) - if given, specifies the shape of the crops to be created (converted to tuple like reshape_to)
     * mirror_crops (bool) - if True, every random crop will be mirrored horizontally, making the effective number of
                             crops 2 * num_crops
     * test_only (bool) - if True, no training will be performed, and results on the testing subset will be reported

    Raises:
     * AssertionError - if theano.config.floatX is not 'float32'
     * ValueError - if model_architecture is not a key of ARCHITECTURE_NAME_TO_CLASS

    An OverflowError raised by the training loop is treated as divergence: it is reported on standard output and the
    function returns normally.
    """
    # pylint: disable=too-many-locals,too-many-arguments
    assert theano.config.floatX == 'float32', 'Theano floatX must be float32 to ensure consistency with pickled dataset'
    if model_architecture not in ARCHITECTURE_NAME_TO_CLASS:
        raise ValueError(
            'Unknown architecture %s (valid values: %s)' %
            (model_architecture, sorted(ARCHITECTURE_NAME_TO_CLASS)))
    # Set a static random seed for reproducibility
    np.random.seed(572893204)
    dataset, label_to_index = _load_data(dataset_path,
                                         reshape_to,
                                         subtract_mean,
                                         labels_to_keep=labels_to_keep)
    # The learning rate lives in a shared variable, which is handed to both
    # the model builder and the training loop
    learning_rate_var = theano.shared(lasagne.utils.floatX(learning_rate))
    model_builder = ARCHITECTURE_NAME_TO_CLASS[model_architecture](
        dataset,
        output_dim=len(label_to_index),
        batch_size=batch_size,
        chunk_size=chunk_size,
        verbose=verbose,
        update_func_name=update_func_name,
        learning_rate=learning_rate_var,
        update_func_kwargs=parse_param_str(update_func_kwargs),
        num_crops=num_crops,
        crop_shape=literal_eval(crop_shape) if crop_shape else None,
        mirror_crops=mirror_crops)
    # Without a snapshot, start at epoch 0 and let the builder create the
    # output layer from scratch
    start_epoch, output_layer = _load_model_snapshot(
        start_from_snapshot) if start_from_snapshot else (0, None)
    output_layer, training_iter, validation_eval = model_builder.build(
        output_layer=output_layer, **parse_param_str(model_params))

    if test_only:
        testing_loss, testing_accuracy = model_builder.create_eval_function(
            'testing', output_layer)()
        print('Testing loss & accuracy:\t %.6f\t%.2f%%' %
              (testing_loss, testing_accuracy * 100))
        return

    _print_network_info(output_layer)
    try:
        _run_training_loop(output_layer, training_iter, validation_eval,
                           num_epochs, snapshot_every, snapshot_prefix,
                           snapshot_final_model, start_epoch,
                           learning_rate_var, adapt_learning_rate)
    # "except X as e" is valid on Python 2.6+ and Python 3, unlike the
    # Python 2-only "except X, e" form
    except OverflowError as e:
        print('Divergence detected (OverflowError: %s). Stopping now.' % e)
def search_hyperparams(base_cmd, log_dir, base_model_params=None, model_params_space=None, max_evals=10,
                       learning_rate_range=None, disabled_hyperparams=None):
    """Run a sequential hyperparameter search using hyperopt.fmin().

    To enable restartability and reproducibility, each hyperparameter combination is evaluated by calling base_cmd
    with the extra hyperparameters, and logging the results to a (hopefully) unique file in log_dir.

    The experiment-running command is called in a separate shell rather than in-process, because it may depend on
    global random state. Calling run_experiment() successively in-process would hurt reproducibility.

    When the search finishes, the best command line and its error rate are printed to standard output. The function
    has no return value.

    Arguments:
     * base_cmd (str) - base command line to call, typically "python manage.py run_experiment ..." with a small number
                        of epochs (see experiment.run_experiment())
     * log_dir (str) - path where the outputs of individual runs will be stored. It is created if it doesn't exist;
                       if it does exist, a warning is issued.
                       Note: each run's filename is "experiment.<command_line_hash>.log", meaning that collisions are
                       possible in very rare cases
     * base_model_params (str) - model_params to pass to the run_experiment command that are not part of the search
     * model_params_space (str) - model_params to experiment with, in the same format as normal model_params, except
                                  that each key is of the form <param_name>__<hp_func_name>, where hp_func_name is
                                  a member of hyperopt.hp, and the parameter value is interpreted as arguments to
                                  pass to hp_func_name. The key is split on its last double underscore, so param_name
                                  may itself contain double underscores.
                                  For example, if the architecture is ConvNet, "ld0_dropout__uniform=0.0,0.75" will
                                  experiment with dropout values drawn from uniform(0.0, 0.75) for the dropout layer
                                  that comes after the first dense layer.
     * max_evals (int) - number of experiments to run.
                         Note: it's possible to run once with a small value of max_evals, and then do a subsequent run
                         with an increased number of experiments. The second run will read the results of the first run
                         and continue from the point where that run stopped.
     * learning_rate_range (str) - a pair of comma-separated values that specifies the range from which the
                                   learning_rate will be drawn, according to hyperopt.hp.loguniform. Defaults to
                                   (-12, -5) when not given
     * disabled_hyperparams (str) - comma-separated list of hyperparameters with which no experimentation should be
                                    done. Each name must be a top-level key of the search space (update_func,
                                    learning_rate, mirror_crops, num_crops or model_params); any other name raises
                                    KeyError
    """

    if os.path.exists(log_dir):
        warn('Log directory %s exists. Existing log files may be read to avoid repeating experiments.' % log_dir)
    else:
        os.makedirs(log_dir)

    learning_rate_range = literal_eval(learning_rate_range) if learning_rate_range else (-12, -5)
    model_params = parse_param_str(base_model_params)
    # items() works on both Python 2 and 3 (iteritems() exists only on Python 2)
    for param_name_and_hp_func, hp_func_args in parse_param_str(model_params_space).items():
        # Split on the last '__' only, so that parameter names containing '__' don't break the unpacking
        param_name, hp_func_name = param_name_and_hp_func.rsplit('__', 1)
        model_params[param_name] = getattr(hyperopt.hp, hp_func_name)(param_name, *hp_func_args)
    space = dict(
        update_func=hyperopt.hp.choice('update_func', [
            dict(name='adam',
                 beta1=hyperopt.hp.uniform('beta1', 0.0, 0.9),
                 beta2=hyperopt.hp.uniform('beta2', 0.99, 1.0)),
            dict(name='nesterov_momentum',
                 momentum=hyperopt.hp.uniform('momentum', 0.5, 1.0))
        ]),
        learning_rate=hyperopt.hp.loguniform('learning_rate', *learning_rate_range),
        mirror_crops=hyperopt.hp.choice('mirror_crops', [False, True]),
        num_crops=hyperopt.hp.choice('num_crops', [1, 5]),
        model_params=model_params
    )
    # Drop the disabled dimensions from the search space entirely, so they aren't passed to the objective
    for hyperparam in (disabled_hyperparams.split(',') if disabled_hyperparams else ()):
        del space[hyperparam]

    trials = hyperopt.Trials()
    hyperopt.fmin(lambda param_dict: _eval_objective(param_dict, log_dir, base_cmd), space=space,
                  algo=hyperopt.tpe.suggest, trials=trials, max_evals=max_evals)
    # The best trial's result dict is expected to supply the 'cmd' and 'loss' keys used by the format string
    print('---\nBest command line: %(cmd)s\nError rate: %(loss).2f%%' % trials.best_trial['result'])