def _load_params_keep_feat_emb(npz_path, layers):
    """Restore saved parameter values into `layers`, except "feat_emb".

    Args:
        npz_path: path of an .npz archive holding positional arrays
            (`arr_0`, `arr_1`, ...), as written by `np.savez(path, *values)`.
        layers: list of lasagne layers whose parameters are overwritten, in
            the order returned by `lasagne.layers.get_all_params`.
    """
    # Read every array while the archive is open, then release the file.
    with np.load(npz_path) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]

    params = lasagne.layers.get_all_params(layers)
    for p, v in zip(params, param_values[:len(params)]):
        # Do not overwrite embedding value with old embedding. Removing
        # the following condition will prevent a trained model from being
        # tested on a different dataset
        if p.name != "feat_emb":
            p.set_value(v)


def execute(dataset,
            n_hidden_u,
            n_hidden_t_enc,
            n_hidden_t_dec,
            n_hidden_s,
            embedding_source=histo_GenotypicFrequency_perclass,
            additional_unsup_input=None,
            num_epochs=500,
            learning_rate=.001,
            learning_rate_annealing=1.0,
            alpha=1,
            beta=1,
            delta=1,
            gamma=1,
            lmd=.0001,
            disc_nonlinearity="sigmoid",
            encoder_net_init=0.2,
            decoder_net_init=0.2,
            optimizer="rmsprop",
            max_patience=100,
            batchnorm=0,
            input_dropout=1.0,
            embedding_noise=0.0,
            keep_labels=1.0,
            prec_recall_cutoff=True,
            missing_labels_val=-1.0,
            which_fold=0,
            early_stop_criterion='loss_sup_det',
            input_decoder_mode="regression",
            save_path='/Users/Marie-Elyse/Downloads/embedding2',
            save_copy='/Users/Marie-Elyse/Downloads/embedding2',
            dataset_path='/Users/Marie-Elyse/Downloads/embedding2',
            resume=False,
            exp_name='',
            random_proj=0,
            bootstrap_snp_embeddings=0,
            bootstrap_cutoff=0.9):
    """Train, evaluate and analyse a model on one fold of `dataset`.

    The function loads the data, builds the feature-embedding, supervised and
    reconstruction networks, trains them with early stopping, reloads the best
    saved parameters, and then writes test predictions, scores, input
    gradients and integrated gradients under `save_path`.

    Args:
        dataset: dataset identifier forwarded to `mlh.load_data`; also used
            as a sub-directory name under `save_path` / `save_copy`.
        n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, n_hidden_s: sequences of
            hidden-layer sizes forwarded to the network builders.
            `n_hidden_t_enc` and `n_hidden_t_dec` must be non-empty and end
            with the same value.
        embedding_source: `None` or "raw" to use the raw unsupervised data,
            the path of an existing file, or an embedding category name that
            is resolved to `<dataset_path>/<name>_fold<which_fold>.npy`.
            NOTE(review): the default is a bare name, not a string; it must
            be defined at module level before this `def` is executed -
            confirm that this is intended.
        additional_unsup_input: optional ';'-separated list of .npy paths
            whose arrays are horizontally stacked in front of `x_unsup`.
        num_epochs: maximum number of training epochs.
        learning_rate: initial learning rate.
        learning_rate_annealing: factor the learning rate is multiplied by
            after every epoch.
        alpha, beta, gamma: weights of the three reconstruction losses
            (`beta` is forced to 0 when `gamma` is 0).
        delta: weight of the supervised loss.
        lmd: weight of the L2 penalty on the updated parameters.
        disc_nonlinearity: forwarded to the supervised-network, supervised
            loss and test-function builders.
        encoder_net_init, decoder_net_init: forwarded to the network builders.
        optimizer: one of "rmsprop", "adam" or "amsgrad".
        max_patience: number of consecutive non-improving epochs after which
            training stops.
        batchnorm, input_dropout: forwarded to `mh.build_discrim_net`.
        embedding_noise, random_proj: forwarded to `mh.build_feat_emb_nets`.
        keep_labels, missing_labels_val: forwarded to `mlh.load_data` and
            `mh.define_sup_loss`.
        prec_recall_cutoff: forwarded to `mlh.monitoring`.
        which_fold: fold index forwarded to `mlh.load_data` and used to
            resolve the embedding file name.
        early_stop_criterion: label of the monitored value driving early
            stopping. An improvement rule exists only for "accuracy",
            "input_reconst_acc" (both maximised) and "loss. sup."
            (minimised); "loss_sup_det" is accepted as an alias of
            "loss. sup.". Any other monitored label never counts as an
            improvement.
        input_decoder_mode: "regression" or "classification".
        save_path: root directory for the results of this run.
        save_copy: root directory the results are copied to at the end, and
            from which the best model is read when `resume` is set.
        dataset_path: directory containing the dataset and embeddings.
        resume: if true, initialise parameters from `dietnet_best.npz`.
        exp_name: prefix of the generated experiment name.
        bootstrap_snp_embeddings: if true, generate new SNP embeddings from
            the train/valid data plus test examples labelled by the model.
        bootstrap_cutoff: "soft" to use the test scores as labels, otherwise
            a threshold on the test scores selecting the test examples kept.

    Raises:
        ValueError: if `early_stop_criterion` is not a monitored value.
        AssertionError: if `input_decoder_mode`, `optimizer` or the hidden
            layer specifications are invalid.
    """

    # Prepare embedding information :
    # - If no embedding is specified, use the transposed input matrix
    # - If a file is specified, use its content as feature embeddings
    # - Else (an embedding category like 'histo3x26' is provided), load a
    #   pregenerated embedding of the specified category
    if embedding_source is None or embedding_source == "raw":
        embedding_source = None
        embedding_input = 'raw'
    elif os.path.exists(embedding_source):
        embedding_input = embedding_source
    else:
        embedding_input = embedding_source
        embedding_source = os.path.join(
            dataset_path, embedding_input + '_fold' + str(which_fold) + '.npy')

    # Load the dataset
    print("Loading data")
    (x_train, y_train, exmpl_ids_train, x_valid, y_valid, exmpl_ids_valid,
     x_test, y_test, exmpl_ids_test, x_unsup, training_labels, feature_names,
     label_names) = mlh.load_data(dataset,
                                  dataset_path,
                                  embedding_source,
                                  which_fold=which_fold,
                                  keep_labels=keep_labels,
                                  missing_labels_val=missing_labels_val,
                                  embedding_input=embedding_input,
                                  norm=False)

    # Load the additional unsupervised data, if some is specified
    if additional_unsup_input is not None:
        print("Adding additional data to the model's unsupervised inputs")
        paths = additional_unsup_input.split(";")
        additional_unsup_data = [np.load(p) for p in paths]
        print(x_unsup.shape)
        x_unsup = np.hstack(additional_unsup_data + [x_unsup])
        print(x_unsup.shape)

    if x_unsup is not None:
        n_samples_unsup = x_unsup.shape[1]
    else:
        n_samples_unsup = 0

    # Keep untouched copies: the arrays below are modified in place.
    original_x_train = x_train.copy()
    original_x_valid = x_valid.copy()
    original_x_test = x_test.copy()

    # Change how the missing data values are encoded. Right now they are
    # encoded as being the mean of the corresponding feature so that, after
    # feature normalization, they will be 0s. However, this prevents us from
    # transferring the minibatch data as int8 so we replace those values
    # with -1s.
    for i in range(x_train.shape[1]):
        feature_mean = x_train[:, i].mean()
        x_train[:, i] = mh.replace_arr_value(x_train[:, i], feature_mean, -1)
        x_valid[:, i] = mh.replace_arr_value(x_valid[:, i], feature_mean, -1)
        x_test[:, i] = mh.replace_arr_value(x_test[:, i], feature_mean, -1)
    x_train = x_train.astype("int8")
    x_valid = x_valid.astype("int8")
    x_test = x_test.astype("int8")

    # Normalization statistics. The mlh.load_data() function already offers
    # normalization but we compute the statistics here so that we have
    # access to both the normalized and unnormalized input data. The actual
    # normalization is applied symbolically further down.
    norm_mus = original_x_train.mean(axis=0)
    norm_sigmas = original_x_train.std(axis=0) + 1e-6

    # Setup variables to build the right type of decoder based on the value
    # of `input_decoder_mode`
    assert input_decoder_mode in ["regression", "classification"]
    if input_decoder_mode == "regression":
        # The size of the input reconstruction will be the same as the number
        # of inputs
        decoder_encoder_unit_ratio = 1
    elif input_decoder_mode == "classification":
        # The size of the input reconstruction will be N times larger than
        # the number of inputs where N is the number of distinct discrete
        # values that each input can take. For SNP input data with an additive
        # coding scheme, N=3 because the 3 possible values are : {0, 1, 2}.
        nb_discrete_vals_by_input = int(original_x_train.max() + 1)
        decoder_encoder_unit_ratio = nb_discrete_vals_by_input

        # Print baseline accuracy for the imputation of genes
        print("Distribution of input values in valid: %f %f %f" %
              ((original_x_valid == 0).mean(), (original_x_valid == 1).mean(),
               (original_x_valid == 2).mean()))
        print("Distribution of input values in test: %f %f %f" %
              ((original_x_test == 0).mean(), (original_x_test == 1).mean(),
               (original_x_test == 2).mean()))

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    print("Number of features : ", n_feats)
    print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1]))
    n_targets = y_train.shape[1] if y_train.ndim == 2 else y_train.max() + 1

    # Set some variables
    batch_size = 138
    # When gamma is 0, beta is forced to 0 as well.
    beta = gamma if (gamma == 0) else beta

    # Generate a name for the experiment based on the hyperparameters used
    if embedding_source is None:
        embedding_name = embedding_input
    else:
        embedding_name = embedding_source.replace("_", "").split(".")[0]
        exp_name += embedding_name.rsplit('/', 1)[::-1][0] + '_'

    exp_name += mlh.define_exp_name(
        keep_labels, alpha, beta, gamma, lmd, n_hidden_u, n_hidden_t_enc,
        n_hidden_t_dec, n_hidden_s, which_fold, learning_rate,
        decoder_net_init, encoder_net_init, batchnorm, input_dropout,
        embedding_noise, early_stop_criterion, learning_rate_annealing,
        input_decoder_mode)
    print("Experiment: " + exp_name)

    # Ensure that the folders where the results of the experiment will be
    # saved do exist. Create them if they don't.
    save_path = os.path.join(save_path, dataset, exp_name)
    save_copy = os.path.join(save_copy, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if not os.path.exists(save_copy):
        os.makedirs(save_copy)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.bmatrix('input_sup')
    input_var_unsup = theano.shared(x_unsup, 'input_unsup')  # x_unsup TBD
    target_var_sup = T.matrix('target_sup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Use the provided mus and sigmas to process the missing values and
    # normalize the inputs: entries equal to -1 (missing) are replaced by the
    # feature mean, so they become 0 after normalization.
    b_input_var_sup = input_var_sup.astype("float32")
    normed_input_sup = (T.eq(b_input_var_sup, -1) * norm_mus +
                        T.neq(b_input_var_sup, -1) * b_input_var_sup)
    normed_input_sup = (normed_input_sup - norm_mus) / norm_sigmas

    reconst_target_sup = T.cast(input_var_sup, "int32")

    # Build model
    print("Building model")

    # Some checks (no such check is applied to n_hidden_u)
    assert len(n_hidden_t_enc) > 0
    assert len(n_hidden_t_dec) > 0
    assert n_hidden_t_dec[-1] == n_hidden_t_enc[-1]

    # Build feature embedding networks (encoding and decoding if gamma > 0)
    nets, embeddings, pred_feat_emb = mh.build_feat_emb_nets(
        embedding_source, n_feats, n_samples_unsup, input_var_unsup,
        n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, gamma, encoder_net_init,
        decoder_net_init, save_path, random_proj, decoder_encoder_unit_ratio,
        embedding_noise)

    # Build feature embedding reconstruction networks (if alpha > 0, beta > 0)
    nets += mh.build_feat_emb_reconst_nets(
        [alpha, beta], n_samples_unsup, n_hidden_u,
        [n_hidden_t_enc, n_hidden_t_dec], nets,
        [encoder_net_init, decoder_net_init])

    # Supervised network
    discrim_net, hidden_rep = mh.build_discrim_net(
        batch_size, n_feats, normed_input_sup, n_hidden_t_enc, n_hidden_s,
        embeddings[0], disc_nonlinearity, n_targets, batchnorm, input_dropout)

    # Reconstruct network
    nets += [
        mh.build_reconst_net(hidden_rep,
                             embeddings[1] if len(embeddings) > 1 else None,
                             n_feats * decoder_encoder_unit_ratio, gamma,
                             decoder_encoder_unit_ratio)
    ]

    # `nets` is complete from here on. Drop its falsy entries once and keep
    # the result as a real list so that it can be concatenated with other
    # lists on both Python 2 and Python 3.
    active_nets = list(filter(None, nets))
    all_nets = active_nets + [discrim_net]

    # Load weights if we are resuming job
    if resume:
        # Load best model
        _load_params_keep_feat_emb(
            os.path.join(save_copy, 'dietnet_best.npz'), all_nets)

    print("Building and compiling training functions")

    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(nets, start=2)
    prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net])
    prediction_sup = prediction_sup[0]
    prediction_sup_det = prediction_sup_det[0]

    # Define losses
    # reconstruction losses
    if input_decoder_mode == "regression":
        reconst_losses, reconst_losses_det = mh.define_reconst_losses(
            predictions, predictions_det,
            [input_var_unsup, input_var_unsup, normed_input_sup])
    elif input_decoder_mode == "classification":
        # Obtain regular reconstruction losses for every reconstruction
        # but the reconstruction of the supervised input data
        reconst_losses1, reconst_losses_det1 = mh.define_reconst_losses(
            predictions[:-1], predictions_det[:-1],
            [input_var_unsup, input_var_unsup])

        # Obtain a "classification" reconstruction loss for the reconstruction
        # of the supervised input data. This classification loss will be
        # performed on the input data without normalization
        reconst_losses2, reconst_losses_det2 = \
            mh.define_classif_reconst_losses(
                predictions[-1:], predictions_det[-1:], [reconst_target_sup],
                [decoder_encoder_unit_ratio])

        reconst_losses = reconst_losses1 + reconst_losses2
        reconst_losses_det = reconst_losses_det1 + reconst_losses_det2

    # supervised loss
    sup_loss, sup_loss_det = mh.define_sup_loss(disc_nonlinearity,
                                                prediction_sup,
                                                prediction_sup_det,
                                                keep_labels, target_var_sup,
                                                missing_labels_val)

    # Define inputs
    inputs = [input_var_sup, target_var_sup]

    # Define parameters
    params = lasagne.layers.get_all_params([discrim_net] + active_nets,
                                           trainable=True,
                                           unwrap_shared=False)
    params_to_freeze = lasagne.layers.get_all_params(active_nets,
                                                     trainable=False,
                                                     unwrap_shared=False)

    # Remove unshared variables from params and params_to_freeze
    params = [
        p for p in params
        if isinstance(p, theano.compile.sharedvalue.SharedVariable)
    ]
    params_to_freeze = [
        p for p in params_to_freeze
        if isinstance(p, theano.compile.sharedvalue.SharedVariable)
    ]
    print("Params : ", params)

    # Rescale the feature embedding by its norm taken along axis 0.
    feat_emb_var = next(p for p in lasagne.layers.get_all_params([discrim_net])
                        if p.name == 'input_unsup' or p.name == 'feat_emb')
    print(feat_emb_var)
    feat_emb_val = feat_emb_var.get_value()
    feat_emb_norms = (feat_emb_val**2).sum(0)**0.5
    feat_emb_var.set_value(feat_emb_val / feat_emb_norms)

    print('Number of params discrim: ' + str(len(params)))
    print('Number of params to freeze: ' + str(len(params_to_freeze)))

    # Frozen parameters must not receive updates.
    for p in params_to_freeze:
        params = [el for el in params if el != p]

    print('Number of params to update: ' + str(len(params)))

    # Combine losses
    loss = delta*sup_loss + alpha*reconst_losses[0] + beta*reconst_losses[1] + \
        gamma*reconst_losses[2]
    loss_det = delta*sup_loss_det + alpha*reconst_losses_det[0] + \
        beta*reconst_losses_det[1] + gamma*reconst_losses_det[2]

    l2_penalty = apply_penalty(params, l2)
    loss = loss + lmd * l2_penalty
    loss_det = loss_det + lmd * l2_penalty

    # Compute network updates
    assert optimizer in ["rmsprop", "adam", "amsgrad"]
    if optimizer == "rmsprop":
        updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr)
    elif optimizer == "adam":
        updates = lasagne.updates.adam(loss, params, learning_rate=lr)
    elif optimizer == "amsgrad":
        updates = lasagne.updates.amsgrad(loss, params, learning_rate=lr)

    # Apply norm constraints on the weights
    for k in updates.keys():
        if updates[k].ndim == 2:
            updates[k] = lasagne.updates.norm_constraint(updates[k], 1.0)

    # Compile training function
    train_fn = theano.function(inputs,
                               loss,
                               updates=updates,
                               on_unused_input='ignore')

    # Monitoring Labels. A reconstruction loss equal to 0 is not monitored.
    monitor_labels = [
        "reconst. feat. W_enc", "reconst. feat. W_dec", "reconst. loss"
    ]
    monitor_labels = [
        i for i, j in zip(monitor_labels, reconst_losses) if j != 0
    ]
    monitor_labels += ["feat. W_enc. mean", "feat. W_enc var"]
    monitor_labels += ["feat. W_dec. mean", "feat. W_dec var"] if \
        (embeddings[1] is not None) else []
    monitor_labels += ["loss. sup.", "total loss"]

    # Build and compile test function (same order as `monitor_labels`)
    val_outputs = reconst_losses_det
    val_outputs = [i for i, j in zip(val_outputs, reconst_losses) if j != 0]
    val_outputs += [embeddings[0].mean(), embeddings[0].var()]
    val_outputs += [embeddings[1].mean(), embeddings[1].var()] if \
        (embeddings[1] is not None) else []
    val_outputs += [sup_loss_det, loss_det]

    # Compute supervised accuracy and add it to monitoring list
    test_acc, test_pred = mh.define_test_functions(disc_nonlinearity,
                                                   prediction_sup,
                                                   prediction_sup_det,
                                                   target_var_sup)
    monitor_labels.append("accuracy")
    val_outputs.append(test_acc)

    # If appropriate, compute the input reconstruction accuracy and add it to
    # the monitoring list
    if input_decoder_mode == "classification":
        input_reconst_acc = mh.define_classif_reconst_acc(
            predictions_det[-1], reconst_target_sup,
            decoder_encoder_unit_ratio)
        monitor_labels.append("input_reconst_acc")
        val_outputs.append(input_reconst_acc)

    # Resolve the early-stopping criterion to a monitored label up front so
    # that an invalid value fails before any training time is spent.
    # 'loss_sup_det' designates the deterministic supervised loss, which is
    # monitored under the label "loss. sup.".
    if early_stop_criterion == 'loss_sup_det':
        stop_label = "loss. sup."
    else:
        stop_label = early_stop_criterion
    if stop_label not in monitor_labels:
        raise ValueError("There is no monitored value by the name of %s" %
                         early_stop_criterion)
    stop_idx = monitor_labels.index(stop_label)
    sup_loss_idx = monitor_labels.index("loss. sup.")

    # Compile prediction functions
    predict = theano.function([input_var_sup], test_pred)
    predict_from_normed_inps = theano.function([normed_input_sup], test_pred)

    predict_scores = theano.function([input_var_sup], prediction_sup_det)
    predict_scores_from_normed_inps = theano.function([normed_input_sup],
                                                      prediction_sup_det)

    # Compile validation function
    val_fn = theano.function(inputs, [prediction_sup_det] + val_outputs,
                             on_unused_input='ignore')

    # Finally, launch the training loop.
    print("Starting training...")

    # Some variables
    patience = 0

    train_monitored = []
    valid_monitored = []
    train_loss = []

    # Pre-training monitoring
    print("Epoch 0 of {}".format(num_epochs))

    train_minibatches = mlh.iterate_minibatches(x_train,
                                                y_train,
                                                batch_size,
                                                shuffle=False)
    train_err = mlh.monitoring(train_minibatches, "train", val_fn,
                               monitor_labels, prec_recall_cutoff)

    valid_minibatches = mlh.iterate_minibatches(x_valid,
                                                y_valid,
                                                batch_size,
                                                shuffle=False)
    valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn,
                               monitor_labels, prec_recall_cutoff)

    # Before starting training, save a copy of the model so that a "best"
    # model always exists when it is reloaded after training
    np.savez(os.path.join(save_path, 'dietnet_best.npz'),
             *lasagne.layers.get_all_param_values(all_nets))

    # Training loop
    start_training = time.time()
    for epoch in range(num_epochs):
        start_time = time.time()
        print("Epoch {} of {}".format(epoch + 1, num_epochs))
        nb_minibatches = 0
        loss_epoch = 0

        # Train pass
        for batch in mlh.iterate_minibatches(x_train,
                                             training_labels,
                                             batch_size,
                                             shuffle=True):
            loss_epoch += train_fn(*batch)
            nb_minibatches += 1

        loss_epoch /= nb_minibatches
        train_loss += [loss_epoch]

        # Monitoring on the training set
        train_minibatches = mlh.iterate_minibatches(x_train,
                                                    y_train,
                                                    batch_size,
                                                    shuffle=False)
        train_err = mlh.monitoring(train_minibatches, "train", val_fn,
                                   monitor_labels, prec_recall_cutoff)
        train_monitored += [train_err]

        # Monitoring on the validation set
        valid_minibatches = mlh.iterate_minibatches(x_valid,
                                                    y_valid,
                                                    batch_size,
                                                    shuffle=False)

        valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn,
                                   monitor_labels, prec_recall_cutoff)
        valid_monitored += [valid_err]

        early_stop_val = valid_err[stop_idx]

        valid_loss_sup_hist = [v[sup_loss_idx] for v in valid_monitored]
        valid_loss_sup = valid_loss_sup_hist[-1]

        # Early stopping. Accuracies are maximised (an accuracy tie still
        # counts as an improvement when the supervised loss reaches a new
        # minimum); the supervised loss is minimised.
        if epoch == 0:
            best_valid = early_stop_val
        elif ((early_stop_val > best_valid
               and stop_label == 'input_reconst_acc')
              or (early_stop_val > best_valid
                  and stop_label == 'accuracy')
              or (early_stop_val >= best_valid
                  and stop_label == 'accuracy'
                  and valid_loss_sup == min(valid_loss_sup_hist))
              or (early_stop_val < best_valid
                  and stop_label == 'loss. sup.')):
            best_valid = early_stop_val
            patience = 0

            # Save stuff
            np.savez(os.path.join(save_path, 'dietnet_best.npz'),
                     *lasagne.layers.get_all_param_values(all_nets))
            np.savez(save_path + "/errors_supervised_best.npz",
                     list(zip(*train_monitored)),
                     list(zip(*valid_monitored)))

            # Monitor on the test set now because sometimes the saving doesn't
            # go well and there isn't a model to load at the end of training
            if y_test is not None:
                test_minibatches = mlh.iterate_minibatches(x_test,
                                                           y_test,
                                                           batch_size,
                                                           shuffle=False)

                test_err = mlh.monitoring(test_minibatches, "test", val_fn,
                                          monitor_labels, prec_recall_cutoff)
        else:
            patience += 1
            # Save stuff
            np.savez(os.path.join(save_path, 'dietnet_last.npz'),
                     *lasagne.layers.get_all_param_values(all_nets))
            np.savez(save_path + "/errors_supervised_last.npz",
                     list(zip(*train_monitored)),
                     list(zip(*valid_monitored)))

        print("  epoch time:\t\t\t{:.3f}s \n".format(time.time() - start_time))

        # End training if needed
        if patience == max_patience or epoch == num_epochs - 1:
            break

        # Anneal the learning rate
        lr.set_value(
            np.array(lr.get_value() * learning_rate_annealing,
                     dtype="float32"))

    # End training with a final monitoring step on the best model
    print("Ending training")

    # Load best model
    _load_params_keep_feat_emb(os.path.join(save_path, 'dietnet_best.npz'),
                               all_nets)

    if embedding_source is None:
        # Save embedding
        pred = pred_feat_emb()
        np.savez(os.path.join(save_path, 'feature_embedding.npz'), pred)

    # Training set results
    train_minibatches = mlh.iterate_minibatches(x_train,
                                                y_train,
                                                batch_size,
                                                shuffle=False)
    train_err = mlh.monitoring(train_minibatches, "train", val_fn,
                               monitor_labels, prec_recall_cutoff)

    # Validation set results
    valid_minibatches = mlh.iterate_minibatches(x_valid,
                                                y_valid,
                                                batch_size,
                                                shuffle=False)
    valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn,
                               monitor_labels, prec_recall_cutoff)

    # Test set results
    if y_test is not None:
        test_minibatches = mlh.iterate_minibatches(x_test,
                                                   y_test,
                                                   batch_size,
                                                   shuffle=False)

        test_err = mlh.monitoring(test_minibatches, "test", val_fn,
                                  monitor_labels, prec_recall_cutoff)

        # Test the model's accuracy with varying levels of provided SNPs
        test_minibatches = mlh.iterate_minibatches(x_test,
                                                   y_test,
                                                   batch_size,
                                                   shuffle=False)
        mlh.eval_prediction(test_minibatches,
                            "test (rescaled)",
                            predict_from_normed_inps,
                            norm_mus,
                            norm_sigmas,
                            nb_evals=1,
                            rescale_inputs=True)

    # Save the model's test predictions to file
    print(x_test.shape)
    test_predictions = []
    for minibatch in mlh.iterate_testbatches(x_test, 1, shuffle=False):
        test_predictions += [predict(minibatch)]
    print(len(test_predictions))
    print(sum([t.shape[0] for t in test_predictions]))
    np.savez(os.path.join(save_path, 'test_predictions.npz'),
             test_predictions)

    # Get the scores assigned by the model to each class for each test sample
    test_scores = []
    for minibatch in mlh.iterate_testbatches(x_test, 1, shuffle=False):
        test_scores += [predict_scores(minibatch)]
    np.savez(os.path.join(save_path, 'test_scores.npz'), test_scores)

    # Generate new SNP embeddings using test examples labeled according
    # to the model's predictions
    if bootstrap_snp_embeddings:

        if bootstrap_cutoff == "soft":
            # Every test example is used, labelled with its raw scores.
            bootstrap_snp_data = np.hstack(
                (x_train.transpose(), x_valid.transpose(),
                 x_test.transpose()))
            bootstrap_labels = np.vstack(
                (y_train, y_valid, np.array(test_scores)[:, 0, :]))

            filename_genotypic = 'bootstrap_gen_snp_embeddings_softlabels.npy'
            filename_allelic = 'bootstrap_all_snp_embeddings_softlabels.npy'

        else:  # Hard cutoff
            # Keep only the test examples with at least one class score
            # above the cutoff, labelled with their highest-scoring class.
            sure_test_idxs = np.argwhere(
                (np.array(test_scores)[:, 0, :] >
                 bootstrap_cutoff).sum(1)).flatten()
            sure_test_inputs = x_test[sure_test_idxs]
            sure_test_preds = np.array(test_scores)[sure_test_idxs,
                                                    0].argmax(1)

            bootstrap_snp_data = np.hstack(
                (x_train.transpose(), x_valid.transpose(),
                 sure_test_inputs.transpose()))
            bootstrap_labels = np.hstack(
                (y_train.argmax(1), y_valid.argmax(1), sure_test_preds))

            filename_genotypic = (
                'bootstrap_gen_snp_embeddings_cutoff%f.npy' % bootstrap_cutoff)
            filename_allelic = (
                'bootstrap_all_snp_embeddings_cutoff%f.npy' % bootstrap_cutoff)

        utils_helpers.generate_snp_hist(
            bootstrap_snp_data,
            bootstrap_labels,
            label_names=label_names,
            perclass=True,
            sum_to_one=True,
            filename_genotypic=os.path.join(save_path, filename_genotypic),
            filename_allelic=os.path.join(save_path, filename_allelic))

    # Print the total time elapsed since the start of training
    print("Training time:\t\t\t{:.3f}s".format(time.time() - start_training))

    # Analyse the model gradients to determine the influence of each SNP on
    # each of the model's prediction
    print(label_names)
    class_idx = T.iscalar("class index")
    grad_fn = theano.function([input_var_sup, class_idx],
                              T.grad(prediction_sup_det[:, class_idx].mean(),
                                     input_var_sup).mean(0))
    grads_wrt_inputs = mlh.get_grads_wrt_inputs(x_test, grad_fn, feature_names,
                                                label_names)

    # Obtain function that takes as inputs normed inputs and returns the
    # gradient of a class score wrt the normed inputs themselves (this is
    # required because computing the integrated gradients requires to be able
    # to interpolate between an example where all features are missing and an
    # example where any number of features are provided)
    grad_from_normed_fn = theano.function(
        [normed_input_sup, class_idx],
        T.grad(prediction_sup_det[:, class_idx].sum(),
               normed_input_sup).mean(0))

    # Collect integrated gradients over the whole test set. Obtain, for each
    # SNP, for each possible value (0, 1 or 2), the average contribution of
    # that value for that SNP to the score of each class.
    avg_int_grads = np.zeros((x_test.shape[1], 3, len(label_names)),
                             dtype="float32")
    counts_int_grads = np.zeros((x_test.shape[1], 3), dtype="int32")
    for test_idx in range(x_test.shape[0]):
        int_grads = mlh.get_integrated_gradients(x_test[test_idx],
                                                 grad_from_normed_fn,
                                                 feature_names,
                                                 label_names,
                                                 norm_mus,
                                                 norm_sigmas,
                                                 m=100)

        # Boolean mask telling which of the values 0/1/2 each SNP takes for
        # this example; missing entries (-1) match none of them.
        snp_value_mask = np.arange(3) == x_test[test_idx][:, None]
        avg_int_grads += (snp_value_mask[:, :, None] *
                          int_grads.transpose()[:, None, :])
        counts_int_grads += snp_value_mask
    # NOTE: a (SNP, value) pair never observed in the test set has a count of
    # zero, so its average is 0 / 0 and comes out as NaN.
    avg_int_grads = avg_int_grads / counts_int_grads[:, :, None]

    # Save all the additional information required for model analysis :
    # - Test predictions
    # - SNP IDs
    # - Subject IDs
    # - Normalization parameters for the input minibatches
    np.savez(os.path.join(save_path, 'additional_data.npz'),
             test_labels=y_test,
             test_scores=np.array(test_scores)[:, 0],
             test_predictions=np.array(test_predictions)[:, 0],
             norm_mus=norm_mus,
             norm_sigmas=norm_sigmas,
             grads_wrt_inputs=grads_wrt_inputs,
             exmpl_ids_train=exmpl_ids_train,
             exmpl_ids_valid=exmpl_ids_valid,
             exmpl_ids_test=exmpl_ids_test,
             feature_names=feature_names,
             label_names=label_names,
             avg_int_grads=avg_int_grads)

    # Copy files to loadpath (only if some training has been done so there
    # is a local saved version)
    if save_path != save_copy and num_epochs > 0:
        print('Copying model and other training files to {}'.format(save_copy))
        copy_tree(save_path, save_copy)
# Example #2
# 0
def execute(dataset,
            n_hidden_u,
            n_hidden_t_enc,
            n_hidden_t_dec,
            n_hidden_s,
            learning_rate,
            learning_rate_annealing=1.,
            embedding_source=None,
            alpha=1,
            beta=1,
            gamma=1,
            lmd=0,
            encoder_net_init=0.001,
            decoder_net_init=0.001,
            disc_nonlinearity='softmax',
            keep_labels=1.0,
            prec_recall_cutoff=True,
            missing_labels_val=-1.0,
            which_fold=0,
            early_stop_criterion='accuracy',
            save_path='/Tmp/romerosa/DietNetworks/',
            dataset_path='/Tmp/' + os.environ.get("USER", "") + '/datasets/',
            resume=False,
            exp_name=''):
    """Evaluate a saved model on the test set and save confusion matrices.

    The networks are rebuilt with the given hyper-parameters, the weights
    stored in ``dietnets_best.npz`` (inside the experiment folder) are
    loaded, the test set is monitored, and two confusion matrices are
    written to ``cm<which_fold>.npz`` in the same folder:

    - ``cm_e``: 26 x 26, indexed ``[predicted class, true class]``.
    - ``cm_c``: 5 x 5, same layout, after mapping each class to the index
      of the group that contains it in
      ``mh.create_1000_genomes_continent_labels()``.

    Parameters
    ----------
    dataset : str
        Dataset name; forwarded to ``mlh.load_data`` and used as a
        sub-folder of ``save_path``.
    n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, n_hidden_s : sequences
        Layer sizes forwarded to the ``mh.build_*`` helpers.
        ``n_hidden_t_enc`` and ``n_hidden_t_dec`` must be non-empty and end
        with the same size.
    learning_rate, learning_rate_annealing, early_stop_criterion
        Only used here to rebuild the experiment name.
    embedding_source : str or None
        ``None`` selects the ``'raw'`` embedding input; otherwise the name
        is expanded to ``<dataset_path>/<name>_fold<which_fold>.npy``.
    alpha, beta, gamma : numbers
        Weights of the three reconstruction losses in the total loss.
        ``beta`` is forced to 0 when ``gamma`` is 0.
    lmd : number
        Weight of the L2 penalty on the trainable parameters.
    encoder_net_init, decoder_net_init : float
        Initialisation arguments forwarded to the network builders (the
        weights are overwritten by the loaded model afterwards).
    disc_nonlinearity : str
        Forwarded to the supervised network / loss / test helpers.
    keep_labels, missing_labels_val, prec_recall_cutoff
        Forwarded to ``mlh.load_data``, ``mh.define_sup_loss`` and
        ``mlh.monitoring`` respectively.
    which_fold : int
        Fold index; forwarded to ``mlh.load_data`` and used in file names.
    save_path, dataset_path : str
        Root folders for experiments and datasets.
    resume : bool
        Unused by this function.
    exp_name : str
        Prefix of the experiment name.
    """
    # Prepare embedding information
    if embedding_source is None:
        embedding_input = 'raw'
    else:
        embedding_input = embedding_source
        embedding_source = os.path.join(
            dataset_path, embedding_input + '_fold' + str(which_fold) + '.npy')

    # Load the dataset
    print("Loading data")
    x_train, y_train, x_valid, y_valid, x_test, y_test, \
        x_unsup, training_labels = mlh.load_data(
            dataset, dataset_path, embedding_source,
            which_fold=which_fold, keep_labels=keep_labels,
            missing_labels_val=missing_labels_val,
            embedding_input=embedding_input)

    if x_unsup is not None:
        n_samples_unsup = x_unsup.shape[1]
    else:
        n_samples_unsup = 0

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    print("Number of features : ", n_feats)
    print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1]))
    n_targets = y_train.shape[1]

    # Set some variables
    batch_size = 138
    beta = gamma if (gamma == 0) else beta

    # Preparing folder to save stuff
    if embedding_source is None:
        embedding_name = embedding_input
    else:
        embedding_name = embedding_source.replace("_", "").split(".")[0]
        exp_name += embedding_name.rsplit('/', 1)[::-1][0] + '_'

    exp_name += mlh.define_exp_name(keep_labels, alpha, beta, gamma, lmd,
                                    n_hidden_u, n_hidden_t_enc, n_hidden_t_dec,
                                    n_hidden_s, which_fold, learning_rate,
                                    decoder_net_init, encoder_net_init,
                                    early_stop_criterion,
                                    learning_rate_annealing)

    print("Experiment: " + exp_name)
    save_path = os.path.join(save_path, dataset, exp_name)
    print(save_path)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.matrix('input_sup')
    input_var_unsup = theano.shared(x_unsup, 'input_unsup')  # x_unsup TBD
    target_var_sup = T.matrix('target_sup')

    # Build model
    print("Building model")

    # Some checkings
    # assert len(n_hidden_u) > 0
    assert len(n_hidden_t_enc) > 0
    assert len(n_hidden_t_dec) > 0
    assert n_hidden_t_dec[-1] == n_hidden_t_enc[-1]

    # Build feature embedding networks (encoding and decoding if gamma > 0)
    nets, embeddings, pred_feat_emb = mh.build_feat_emb_nets(
        embedding_source, n_feats, n_samples_unsup, input_var_unsup,
        n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, gamma, encoder_net_init,
        decoder_net_init, save_path)

    # Build feature embedding reconstruction networks (if alpha > 0, beta > 0)
    nets += mh.build_feat_emb_reconst_nets(
        [alpha, beta], n_samples_unsup, n_hidden_u,
        [n_hidden_t_enc, n_hidden_t_dec], nets,
        [encoder_net_init, decoder_net_init])

    # Supervised network
    discrim_net, hidden_rep = mh.build_discrim_net(
        batch_size, n_feats, input_var_sup, n_hidden_t_enc, n_hidden_s,
        embeddings[0], disc_nonlinearity, n_targets)

    # Reconstruct network
    nets += [
        mh.build_reconst_net(hidden_rep,
                             embeddings[1] if len(embeddings) > 1 else None,
                             n_feats, gamma)
    ]

    # Materialise the non-empty networks as a list: on Python 3 ``filter``
    # returns an iterator, which cannot be concatenated with a list.
    active_nets = list(filter(None, nets))

    # Load best model
    with np.load(os.path.join(save_path, 'dietnets_best.npz')) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(
        active_nets + [discrim_net], param_values)

    print("Building and compiling training functions")

    # Build functions
    predictions, predictions_det = mh.define_predictions(nets, start=2)
    prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net])
    prediction_sup = prediction_sup[0]
    prediction_sup_det = prediction_sup_det[0]

    # Define losses
    # reconstruction losses
    _, reconst_losses_det = mh.define_reconst_losses(
        predictions, predictions_det,
        [input_var_unsup, input_var_unsup, input_var_sup])
    # supervised loss
    _, sup_loss_det = mh.define_sup_loss(disc_nonlinearity, prediction_sup,
                                         prediction_sup_det, keep_labels,
                                         target_var_sup, missing_labels_val)

    # Define inputs
    inputs = [input_var_sup, target_var_sup]

    # Combine losses
    loss_det = sup_loss_det + alpha*reconst_losses_det[0] + \
        beta*reconst_losses_det[1] + gamma*reconst_losses_det[2]

    # Define parameters
    params = lasagne.layers.get_all_params([discrim_net] + active_nets,
                                           trainable=True)

    l2_penalty = apply_penalty(params, l2)
    loss_det = loss_det + lmd * l2_penalty

    # Same length guard as the one used when building the reconstruction
    # net, so a single-embedding model does not raise IndexError here.
    has_dec_emb = len(embeddings) > 1 and embeddings[1] is not None

    # Monitoring Labels (entries whose reconstruction loss is 0 are dropped)
    monitor_labels = [
        "reconst. feat. W_enc", "reconst. feat. W_dec", "reconst. loss"
    ]
    monitor_labels = [
        i for i, j in zip(monitor_labels, reconst_losses_det) if j != 0
    ]
    monitor_labels += ["feat. W_enc. mean", "feat. W_enc var"]
    if has_dec_emb:
        monitor_labels += ["feat. W_dec. mean", "feat. W_dec var"]
    monitor_labels += ["loss. sup.", "total loss"]

    # test function (outputs are kept aligned with monitor_labels)
    val_outputs = [j for j in reconst_losses_det if j != 0]
    val_outputs += [embeddings[0].mean(), embeddings[0].var()]
    if has_dec_emb:
        val_outputs += [embeddings[1].mean(), embeddings[1].var()]
    val_outputs += [sup_loss_det, loss_det]

    # Compute accuracy and add it to monitoring list
    test_acc, _ = mh.define_test_functions(disc_nonlinearity,
                                           prediction_sup,
                                           prediction_sup_det,
                                           target_var_sup)
    monitor_labels.append("accuracy")
    val_outputs.append(test_acc)

    # Compile validation function
    val_fn = theano.function(inputs, [prediction_sup_det] + val_outputs,
                             on_unused_input='ignore')

    # Evaluate the loaded model on the test set (no training happens here).
    print("Starting testing...")
    test_minibatches = mlh.iterate_minibatches(x_test,
                                               y_test,
                                               batch_size,
                                               shuffle=False)
    test_err, pred, targets = mlh.monitoring(test_minibatches,
                                             "test",
                                             val_fn,
                                             monitor_labels,
                                             prec_recall_cutoff,
                                             return_pred=True)

    lab = targets.argmax(1)
    pred_argmax = pred.argmax(1)

    # Each entry of continent_cat is a collection of class indices; a class
    # is mapped to the position of the collection that contains it.
    continent_cat = mh.create_1000_genomes_continent_labels()

    lab_cont = np.zeros(lab.shape)
    pred_cont = np.zeros(pred_argmax.shape)

    for i, c in enumerate(continent_cat):
        for el in c:
            lab_cont[lab == el] = i
            pred_cont[pred_argmax == el] = i

    # Confusion matrices, indexed [predicted, true]
    cm_e = np.zeros((26, 26))
    cm_c = np.zeros((5, 5))

    for i in range(26):
        for j in range(26):
            cm_e[i, j] = ((pred_argmax == i) * (lab == j)).sum()

    for i in range(5):
        for j in range(5):
            cm_c[i, j] = ((pred_cont == i) * (lab_cont == j)).sum()

    cm_file = os.path.join(save_path, 'cm' + str(which_fold) + '.npz')
    np.savez(cm_file, cm_e=cm_e, cm_c=cm_c)

    print(cm_file)
Beispiel #3
0
def execute(dataset,
            n_hidden_u,
            n_hidden_t_enc,
            n_hidden_t_dec,
            n_hidden_s,
            learning_rate,
            learning_rate_annealing=.99,
            embedding_source=None,
            alpha=1,
            beta=1,
            gamma=1,
            lmd=0.0,
            encoder_net_init=0.1,
            decoder_net_init=0.1,
            keep_labels=1.0,
            which_fold=0,
            early_stop_criterion='accuracy',
            exp_name='',
            representation='features',
            which_set='test',
            model_path='/Tmp/romerosa/DietNetworks/newmodel/',
            save_path='/Tmp/romerosa/DietNetworks/',
            dataset_path='/Tmp/' + os.environ.get("USER", "") + '/datasets/'):
    """Dump intermediate representations of a saved model to disk.

    The networks are rebuilt with the given hyper-parameters and the weights
    stored in ``<model_path>/<dataset>/<exp_name>/dietnets_best.npz`` are
    loaded. Depending on ``representation``:

    - ``'features'``: the output of every layer of the first feature
      embedding network (``nets[0]``) is saved as ``layer<i>.npy``.
    - ``'subjects'``: the output of every ``DenseLayer`` of the supervised
      network is computed for the selected data split and saved as
      ``layer<i>_<which_set>.npz`` with keys ``representation`` and
      ``label`` (``y.argmax(1)``).

    Any other ``representation`` value builds the model and writes nothing.
    Files go to ``<save_path>/<representation>/<embedding_input>/fold<k>``.

    Parameters
    ----------
    dataset : str
        Dataset name; forwarded to ``mlh.load_data`` and used as a
        sub-folder of ``model_path``.
    n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, n_hidden_s : sequences
        Layer sizes forwarded to the ``mh.build_*`` helpers.
        ``n_hidden_t_enc`` and ``n_hidden_t_dec`` must be non-empty and end
        with the same size.
    learning_rate, learning_rate_annealing, lmd, keep_labels,
    early_stop_criterion
        Only used here to rebuild the experiment name.
    embedding_source : str or None
        ``None`` selects the ``'raw'`` embedding input; otherwise the name
        is expanded to ``<dataset_path>/<name>_fold<which_fold>.npy``.
    alpha, beta, gamma : numbers
        Forwarded to the network builders; ``beta`` is forced to 0 when
        ``gamma`` is 0.
    encoder_net_init, decoder_net_init : float
        Initialisation arguments forwarded to the network builders (the
        weights are overwritten by the loaded model afterwards).
    which_fold : int
        Fold index; forwarded to ``mlh.load_data`` and used in paths.
    exp_name : str
        Prefix of the experiment name.
    representation : str
        ``'features'`` or ``'subjects'`` (see above).
    which_set : str
        ``'train'``, ``'valid'`` or ``'test'``; only needed for
        ``representation='subjects'``.
    model_path, save_path, dataset_path : str
        Root folders for saved models, outputs and datasets.

    Raises
    ------
    ValueError
        If ``representation='subjects'`` and ``which_set`` is not one of the
        three known splits.
    """
    print(save_path)

    # Prepare embedding information
    if embedding_source is None:
        embedding_input = 'raw'
    else:
        embedding_input = embedding_source
        embedding_source = os.path.join(
            dataset_path, embedding_input + '_fold' + str(which_fold) + '.npy')

    # Load the dataset
    print("Loading data")
    x_train, y_train, x_valid, y_valid, x_test, y_test, \
        x_unsup, training_labels = mlh.load_data(
            dataset, dataset_path, embedding_source,
            which_fold=which_fold, keep_labels=1.0,
            missing_labels_val=-1.0,
            embedding_input=embedding_input)

    # Select the split to process. An unknown name is only an error for the
    # 'subjects' representation, which is the only one reading x and y.
    splits = {
        'train': (x_train, y_train),
        'valid': (x_valid, y_valid),
        'test': (x_test, y_test),
    }
    x, y = splits.get(which_set, (None, None))

    if x_unsup is not None:
        n_samples_unsup = x_unsup.shape[1]
    else:
        n_samples_unsup = 0

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    n_targets = y_train.shape[1]

    # Set some variables
    batch_size = 1
    beta = gamma if (gamma == 0) else beta

    # Preparing folder to save stuff
    if embedding_source is None:
        embedding_name = embedding_input
    else:
        embedding_name = embedding_source.replace("_", "").split(".")[0]
        exp_name += embedding_name.rsplit('/', 1)[::-1][0] + '_'

    exp_name += mlh.define_exp_name(keep_labels, alpha, beta, gamma, lmd,
                                    n_hidden_u, n_hidden_t_enc, n_hidden_t_dec,
                                    n_hidden_s, which_fold, learning_rate,
                                    decoder_net_init, encoder_net_init,
                                    early_stop_criterion,
                                    learning_rate_annealing)

    print("Experiment: " + exp_name)
    model_path = os.path.join(model_path, dataset, exp_name)
    print(model_path)
    save_path = os.path.join(save_path, representation, embedding_input,
                             'fold' + str(which_fold))
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.matrix('input_sup')
    input_var_unsup = theano.shared(x_unsup, 'input_unsup')  # x_unsup TBD
    target_var_sup = T.matrix('target_sup')

    # Build model
    print("Building model")

    # Some checkings
    # assert len(n_hidden_u) > 0
    assert len(n_hidden_t_enc) > 0
    assert len(n_hidden_t_dec) > 0
    assert n_hidden_t_dec[-1] == n_hidden_t_enc[-1]

    # Build feature embedding networks (encoding and decoding if gamma > 0)
    nets, embeddings, pred_feat_emb = mh.build_feat_emb_nets(
        embedding_source, n_feats, n_samples_unsup, input_var_unsup,
        n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, gamma, encoder_net_init,
        decoder_net_init, save_path)

    # Build feature embedding reconstruction networks (if alpha > 0, beta > 0)
    nets += mh.build_feat_emb_reconst_nets(
        [alpha, beta], n_samples_unsup, n_hidden_u,
        [n_hidden_t_enc, n_hidden_t_dec], nets,
        [encoder_net_init, decoder_net_init])

    # Supervised network
    discrim_net, hidden_rep = mh.build_discrim_net(batch_size, n_feats,
                                                   input_var_sup,
                                                   n_hidden_t_enc, n_hidden_s,
                                                   embeddings[0], 'softmax',
                                                   n_targets)

    # Reconstruct network
    nets += [
        mh.build_reconst_net(hidden_rep,
                             embeddings[1] if len(embeddings) > 1 else None,
                             n_feats, gamma)
    ]

    # Load best model. ``filter`` is wrapped in ``list`` because on Python 3
    # it returns an iterator, which cannot be concatenated with a list.
    with np.load(os.path.join(model_path, 'dietnets_best.npz')) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(
        list(filter(None, nets)) + [discrim_net], param_values)

    print("Building and compiling training functions")

    # Build and compile the function producing the requested representation
    if representation == 'features':
        feat_layers = lasagne.layers.get_all_layers(nets[0])
        predictions = lasagne.layers.get_output(feat_layers)
        # No explicit inputs: the function is compiled with an empty input
        # list and called without arguments.
        predict = theano.function([], predictions)
        all_pred = predict()

        for i, el in enumerate(all_pred):
            file_name = os.path.join(save_path, 'layer' + str(i) + '.npy')
            print(file_name)
            np.save(file_name, el)

    elif representation == 'subjects':
        if which_set not in splits:
            raise ValueError("Unknown which_set: %s" % which_set)

        subject_layers = lasagne.layers.get_all_layers(discrim_net)
        subject_layers = [
            el for el in subject_layers if isinstance(el, DenseLayer)
        ]
        predictions = lasagne.layers.get_output(subject_layers)
        predict = theano.function([input_var_sup], predictions)

        minibatches = mlh.iterate_minibatches(x,
                                              y,
                                              batch_size,
                                              shuffle=False)
        print("Starting testing...")
        # One entry per minibatch, each holding one output per dense layer
        per_batch = [predict(batch[0]) for batch in minibatches]

        # Regroup per layer and stack the minibatch outputs row-wise
        all_pred = [np.vstack(el) for el in zip(*per_batch)]

        for i, el in enumerate(all_pred):
            file_name = os.path.join(
                save_path, 'layer' + str(i) + '_' + which_set + '.npz')
            print(file_name)
            np.savez(file_name, representation=el, label=y.argmax(1))
Beispiel #4
0
def execute(
        dataset,
        n_hidden_u,
        n_hidden_t_enc,
        n_hidden_t_dec,
        n_hidden_s,
        embedding_source=None,
        num_epochs=500,
        learning_rate=.001,
        learning_rate_annealing=1.0,
        alpha=1,
        beta=1,
        gamma=1,
        lmd=.0001,
        disc_nonlinearity="sigmoid",
        encoder_net_init=0.2,
        decoder_net_init=0.2,
        keep_labels=1.0,
        prec_recall_cutoff=True,
        missing_labels_val=-1.0,
        which_fold=0,
        early_stop_criterion='loss_sup_det',
        embedding_input='raw',
        save_path='/Tmp/' + os.environ.get("USER", "") + '/savepath/',
        save_copy='/Tmp/' + os.environ.get("USER", "") + '/savecopy/',
        dataset_path='/Tmp/' + os.environ.get("USER", "") + '/datasets/',
        resume=False,
        exp_name='',
        random_proj=0):
    """Train the model with early stopping and save weights and errors.

    The networks are built, optionally initialised from
    ``model_feat_sel_last.npz`` (``resume=True``), and trained with RMSprop
    on the sum of the supervised loss, the weighted reconstruction losses
    and an L2 penalty. After every epoch the training and validation sets
    are monitored; when the early-stopping quantity improves the weights are
    saved as ``model_feat_sel_best.npz``, otherwise as
    ``model_feat_sel_last.npz``. Training ends after ``num_epochs`` epochs
    or after 100 consecutive epochs without improvement; the best weights
    are then reloaded and final results are computed. Everything is written
    to ``<save_path>/<dataset>/<exp_name>`` and finally copied to the
    matching folder under ``save_copy``.

    Parameters
    ----------
    dataset : str
        Dataset name; forwarded to ``mlh.load_data`` and used as a
        sub-folder of ``save_path`` / ``save_copy``.
    n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, n_hidden_s : sequences
        Layer sizes forwarded to the ``mh.build_*`` helpers.
        ``n_hidden_t_enc`` and ``n_hidden_t_dec`` must be non-empty and end
        with the same size.
    embedding_source : str or None
        Forwarded to ``mlh.load_data`` and ``mh.build_feat_emb_nets``. When
        ``None`` the feature embedding returned by the model is saved to
        ``feature_embedding.npz`` at the end of training.
    num_epochs : int
        Maximum number of training epochs.
    learning_rate : float
        Initial RMSprop learning rate.
    learning_rate_annealing : float
        Factor the learning rate is multiplied by after each epoch.
    alpha, beta, gamma : numbers
        Weights of the three reconstruction losses; ``beta`` is forced to 0
        when ``gamma`` is 0.
    lmd : number
        Weight of the L2 penalty on the updated parameters.
    disc_nonlinearity : str
        Forwarded to the supervised network / loss / test helpers.
    encoder_net_init, decoder_net_init : float
        Initialisation arguments forwarded to the network builders.
    keep_labels, missing_labels_val, prec_recall_cutoff
        Forwarded to ``mlh.load_data``, ``mh.define_sup_loss`` and
        ``mlh.monitoring`` respectively.
    which_fold : int
        Fold index forwarded to ``mlh.load_data``.
    early_stop_criterion : str
        Monitored label driving early stopping. ``'accuracy'`` is
        maximised and ``'loss. sup.'`` is minimised; the default
        ``'loss_sup_det'`` is treated as ``'loss. sup.'``. Any other
        monitored label never counts as an improvement after the first
        epoch.
    embedding_input : str
        Forwarded to ``mlh.load_data`` and used in the experiment name.
    save_path, save_copy, dataset_path : str
        Root folders for outputs, the copy of the outputs, and datasets.
    resume : bool
        Load ``model_feat_sel_last.npz`` from the experiment folder before
        training.
    exp_name : str
        Prefix of the experiment name.
    random_proj
        Forwarded to ``mh.build_feat_emb_nets``.

    Raises
    ------
    ValueError
        If ``early_stop_criterion`` is not among the monitored labels.
    """
    # Load the dataset
    print("Loading data")
    x_train, y_train, x_valid, y_valid, x_test, y_test, \
        x_unsup, training_labels = mlh.load_data(
            dataset, dataset_path, embedding_source,
            which_fold=which_fold, keep_labels=keep_labels,
            missing_labels_val=missing_labels_val,
            embedding_input=embedding_input)

    if x_unsup is not None:
        n_samples_unsup = x_unsup.shape[1]
    else:
        n_samples_unsup = 0

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    print("Number of features : ", n_feats)
    print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1]))
    n_targets = y_train.shape[1]

    # Set some variables
    batch_size = 128
    beta = gamma if (gamma == 0) else beta

    # Preparing folder to save stuff
    if embedding_source is None:
        embedding_name = embedding_input
    else:
        embedding_name = embedding_source.replace("_", "").split(".")[0]
        exp_name += embedding_name.rsplit('/', 1)[::-1][0] + '_'

    exp_name += 'final_'

    exp_name += mlh.define_exp_name(keep_labels, alpha, beta, gamma, lmd,
                                    n_hidden_u, n_hidden_t_enc, n_hidden_t_dec,
                                    n_hidden_s, which_fold, embedding_input,
                                    learning_rate, decoder_net_init,
                                    encoder_net_init, early_stop_criterion,
                                    learning_rate_annealing)

    print("Experiment: " + exp_name)
    save_path = os.path.join(save_path, dataset, exp_name)
    save_copy = os.path.join(save_copy, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if not os.path.exists(save_copy):
        os.makedirs(save_copy)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.matrix('input_sup')
    input_var_unsup = theano.shared(x_unsup, 'input_unsup')  # x_unsup TBD
    target_var_sup = T.matrix('target_sup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Build model
    print("Building model")

    # Some checkings
    # assert len(n_hidden_u) > 0
    assert len(n_hidden_t_enc) > 0
    assert len(n_hidden_t_dec) > 0
    assert n_hidden_t_dec[-1] == n_hidden_t_enc[-1]

    # Build feature embedding networks (encoding and decoding if gamma > 0)
    nets, embeddings, pred_feat_emb = mh.build_feat_emb_nets(
        embedding_source, n_feats, n_samples_unsup, input_var_unsup,
        n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, gamma, encoder_net_init,
        decoder_net_init, save_path, random_proj)

    # Build feature embedding reconstruction networks (if alpha > 0, beta > 0)
    nets += mh.build_feat_emb_reconst_nets(
        [alpha, beta], n_samples_unsup, n_hidden_u,
        [n_hidden_t_enc, n_hidden_t_dec], nets,
        [encoder_net_init, decoder_net_init])

    # Supervised network
    discrim_net, hidden_rep = mh.build_discrim_net(
        batch_size, n_feats, input_var_sup, n_hidden_t_enc, n_hidden_s,
        embeddings[0], disc_nonlinearity, n_targets)

    # Reconstruct network
    nets += [
        mh.build_reconst_net(hidden_rep,
                             embeddings[1] if len(embeddings) > 1 else None,
                             n_feats, gamma)
    ]

    # Materialise the non-empty networks as lists: on Python 3 ``filter``
    # returns an iterator, which cannot be concatenated with a list.
    active_nets = list(filter(None, nets))
    all_nets = active_nets + [discrim_net]

    def _load_weights(file_name):
        # Restore the weights of all networks from an .npz file written by
        # this function; extra trailing arrays in the file are ignored.
        with np.load(os.path.join(save_path, file_name)) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        nlayers = len(lasagne.layers.get_all_params(all_nets))
        lasagne.layers.set_all_param_values(all_nets, param_values[:nlayers])

    def _save_checkpoint(tag):
        # Save the current weights and the monitoring history under the
        # given tag ('best' or 'last').
        np.savez(
            os.path.join(save_path, 'model_feat_sel_' + tag + '.npz'),
            *lasagne.layers.get_all_param_values(all_nets))
        np.savez(save_path + "/errors_supervised_" + tag + ".npz",
                 list(zip(*train_monitored)), list(zip(*valid_monitored)))

    def _monitor(x_data, y_data, set_name, size):
        # Run the validation function over one data split, in order.
        minibatches = mlh.iterate_minibatches(x_data,
                                              y_data,
                                              size,
                                              shuffle=False)
        return mlh.monitoring(minibatches, set_name, val_fn,
                              monitor_labels, prec_recall_cutoff)

    # Load weights if we are resuming job
    if resume:
        _load_weights('model_feat_sel_last.npz')

    print("Building and compiling training functions")

    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(nets, start=2)
    prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net])
    prediction_sup = prediction_sup[0]
    prediction_sup_det = prediction_sup_det[0]

    # Define losses
    # reconstruction losses
    reconst_losses, reconst_losses_det = mh.define_reconst_losses(
        predictions, predictions_det,
        [input_var_unsup, input_var_unsup, input_var_sup])
    # supervised loss
    sup_loss, sup_loss_det = mh.define_sup_loss(disc_nonlinearity,
                                                prediction_sup,
                                                prediction_sup_det,
                                                keep_labels, target_var_sup,
                                                missing_labels_val)

    # Define inputs
    inputs = [input_var_sup, target_var_sup]

    # Define parameters
    params = lasagne.layers.get_all_params([discrim_net] + active_nets,
                                           trainable=True)
    params_to_freeze = \
        lasagne.layers.get_all_params(active_nets, trainable=False)

    print('Number of params discrim: ' + str(len(params)))
    print('Number of params to freeze: ' + str(len(params_to_freeze)))

    # Drop every parameter listed in params_to_freeze from the update list
    for p in params_to_freeze:
        params = [el for el in params if el != p]

    print('Number of params to update: ' + str(len(params)))

    # Combine losses
    loss = sup_loss + alpha*reconst_losses[0] + beta*reconst_losses[1] + \
        gamma*reconst_losses[2]
    loss_det = sup_loss_det + alpha*reconst_losses_det[0] + \
        beta*reconst_losses_det[1] + gamma*reconst_losses_det[2]

    l2_penalty = apply_penalty(params, l2)
    loss = loss + lmd * l2_penalty
    loss_det = loss_det + lmd * l2_penalty

    # Compute network updates
    updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr)

    # Apply norm constraints on the weights (2-D update expressions only)
    for k in updates.keys():
        if updates[k].ndim == 2:
            updates[k] = lasagne.updates.norm_constraint(updates[k], 1.0)

    # Compile training function
    train_fn = theano.function(inputs,
                               loss,
                               updates=updates,
                               on_unused_input='ignore')

    # Same length guard as the one used when building the reconstruction
    # net, so a single-embedding model does not raise IndexError here.
    has_dec_emb = len(embeddings) > 1 and embeddings[1] is not None

    # Monitoring Labels (entries whose reconstruction loss is 0 are dropped)
    monitor_labels = [
        "reconst. feat. W_enc", "reconst. feat. W_dec", "reconst. loss"
    ]
    monitor_labels = [
        i for i, j in zip(monitor_labels, reconst_losses) if j != 0
    ]
    monitor_labels += ["feat. W_enc. mean", "feat. W_enc var"]
    if has_dec_emb:
        monitor_labels += ["feat. W_dec. mean", "feat. W_dec var"]
    monitor_labels += ["loss. sup.", "total loss"]

    # Build and compile test function (outputs aligned with monitor_labels)
    val_outputs = [
        i for i, j in zip(reconst_losses_det, reconst_losses) if j != 0
    ]
    val_outputs += [embeddings[0].mean(), embeddings[0].var()]
    if has_dec_emb:
        val_outputs += [embeddings[1].mean(), embeddings[1].var()]
    val_outputs += [sup_loss_det, loss_det]

    # Compute accuracy and add it to monitoring list
    test_acc, test_pred = mh.define_test_functions(disc_nonlinearity,
                                                   prediction_sup,
                                                   prediction_sup_det,
                                                   target_var_sup)
    monitor_labels.append("accuracy")
    val_outputs.append(test_acc)

    # Compile prediction function
    predict = theano.function([input_var_sup], test_pred)

    # Compile validation function
    val_fn = theano.function(inputs, [prediction_sup_det] + val_outputs,
                             on_unused_input='ignore')

    # Finally, launch the training loop.
    print("Starting training...")

    # Some variables
    max_patience = 100
    patience = 0
    test_batch_size = 138

    train_monitored = []
    valid_monitored = []
    train_loss = []
    best_valid = None
    # Defined up front so the final report works even when the test set has
    # no labels or no epoch is run.
    test_err = None

    # The default criterion name 'loss_sup_det' is not a monitored label;
    # the deterministic supervised loss is monitored as "loss. sup.".
    if early_stop_criterion == 'loss_sup_det':
        criterion_label = 'loss. sup.'
    else:
        criterion_label = early_stop_criterion

    # Pre-training monitoring
    print("Epoch 0 of {}".format(num_epochs))

    train_err = _monitor(x_train, y_train, "train", batch_size)
    valid_err = _monitor(x_valid, y_valid, "valid", batch_size)

    # Training loop
    start_training = time.time()
    for epoch in range(num_epochs):
        start_time = time.time()
        print("Epoch {} of {}".format(epoch + 1, num_epochs))
        nb_minibatches = 0
        loss_epoch = 0

        # Train pass
        for batch in mlh.iterate_minibatches(x_train,
                                             training_labels,
                                             batch_size,
                                             shuffle=True):
            loss_epoch += train_fn(*batch)
            nb_minibatches += 1

        loss_epoch /= nb_minibatches
        train_loss += [loss_epoch]

        # Monitoring on the training set
        train_err = _monitor(x_train, y_train, "train", batch_size)
        train_monitored += [train_err]

        # Monitoring on the validation set
        valid_err = _monitor(x_valid, y_valid, "valid", batch_size)
        valid_monitored += [valid_err]

        try:
            early_stop_val = valid_err[monitor_labels.index(criterion_label)]
        except (ValueError, IndexError):
            raise ValueError("There is no monitored value by the name of %s" %
                             early_stop_criterion)

        # Early stopping. The first epoch always counts as the best so far,
        # which guarantees that a "best" model exists on disk when it is
        # reloaded at the end of training.
        if epoch == 0:
            improved = True
        elif criterion_label == 'accuracy':
            improved = early_stop_val > best_valid
        elif criterion_label == 'loss. sup.':
            improved = early_stop_val < best_valid
        else:
            improved = False

        if improved:
            best_valid = early_stop_val
            patience = 0

            # Save stuff
            _save_checkpoint('best')

            # Monitor on the test set now because sometimes the saving doesn't
            # go well and there isn't a model to load at the end of training
            if y_test is not None:
                test_err = _monitor(x_test, y_test, "test", test_batch_size)
        else:
            patience += 1
            # Save stuff
            _save_checkpoint('last')

        # End training
        if patience == max_patience or epoch == num_epochs - 1:
            print("Ending training")
            # Load best model
            _load_weights('model_feat_sel_best.npz')
            if embedding_source is None:
                # Save embedding
                pred = pred_feat_emb()
                np.savez(os.path.join(save_path, 'feature_embedding.npz'),
                         pred)

            # Training set results
            train_err = _monitor(x_train, y_train, "train", batch_size)

            # Validation set results
            valid_err = _monitor(x_valid, y_valid, "valid", batch_size)

            # Test set results
            if y_test is not None:
                test_err = _monitor(x_test, y_test, "test", test_batch_size)
                np.savez(os.path.join(save_path, 'final_errors.npz'), test_err)
            else:
                # Collect the predictions of every minibatch (not only the
                # last one) and store them as a single array with the
                # samples along the first axis.
                test_predictions = [
                    predict(minibatch)
                    for minibatch in mlh.iterate_testbatches(
                        x_test, test_batch_size, shuffle=False)
                ]
                np.savez(os.path.join(save_path, 'test_predictions.npz'),
                         np.concatenate(test_predictions, axis=0))

            # Stop
            print("  epoch time:\t\t\t{:.3f}s \n".format(time.time() -
                                                         start_time))
            break

        print("  epoch time:\t\t\t{:.3f}s \n".format(time.time() - start_time))

        # Anneal the learning rate
        lr.set_value(float(lr.get_value() * learning_rate_annealing))

    # Print and save all final errors for train, validation and test
    print("Training time:\t\t\t{:.3f}s".format(time.time() - start_training))
    print("test_err:", test_err)

    # Copy files to loadpath
    if save_path != save_copy:
        print('Copying model and other training files to {}'.format(save_copy))
        copy_tree(save_path, save_copy)