import os
import time
from distutils.dir_util import copy_tree

import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import (InputLayer, DenseLayer, DropoutLayer,
                            ReshapeLayer, NonlinearityLayer)
from lasagne.nonlinearities import tanh, linear, sigmoid, softmax, rectify
from lasagne.regularization import apply_penalty, l2

# Project-local helpers. The module names below are an assumption based on
# the `mlh` and `mh` aliases used throughout; adjust them to the actual
# repository layout.
import mainloop_helpers as mlh
import model_helpers as mh


def execute(dataset, n_hidden_u, num_epochs=500, learning_rate=.001,
            learning_rate_annealing=1.0, lmd=.0001, embedding_input='raw',
            which_fold=0,
            save_path='/Tmp/$USER/feature_selection/newmodel/',
            save_copy='/Tmp/$USER/feature_selection/newmodel/',
            dataset_path='/Tmp/$USER/feature_selection/newmodel/'):
    # Load the dataset
    print("Loading data")
    x_unsup = mlh.load_data(dataset, dataset_path, None,
                            which_fold=which_fold, keep_labels=1.0,
                            missing_labels_val=-1.0,
                            embedding_input=embedding_input, transpose=True)

    x_train = x_unsup[0][0]
    x_valid = x_unsup[1][0]

    # Extract required information from data
    n_row, n_col = x_train.shape
    print('Data size ' + str(n_row) + 'x' + str(n_col))

    # Set some variables
    batch_size = 256

    # Define experiment name
    exp_name = 'pretrain_' + mlh.define_exp_name(
        1., 0, 0, 0, lmd, n_hidden_u, [], [], [], which_fold,
        embedding_input, learning_rate, 0, 0, 'reconst_loss',
        learning_rate_annealing)
    print('Experiment: ' + exp_name)

    # Preparing folder to save stuff
    save_path = os.path.join(save_path, dataset, exp_name)
    save_copy = os.path.join(save_copy, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Prepare Theano variables for inputs and targets
    input_var = T.matrix('input_unsup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Build model
    print("Building model")

    # Some checks
    assert len(n_hidden_u) > 0

    # Build unsupervised network
    encoder_net = InputLayer((None, n_col), input_var)
    for out in n_hidden_u:
        encoder_net = DenseLayer(encoder_net, num_units=out,
                                 nonlinearity=tanh)
        encoder_net = DropoutLayer(encoder_net)

    decoder_net = encoder_net
    for i in range(len(n_hidden_u) - 2, -1, -1):
        decoder_net = DenseLayer(decoder_net, num_units=n_hidden_u[i],
                                 nonlinearity=linear)
        decoder_net = DropoutLayer(decoder_net)
    decoder_net = DenseLayer(decoder_net, num_units=n_col,
                             nonlinearity=linear)

    if embedding_input == 'raw' or embedding_input == 'w2v':
        final_nonlin = linear
    elif embedding_input == 'bin':
        final_nonlin = sigmoid
    elif 'histo' in embedding_input:
        final_nonlin = softmax

    if embedding_input == 'histo3x26':
        laySize = lasagne.layers.get_output(decoder_net).shape
        decoder_net = ReshapeLayer(decoder_net, (laySize[0] * 26, 3))
    decoder_net = NonlinearityLayer(decoder_net, nonlinearity=final_nonlin)
    if embedding_input == 'histo3x26':
        decoder_net = ReshapeLayer(decoder_net, (laySize[0], laySize[1]))

    print("Building and compiling training functions")

    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(
        [encoder_net, decoder_net], start=0)
    prediction_sup, prediction_sup_det = mh.define_predictions(
        [encoder_net, decoder_net], start=0)

    # Define losses
    # reconstruction losses
    loss, loss_det = mh.define_loss(predictions[1], predictions_det[1],
                                    input_var, embedding_input)

    # Define parameters
    params = lasagne.layers.get_all_params(decoder_net, trainable=True)

    l2_penalty = apply_penalty(params, l2)
    loss = loss + lmd * l2_penalty
    loss_det = loss_det + lmd * l2_penalty

    # Compute network updates
    updates = lasagne.updates.adam(loss, params, learning_rate=lr)
    # updates = lasagne.updates.sgd(loss, params, learning_rate=lr)
    # updates = lasagne.updates.momentum(loss, params,
    #                                    learning_rate=lr, momentum=0.0)

    # Apply norm constraints on the weights
    for k in updates.keys():
        if updates[k].ndim == 2:
            updates[k] = lasagne.updates.norm_constraint(updates[k], 1.0)

    # Compile training function
    train_fn = theano.function([input_var], loss, updates=updates,
                               on_unused_input='ignore')

    # Expressions required for test
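    # The expressions below are used only for monitoring: the deterministic
    # reconstruction loss, plus summary statistics (min / mean / max / var)
    # of the learned feature embedding so its scale can be tracked while the
    # autoencoder is being pretrained.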
    monitor_labels = ['loss']
    val_outputs = [loss_det]

    # Add some monitoring on the learned feature embedding
    val_outputs += [predictions[0].min(), predictions[0].mean(),
                    predictions[0].max(), predictions[0].var()]
    monitor_labels += ["feat. emb. min", "feat. emb. mean",
                       "feat. emb. max", "feat. emb. var"]

    # Compile validation function
    val_fn = theano.function([input_var], val_outputs)

    pred_feat_emb = theano.function([input_var], predictions_det[0])

    # Finally, launch the training loop.
    print("Starting training...")

    # Some variables
    max_patience = 100
    patience = 0

    train_monitored = []
    valid_monitored = []
    train_loss = []

    nb_minibatches = n_row / batch_size
    print("Nb of minibatches: " + str(nb_minibatches))
    start_training = time.time()
    for epoch in range(num_epochs):
        start_time = time.time()
        print("Epoch {} of {}".format(epoch + 1, num_epochs))
        loss_epoch = 0

        # Train pass
        for batch in mlh.iterate_minibatches_unsup(x_train, batch_size,
                                                   shuffle=True):
            loss_epoch += train_fn(batch)
        loss_epoch /= nb_minibatches
        train_loss += [loss_epoch]

        # Monitoring on the training set
        train_minibatches = mlh.iterate_minibatches_unsup(x_train, batch_size,
                                                          shuffle=True)
        train_err = mlh.monitoring(train_minibatches, "train", val_fn,
                                   monitor_labels, start=0)
        train_monitored += [train_err]

        # Validation pass
        valid_minibatches = mlh.iterate_minibatches_unsup(x_valid, batch_size,
                                                          shuffle=True)
        valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn,
                                   monitor_labels, start=0)
        valid_monitored += [valid_err]

        try:
            early_stop_val = valid_err[monitor_labels.index('loss')]
        except ValueError:
            raise ValueError("There is no monitored value by the name of "
                             "'loss'")

        # Early stopping
        if epoch == 0:
            best_valid = early_stop_val
        elif early_stop_val < best_valid:
            best_valid = early_stop_val
            patience = 0

            # Save stuff
            np.savez(os.path.join(save_path,
                                  'model_enc_unsupervised_best.npz'),
                     *lasagne.layers.get_all_param_values(encoder_net))
            np.savez(os.path.join(save_path,
                                  'model_ae_unsupervised_best.npz'),
                     *lasagne.layers.get_all_param_values(encoder_net))
            np.savez(os.path.join(save_path, "errors_unsupervised_best.npz"),
                     zip(*train_monitored), zip(*valid_monitored))
        else:
            patience += 1
            # Save stuff
            np.savez(os.path.join(save_path,
                                  'model_enc_unsupervised_last.npz'),
                     *lasagne.layers.get_all_param_values(encoder_net))
            np.savez(os.path.join(save_path,
                                  'model_ae_unsupervised_last.npz'),
                     *lasagne.layers.get_all_param_values(encoder_net))
            np.savez(os.path.join(save_path, "errors_unsupervised_last.npz"),
                     zip(*train_monitored), zip(*valid_monitored))

        # End training
        if patience == max_patience or epoch == num_epochs - 1:
            print(" Ending training")
            # Load unsupervised best model
            if not os.path.exists(save_path +
                                  '/model_enc_unsupervised_best.npz'):
                print("No saved model to test and/or to generate"
                      " the embedding!")
            else:
                with np.load(save_path +
                             '/model_enc_unsupervised_best.npz') as f:
                    param_values = [f['arr_%d' % i]
                                    for i in range(len(f.files))]
                    lasagne.layers.set_all_param_values(encoder_net,
                                                        param_values)

                # Save embedding
                preds = []
                for batch in mlh.iterate_minibatches_unsup(x_train, 1,
                                                           shuffle=False):
                    preds.append(pred_feat_emb(batch))
                for batch in mlh.iterate_minibatches_unsup(x_valid, 1,
                                                           shuffle=False):
                    preds.append(pred_feat_emb(batch))
                preds = np.vstack(preds)
                np.savez(os.path.join(save_path, 'feature_embedding.npz'),
                         preds)

            # Stop
            print(" epoch time:\t\t\t{:.3f}s".format(time.time() -
                                                     start_time))
            break

        print(" epoch time:\t\t\t{:.3f}s".format(time.time() - start_time))

        # Anneal the learning rate
        lr.set_value(float(lr.get_value() * learning_rate_annealing))

    # Print all final errors for train, validation and test
    print("Training time:\t\t\t{:.3f}s".format(time.time() - start_training))

    # Copy files to the save_copy location
    if save_path != save_copy:
        print('Copying model and other training files to {}'.format(
            save_copy))
        copy_tree(save_path, save_copy)
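# Minimal usage sketch for the unsupervised pretraining entry point above.
# The dataset name, hidden-layer sizes and paths are illustrative
# placeholders only, not values taken from the project configuration.
if __name__ == '__main__':
    execute(dataset='1000_genomes',
            n_hidden_u=[100],          # one hidden layer of 100 tanh units
            num_epochs=500,
            learning_rate=1e-3,
            embedding_input='raw',
            which_fold=0,
            save_path='/Tmp/pretrain/',
            save_copy='/Tmp/pretrain/',
            dataset_path='/Tmp/datasets/')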
def execute(dataset, n_hidden_t_enc, n_hidden_s, num_epochs=500,
            learning_rate=.001, learning_rate_annealing=1.0, gamma=1,
            lmd=0., disc_nonlinearity="sigmoid", keep_labels=1.0,
            prec_recall_cutoff=True, missing_labels_val=-1.0, which_fold=1,
            early_stop_criterion='loss', embedding_input='raw',
            save_path='/Tmp/romerosa/feature_selection/',
            save_copy='/Tmp/romerosa/feature_selection/',
            dataset_path='/Tmp/carriepl/datasets/',
            resume=False, exp_name=None):
    # Load the dataset
    print("Loading data")
    x_train, y_train, x_valid, y_valid, x_test, y_test, \
        x_unsup, training_labels = mlh.load_data(
            dataset, dataset_path, None, which_fold=which_fold,
            keep_labels=keep_labels, missing_labels_val=missing_labels_val,
            embedding_input=embedding_input)

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    print("Number of features : ", n_feats)
    print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1]))
    n_targets = y_train.shape[1]

    # Set some variables
    batch_size = 1

    # Preparing folder to save stuff
    print("Experiment: " + exp_name)
    save_path = os.path.join(save_path, dataset, exp_name)
    save_copy = os.path.join(save_copy, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.matrix('input_sup')
    target_var_sup = T.matrix('target_sup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Build model
    print("Building model")
    discrim_net = InputLayer((None, n_feats), input_var_sup)
    discrim_net = DenseLayer(discrim_net, num_units=n_hidden_t_enc[-1],
                             nonlinearity=rectify)

    # Reconstruct the input using dec_feat_emb
    if gamma > 0:
        reconst_net = DenseLayer(discrim_net, num_units=n_feats,
                                 nonlinearity=linear)
        nets = [reconst_net]
    else:
        nets = [None]

    # Add supervised hidden layers
    for hid in n_hidden_s:
        discrim_net = DropoutLayer(discrim_net)
        discrim_net = DenseLayer(discrim_net, num_units=hid)

    assert disc_nonlinearity in ["sigmoid", "linear", "rectify", "softmax"]
    discrim_net = DropoutLayer(discrim_net)
    discrim_net = DenseLayer(discrim_net, num_units=n_targets,
                             nonlinearity=eval(disc_nonlinearity))

    print("Building and compiling training functions")

    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(nets, start=0)
    prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net])
    prediction_sup = prediction_sup[0]
    prediction_sup_det = prediction_sup_det[0]

    # Define losses
    # reconstruction losses
    reconst_losses, reconst_losses_det = mh.define_reconst_losses(
        predictions, predictions_det, [input_var_sup])
    # supervised loss
    sup_loss, sup_loss_det = mh.define_sup_loss(
        disc_nonlinearity, prediction_sup, prediction_sup_det, keep_labels,
        target_var_sup, missing_labels_val)

    inputs = [input_var_sup, target_var_sup]
    params = lasagne.layers.get_all_params([discrim_net] + nets,
                                           trainable=True)
    print('Number of params: ' + str(len(params)))

    # Combine losses
    loss = sup_loss + gamma * reconst_losses[0]
    loss_det = sup_loss_det + gamma * reconst_losses_det[0]

    l2_penalty = apply_penalty(params, l2)
    loss = loss + lmd * l2_penalty
    loss_det = loss_det + lmd * l2_penalty

    # Compute network updates
    updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr)
    # updates = lasagne.updates.sgd(loss, params, learning_rate=lr)
    # updates = lasagne.updates.momentum(loss, params,
    #                                    learning_rate=lr, momentum=0.0)

    # Apply norm constraints on the weights
    for k in updates.keys():
        if updates[k].ndim == 2:
            updates[k] = lasagne.updates.norm_constraint(updates[k], 1.0)

    # Compile training function
    train_fn = theano.function(inputs, loss, updates=updates,
                               on_unused_input='ignore')

    # Monitoring Labels
    monitor_labels = ["reconst. loss"]
    monitor_labels = [i for i, j in zip(monitor_labels, reconst_losses)
                      if j != 0]
    monitor_labels += ["loss. sup.", "total loss"]

    # Build and compile test function
    val_outputs = reconst_losses_det
    val_outputs = [i for i, j in zip(val_outputs, reconst_losses) if j != 0]
    val_outputs += [sup_loss_det, loss_det]

    # Compute accuracy and add it to monitoring list
    test_acc, test_pred = mh.define_test_functions(
        disc_nonlinearity, prediction_sup, prediction_sup_det,
        target_var_sup)
    monitor_labels.append("accuracy")
    val_outputs.append(test_acc)

    # Compile prediction function
    predict = theano.function([input_var_sup], test_pred)

    # Compile validation function
    val_fn = theano.function(inputs,
                             [prediction_sup_det] + val_outputs,
                             on_unused_input='ignore')

    # Finally, launch the testing loop.
    print("Starting testing...")

    if not os.path.exists(save_copy + '/model_feat_sel_best.npz'):
        print("No saved model to test and/or to generate the embedding!")
    else:
        with np.load(save_copy + '/model_feat_sel_best.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            lasagne.layers.set_all_param_values(
                filter(None, nets) + [discrim_net], param_values)

        test_minibatches = mlh.iterate_minibatches(x_test, y_test, batch_size,
                                                   shuffle=False)
        test_err, pred, targets = mlh.monitoring(test_minibatches, "test",
                                                 val_fn, monitor_labels,
                                                 prec_recall_cutoff,
                                                 return_pred=True)

        lab = targets.argmax(1)
        pred_argmax = pred.argmax(1)

        continent_cat = mh.create_1000_genomes_continent_labels()

        lab_cont = np.zeros(lab.shape)
        pred_cont = np.zeros(pred_argmax.shape)
        for i, c in enumerate(continent_cat):
            for el in c:
                lab_cont[lab == el] = i
                pred_cont[pred_argmax == el] = i

        # Per-ethnicity (26 classes) and per-continent (5 classes) confusion
        # matrices: rows are predictions, columns are targets
        cm_e = np.zeros((26, 26))
        cm_c = np.zeros((5, 5))
        for i in range(26):
            for j in range(26):
                cm_e[i, j] = ((pred_argmax == i) * (lab == j)).sum()
        for i in range(5):
            for j in range(5):
                cm_c[i, j] = ((pred_cont == i) * (lab_cont == j)).sum()

        np.savez(os.path.join(save_copy, 'cm' + str(which_fold) + '.npz'),
                 cm_e=cm_e, cm_c=cm_c)
        print(os.path.join(save_copy, 'cm' + str(which_fold) + '.npz'))
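# Small helper sketch, assuming the cm<fold>.npz layout saved above: since
# rows of cm_e / cm_c index predictions and columns index targets, the
# diagonal counts correctly classified test samples.
def summarize_confusion(npz_path):
    import numpy as np
    with np.load(npz_path) as f:
        cm_e, cm_c = f['cm_e'], f['cm_c']
    # overall accuracy = correctly classified samples / all samples
    print('ethnicity accuracy : ', np.trace(cm_e) / cm_e.sum())
    print('continent accuracy : ', np.trace(cm_c) / cm_c.sum())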
def execute(dataset, n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, n_hidden_s,
            embedding_source=None, num_epochs=500, learning_rate=.001,
            learning_rate_annealing=1.0, alpha=1, beta=1, gamma=1, lmd=.0001,
            disc_nonlinearity="sigmoid", encoder_net_init=0.2,
            decoder_net_init=0.2, keep_labels=1.0, prec_recall_cutoff=True,
            missing_labels_val=-1.0, which_fold=0,
            early_stop_criterion='loss_sup_det', embedding_input='raw',
            model_path='/Tmp/' + os.environ["USER"] +
                       '/feature_selection/newmodel/',
            save_path='/Tmp/' + os.environ["USER"] + '/feature_selection/',
            dataset_path='/Tmp/' + os.environ["USER"] + '/datasets/',
            resume=False, exp_name=''):
    # Load the dataset
    print("Loading data")
    x_train, y_train, x_valid, y_valid, x_test, y_test, \
        x_unsup, training_labels = mlh.load_data(
            dataset, dataset_path, embedding_source, which_fold=which_fold,
            keep_labels=keep_labels, missing_labels_val=missing_labels_val,
            embedding_input=embedding_input)

    if x_unsup is not None:
        n_samples_unsup = x_unsup.shape[1]
    else:
        n_samples_unsup = 0

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    print("Number of features : ", n_feats)
    print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1]))
    n_targets = y_train.shape[1]

    # Set some variables
    batch_size = 1
    beta = gamma if (gamma == 0) else beta

    # Preparing folder to save stuff
    if embedding_source is None:
        embedding_name = embedding_input
    else:
        embedding_name = embedding_source.replace("_", "").split(".")[0]

    # exp_name = embedding_name.rsplit('/', 1)[::-1][0] + '_'
    # exp_name += '_new_'
    # exp_name += mlh.define_exp_name(keep_labels, alpha, beta, gamma, lmd,
    #                                 n_hidden_u, n_hidden_t_enc,
    #                                 n_hidden_t_dec, n_hidden_s, which_fold,
    #                                 embedding_input, learning_rate,
    #                                 decoder_net_init, encoder_net_init,
    #                                 early_stop_criterion,
    #                                 learning_rate_annealing)
    print("Experiment: " + exp_name)

    model_path = os.path.join(model_path, dataset, exp_name)
    print(model_path)
    save_path = os.path.join(save_path, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.matrix('input_sup')
    input_var_unsup = theano.shared(x_unsup, 'input_unsup')  # x_unsup TBD
    target_var_sup = T.matrix('target_sup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Build model
    print("Building model")

    # Some checks
    # assert len(n_hidden_u) > 0
    assert len(n_hidden_t_enc) > 0
    assert len(n_hidden_t_dec) > 0
    assert n_hidden_t_dec[-1] == n_hidden_t_enc[-1]

    # Build feature embedding networks (encoding and decoding if gamma > 0)
    nets, embeddings, pred_feat_emb = mh.build_feat_emb_nets(
        embedding_source, n_feats, n_samples_unsup, input_var_unsup,
        n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, gamma, encoder_net_init,
        decoder_net_init, save_path)

    # Build feature embedding reconstruction networks (if alpha > 0, beta > 0)
    nets += mh.build_feat_emb_reconst_nets(
        [alpha, beta], n_samples_unsup, n_hidden_u,
        [n_hidden_t_enc, n_hidden_t_dec], nets,
        [encoder_net_init, decoder_net_init])

    # Supervised network
    discrim_net, hidden_rep = mh.build_discrim_net(
        batch_size, n_feats, input_var_sup, n_hidden_t_enc, n_hidden_s,
        embeddings[0], disc_nonlinearity, n_targets)

    # Reconstruction network
    nets += [mh.build_reconst_net(
        hidden_rep, embeddings[1] if len(embeddings) > 1 else None,
        n_feats, gamma)]

    # Load best model
    with np.load(os.path.join(model_path, 'model_feat_sel_best.npz')) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(
            filter(None, nets) + [discrim_net], param_values)

    print("Building and compiling training functions")

    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(nets, start=2)
    prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net])
    prediction_sup = prediction_sup[0]
    prediction_sup_det = prediction_sup_det[0]

    # Define losses
    # reconstruction losses
    _, reconst_losses_det = mh.define_reconst_losses(
        predictions, predictions_det,
        [input_var_unsup, input_var_unsup, input_var_sup])
    # supervised loss
    _, sup_loss_det = mh.define_sup_loss(
        disc_nonlinearity, prediction_sup, prediction_sup_det, keep_labels,
        target_var_sup, missing_labels_val)

    # Define inputs
    inputs = [input_var_sup, target_var_sup]

    # Combine losses
    loss_det = sup_loss_det + alpha * reconst_losses_det[0] + \
        beta * reconst_losses_det[1] + gamma * reconst_losses_det[2]

    # Define parameters
    params = lasagne.layers.get_all_params(
        [discrim_net] + filter(None, nets), trainable=True)

    l2_penalty = apply_penalty(params, l2)
    loss_det = loss_det + lmd * l2_penalty

    # Monitoring Labels
    monitor_labels = ["reconst. feat. W_enc", "reconst. feat. W_dec",
                      "reconst. loss"]
    monitor_labels = [i for i, j in zip(monitor_labels, reconst_losses_det)
                      if j != 0]
    monitor_labels += ["feat. W_enc. mean", "feat. W_enc var"]
    monitor_labels += (["feat. W_dec. mean", "feat. W_dec var"]
                       if embeddings[1] is not None else [])
    monitor_labels += ["loss. sup.", "total loss"]

    # Build and compile test function
    val_outputs = reconst_losses_det
    val_outputs = [i for i, j in zip(val_outputs, reconst_losses_det)
                   if j != 0]
    val_outputs += [embeddings[0].mean(), embeddings[0].var()]
    val_outputs += ([embeddings[1].mean(), embeddings[1].var()]
                    if embeddings[1] is not None else [])
    val_outputs += [sup_loss_det, loss_det]

    # Compute accuracy and add it to monitoring list
    test_acc, test_pred = mh.define_test_functions(
        disc_nonlinearity, prediction_sup, prediction_sup_det,
        target_var_sup)
    monitor_labels.append("accuracy")
    val_outputs.append(test_acc)

    # Compile prediction function
    predict = theano.function([input_var_sup], test_pred)

    # Compile validation function
    val_fn = theano.function(inputs,
                             [prediction_sup_det] + val_outputs,
                             on_unused_input='ignore')

    # Finally, launch the testing loop.
    print("Starting testing...")

    test_minibatches = mlh.iterate_minibatches(x_test, y_test, batch_size,
                                               shuffle=False)
    test_err, pred, targets = mlh.monitoring(test_minibatches, "test",
                                             val_fn, monitor_labels,
                                             prec_recall_cutoff,
                                             return_pred=True)

    lab = targets.argmax(1)
    pred_argmax = pred.argmax(1)

    continent_cat = mh.create_1000_genomes_continent_labels()

    lab_cont = np.zeros(lab.shape)
    pred_cont = np.zeros(pred_argmax.shape)
    for i, c in enumerate(continent_cat):
        for el in c:
            lab_cont[lab == el] = i
            pred_cont[pred_argmax == el] = i

    # Per-ethnicity (26 classes) and per-continent (5 classes) confusion
    # matrices: rows are predictions, columns are targets
    cm_e = np.zeros((26, 26))
    cm_c = np.zeros((5, 5))
    for i in range(26):
        for j in range(26):
            cm_e[i, j] = ((pred_argmax == i) * (lab == j)).sum()
    for i in range(5):
        for j in range(5):
            cm_c[i, j] = ((pred_cont == i) * (lab_cont == j)).sum()

    np.savez(os.path.join(save_path, 'cm' + str(which_fold) + '.npz'),
             cm_e=cm_e, cm_c=cm_c)
    print(os.path.join(save_path, 'cm' + str(which_fold) + '.npz'))
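# Portability note: the `filter(None, nets) + [discrim_net]` pattern used in
# these scripts relies on Python 2, where `filter` returns a list. Under
# Python 3 it returns an iterator and the concatenation raises a TypeError;
# an equivalent that works on both versions is:
#
#     layers = [net for net in nets if net is not None] + [discrim_net]
#     lasagne.layers.set_all_param_values(layers, param_values)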
def execute(dataset, n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, n_hidden_s,
            embedding_source=None, num_epochs=500, learning_rate=.001,
            learning_rate_annealing=1.0, alpha=1, beta=1, gamma=1, lmd=.0001,
            disc_nonlinearity="sigmoid", encoder_net_init=0.2,
            decoder_net_init=0.2, keep_labels=1.0, prec_recall_cutoff=True,
            missing_labels_val=-1.0, which_fold=0,
            early_stop_criterion='loss_sup_det', embedding_input='raw',
            save_path='/Tmp/' + os.environ["USER"] + '/savepath/',  # a default value was needed?
            save_copy='/Tmp/' + os.environ["USER"] + '/savecopy/',
            dataset_path='/Tmp/' + os.environ["USER"] + '/datasets/',
            resume=False, exp_name='', random_proj=0):
    # Load the dataset
    print("Loading data")
    x_train, y_train, x_valid, y_valid, x_test, y_test, \
        x_unsup, training_labels = mlh.load_data(
            dataset, dataset_path, embedding_source, which_fold=which_fold,
            keep_labels=keep_labels, missing_labels_val=missing_labels_val,
            embedding_input=embedding_input)

    if x_unsup is not None:
        n_samples_unsup = x_unsup.shape[1]
    else:
        n_samples_unsup = 0

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    print("Number of features : ", n_feats)
    print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1]))
    n_targets = y_train.shape[1]

    # Set some variables
    batch_size = 128
    beta = gamma if (gamma == 0) else beta

    # Preparing folder to save stuff
    if embedding_source is None:
        embedding_name = embedding_input
    else:
        embedding_name = embedding_source.replace("_", "").split(".")[0]

    exp_name += embedding_name.rsplit('/', 1)[::-1][0] + '_'
    exp_name += 'final_'
    exp_name += mlh.define_exp_name(
        keep_labels, alpha, beta, gamma, lmd, n_hidden_u, n_hidden_t_enc,
        n_hidden_t_dec, n_hidden_s, which_fold, embedding_input,
        learning_rate, decoder_net_init, encoder_net_init,
        early_stop_criterion, learning_rate_annealing)
    print("Experiment: " + exp_name)

    save_path = os.path.join(save_path, dataset, exp_name)
    save_copy = os.path.join(save_copy, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if not os.path.exists(save_copy):
        os.makedirs(save_copy)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.matrix('input_sup')
    input_var_unsup = theano.shared(x_unsup, 'input_unsup')  # x_unsup TBD
    target_var_sup = T.matrix('target_sup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Build model
    print("Building model")

    # Some checks
    # assert len(n_hidden_u) > 0
    assert len(n_hidden_t_enc) > 0
    assert len(n_hidden_t_dec) > 0
    assert n_hidden_t_dec[-1] == n_hidden_t_enc[-1]

    # Build feature embedding networks (encoding and decoding if gamma > 0)
    nets, embeddings, pred_feat_emb = mh.build_feat_emb_nets(
        embedding_source, n_feats, n_samples_unsup, input_var_unsup,
        n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, gamma, encoder_net_init,
        decoder_net_init, save_path, random_proj)

    # Build feature embedding reconstruction networks (if alpha > 0, beta > 0)
    nets += mh.build_feat_emb_reconst_nets(
        [alpha, beta], n_samples_unsup, n_hidden_u,
        [n_hidden_t_enc, n_hidden_t_dec], nets,
        [encoder_net_init, decoder_net_init])

    # Supervised network
    discrim_net, hidden_rep = mh.build_discrim_net(
        batch_size, n_feats, input_var_sup, n_hidden_t_enc, n_hidden_s,
        embeddings[0], disc_nonlinearity, n_targets)

    # Reconstruction network
    nets += [mh.build_reconst_net(
        hidden_rep, embeddings[1] if len(embeddings) > 1 else None,
        n_feats, gamma)]

    # Load weights if we are resuming the job
    if resume:
        # Load the last saved model
        with np.load(os.path.join(save_path, 'model_feat_sel_last.npz')) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            nlayers = len(lasagne.layers.get_all_params(
                filter(None, nets) + [discrim_net]))
            lasagne.layers.set_all_param_values(
                filter(None, nets) + [discrim_net], param_values[:nlayers])

    print("Building and compiling training functions")

    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(nets, start=2)
    prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net])
    prediction_sup = prediction_sup[0]
    prediction_sup_det = prediction_sup_det[0]

    # Define losses
    # reconstruction losses
    reconst_losses, reconst_losses_det = mh.define_reconst_losses(
        predictions, predictions_det,
        [input_var_unsup, input_var_unsup, input_var_sup])
    # supervised loss
    sup_loss, sup_loss_det = mh.define_sup_loss(
        disc_nonlinearity, prediction_sup, prediction_sup_det, keep_labels,
        target_var_sup, missing_labels_val)

    # Define inputs
    inputs = [input_var_sup, target_var_sup]

    # Define parameters
    params = lasagne.layers.get_all_params(
        [discrim_net] + filter(None, nets), trainable=True)
    params_to_freeze = lasagne.layers.get_all_params(
        filter(None, nets), trainable=False)

    print('Number of params discrim: ' + str(len(params)))
    print('Number of params to freeze: ' + str(len(params_to_freeze)))

    for p in params_to_freeze:
        new_params = [el for el in params if el != p]
        params = new_params
    print('Number of params to update: ' + str(len(params)))

    # Combine losses
    loss = sup_loss + alpha * reconst_losses[0] + \
        beta * reconst_losses[1] + gamma * reconst_losses[2]
    loss_det = sup_loss_det + alpha * reconst_losses_det[0] + \
        beta * reconst_losses_det[1] + gamma * reconst_losses_det[2]

    l2_penalty = apply_penalty(params, l2)
    loss = loss + lmd * l2_penalty
    loss_det = loss_det + lmd * l2_penalty

    # Compute network updates
    updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr)
    # updates = lasagne.updates.sgd(loss, params, learning_rate=lr)
    # updates = lasagne.updates.momentum(loss, params,
    #                                    learning_rate=lr, momentum=0.0)

    # Apply norm constraints on the weights
    for k in updates.keys():
        if updates[k].ndim == 2:
            updates[k] = lasagne.updates.norm_constraint(updates[k], 1.0)

    # Compile training function
    train_fn = theano.function(inputs, loss, updates=updates,
                               on_unused_input='ignore')

    # Monitoring Labels
    monitor_labels = ["reconst. feat. W_enc", "reconst. feat. W_dec",
                      "reconst. loss"]
    monitor_labels = [i for i, j in zip(monitor_labels, reconst_losses)
                      if j != 0]
    monitor_labels += ["feat. W_enc. mean", "feat. W_enc var"]
    monitor_labels += (["feat. W_dec. mean", "feat. W_dec var"]
                       if embeddings[1] is not None else [])
    monitor_labels += ["loss. sup.", "total loss"]

    # Build and compile test function
    val_outputs = reconst_losses_det
    val_outputs = [i for i, j in zip(val_outputs, reconst_losses) if j != 0]
    val_outputs += [embeddings[0].mean(), embeddings[0].var()]
    val_outputs += ([embeddings[1].mean(), embeddings[1].var()]
                    if embeddings[1] is not None else [])
    val_outputs += [sup_loss_det, loss_det]

    # Compute accuracy and add it to monitoring list
    test_acc, test_pred = mh.define_test_functions(
        disc_nonlinearity, prediction_sup, prediction_sup_det,
        target_var_sup)
    monitor_labels.append("accuracy")
    val_outputs.append(test_acc)

    # Compile prediction function
    predict = theano.function([input_var_sup], test_pred)

    # Compile validation function
    val_fn = theano.function(inputs,
                             [prediction_sup_det] + val_outputs,
                             on_unused_input='ignore')

    # Finally, launch the training loop.
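    # The loop below uses patience-based early stopping: the model that is
    # best according to `early_stop_criterion` on the validation set is
    # checkpointed to 'model_feat_sel_best.npz', the most recent epoch to
    # 'model_feat_sel_last.npz', and training ends once `max_patience`
    # epochs pass without improvement or `num_epochs` is reached.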
print("Starting training...") # Some variables max_patience = 100 patience = 0 train_monitored = [] valid_monitored = [] train_loss = [] # Pre-training monitoring print("Epoch 0 of {}".format(num_epochs)) train_minibatches = mlh.iterate_minibatches(x_train, y_train, batch_size, shuffle=False) train_err = mlh.monitoring(train_minibatches, "train", val_fn, monitor_labels, prec_recall_cutoff) valid_minibatches = mlh.iterate_minibatches(x_valid, y_valid, batch_size, shuffle=False) valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn, monitor_labels, prec_recall_cutoff) # Training loop start_training = time.time() for epoch in range(num_epochs): start_time = time.time() print("Epoch {} of {}".format(epoch + 1, num_epochs)) nb_minibatches = 0 loss_epoch = 0 # Train pass for batch in mlh.iterate_minibatches(x_train, training_labels, batch_size, shuffle=True): loss_epoch += train_fn(*batch) nb_minibatches += 1 loss_epoch /= nb_minibatches train_loss += [loss_epoch] # Monitoring on the training set train_minibatches = mlh.iterate_minibatches(x_train, y_train, batch_size, shuffle=False) train_err = mlh.monitoring(train_minibatches, "train", val_fn, monitor_labels, prec_recall_cutoff) train_monitored += [train_err] # Monitoring on the validation set valid_minibatches = mlh.iterate_minibatches(x_valid, y_valid, batch_size, shuffle=False) valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn, monitor_labels, prec_recall_cutoff) valid_monitored += [valid_err] try: early_stop_val = valid_err[monitor_labels.index( early_stop_criterion)] except: raise ValueError("There is no monitored value by the name of %s" % early_stop_criterion) # Early stopping if epoch == 0: best_valid = early_stop_val elif (early_stop_val > best_valid and early_stop_criterion == 'accuracy') or \ (early_stop_val < best_valid and early_stop_criterion == 'loss. 
sup.'): best_valid = early_stop_val patience = 0 # Save stuff np.savez( os.path.join(save_path, 'model_feat_sel_best.npz'), *lasagne.layers.get_all_param_values( filter(None, nets) + [discrim_net])) np.savez(save_path + "/errors_supervised_best.npz", zip(*train_monitored), zip(*valid_monitored)) # Monitor on the test set now because sometimes the saving doesn't # go well and there isn't a model to load at the end of training if y_test is not None: test_minibatches = mlh.iterate_minibatches(x_test, y_test, 138, shuffle=False) test_err = mlh.monitoring(test_minibatches, "test", val_fn, monitor_labels, prec_recall_cutoff) else: patience += 1 # Save stuff np.savez( os.path.join(save_path, 'model_feat_sel_last.npz'), *lasagne.layers.get_all_param_values( filter(None, nets) + [discrim_net])) np.savez(save_path + "/errors_supervised_last.npz", zip(*train_monitored), zip(*valid_monitored)) # End training if patience == max_patience or epoch == num_epochs - 1: print("Ending training") # Load best model with np.load(os.path.join(save_path, 'model_feat_sel_best.npz')) as f: param_values = [f['arr_%d' % i] for i in range(len(f.files))] nlayers = len( lasagne.layers.get_all_params( filter(None, nets) + [discrim_net])) lasagne.layers.set_all_param_values( filter(None, nets) + [discrim_net], param_values[:nlayers]) if embedding_source is None: # Save embedding pred = pred_feat_emb() np.savez(os.path.join(save_path, 'feature_embedding.npz'), pred) # Training set results train_minibatches = mlh.iterate_minibatches(x_train, y_train, batch_size, shuffle=False) train_err = mlh.monitoring(train_minibatches, "train", val_fn, monitor_labels, prec_recall_cutoff) # Validation set results valid_minibatches = mlh.iterate_minibatches(x_valid, y_valid, batch_size, shuffle=False) valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn, monitor_labels, prec_recall_cutoff) # Test set results if y_test is not None: test_minibatches = mlh.iterate_minibatches(x_test, y_test, 138, shuffle=False) test_err = mlh.monitoring(test_minibatches, "test", val_fn, monitor_labels, prec_recall_cutoff) np.savez(os.path.join(save_path, 'final_errors.npz'), test_err) else: for minibatch in mlh.iterate_testbatches(x_test, 138, shuffle=False): test_predictions = [] test_predictions += [predict(minibatch)] np.savez(os.path.join(save_path, 'test_predictions.npz'), test_predictions) # Stop print(" epoch time:\t\t\t{:.3f}s \n".format(time.time() - start_time)) break print(" epoch time:\t\t\t{:.3f}s \n".format(time.time() - start_time)) # Anneal the learning rate lr.set_value(float(lr.get_value() * learning_rate_annealing)) # Print and save all final errors for train, validation and test print("Training time:\t\t\t{:.3f}s".format(time.time() - start_training)) print("test_err:", test_err) # Copy files to loadpath if save_path != save_copy: print('Copying model and other training files to {}'.format(save_copy)) copy_tree(save_path, save_copy)
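# Minimal usage sketch for the supervised Diet Networks training entry point
# above. All values are illustrative placeholders; in particular, the
# embedding file name assumes the output written by the unsupervised
# pretraining script.
if __name__ == '__main__':
    execute(dataset='1000_genomes',
            n_hidden_u=[100],
            n_hidden_t_enc=[100],
            n_hidden_t_dec=[100],
            n_hidden_s=[100],
            embedding_source='feature_embedding.npz',
            alpha=1, beta=1, gamma=1, lmd=1e-4,
            num_epochs=500,
            which_fold=0,
            embedding_input='raw',
            exp_name='dietnet_')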
def execute(dataset, n_hidden_t_enc, n_hidden_s, num_epochs=500,
            learning_rate=.001, learning_rate_annealing=1.0, gamma=1,
            lmd=0., disc_nonlinearity="sigmoid", keep_labels=1.0,
            prec_recall_cutoff=True, missing_labels_val=-1.0, which_fold=1,
            early_stop_criterion='loss',
            save_path='/Tmp/romerosa/DietNetworks/',
            save_copy='/Tmp/romerosa/DietNetworks/',
            dataset_path='/Tmp/carriepl/datasets/',
            resume=False):
    # Load the dataset
    print("Loading data")
    x_train, y_train, x_valid, y_valid, x_test, y_test, \
        x_unsup, training_labels = mlh.load_data(
            dataset, dataset_path, None, which_fold=which_fold,
            keep_labels=keep_labels, missing_labels_val=missing_labels_val,
            embedding_input='raw')

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    print("Number of features : ", n_feats)
    print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1]))
    n_targets = y_train.shape[1]

    # Set some variables
    batch_size = 128

    # Preparing folder to save stuff
    exp_name = 'basic_' + mlh.define_exp_name(
        keep_labels, 0, 0, gamma, lmd, [], n_hidden_t_enc, [], n_hidden_s,
        which_fold, learning_rate, 0, 0, early_stop_criterion,
        learning_rate_annealing)
    print("Experiment: " + exp_name)

    save_path = os.path.join(save_path, dataset, exp_name)
    save_copy = os.path.join(save_copy, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.matrix('input_sup')
    target_var_sup = T.matrix('target_sup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Build model
    print("Building model")
    discrim_net = InputLayer((None, n_feats), input_var_sup)
    discrim_net = DenseLayer(discrim_net, num_units=n_hidden_t_enc[-1],
                             nonlinearity=rectify)

    # Reconstruct the input using dec_feat_emb
    if gamma > 0:
        reconst_net = DenseLayer(discrim_net, num_units=n_feats,
                                 nonlinearity=linear)
        nets = [reconst_net]
    else:
        nets = [None]

    # Add supervised hidden layers
    for hid in n_hidden_s:
        discrim_net = DropoutLayer(discrim_net)
        discrim_net = DenseLayer(discrim_net, num_units=hid)

    assert disc_nonlinearity in ["sigmoid", "linear", "rectify", "softmax"]
    discrim_net = DropoutLayer(discrim_net)
    discrim_net = DenseLayer(discrim_net, num_units=n_targets,
                             nonlinearity=eval(disc_nonlinearity))

    print("Building and compiling training functions")

    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(nets, start=0)
    prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net])
    prediction_sup = prediction_sup[0]
    prediction_sup_det = prediction_sup_det[0]

    # Define losses
    # reconstruction losses
    reconst_losses, reconst_losses_det = mh.define_reconst_losses(
        predictions, predictions_det, [input_var_sup])
    # supervised loss
    sup_loss, sup_loss_det = mh.define_sup_loss(
        disc_nonlinearity, prediction_sup, prediction_sup_det, keep_labels,
        target_var_sup, missing_labels_val)

    inputs = [input_var_sup, target_var_sup]
    params = lasagne.layers.get_all_params([discrim_net] + nets,
                                           trainable=True)
    print('Number of params: ' + str(len(params)))

    # Combine losses
    loss = sup_loss + gamma * reconst_losses[0]
    loss_det = sup_loss_det + gamma * reconst_losses_det[0]

    l2_penalty = apply_penalty(params, l2)
    loss = loss + lmd * l2_penalty
    loss_det = loss_det + lmd * l2_penalty

    # Compute network updates
    updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr)
    # updates = lasagne.updates.sgd(loss, params, learning_rate=lr)
    # updates = lasagne.updates.momentum(loss, params,
    #                                    learning_rate=lr, momentum=0.0)

    # Apply norm constraints on the weights
    for k in updates.keys():
        if updates[k].ndim == 2:
            updates[k] = lasagne.updates.norm_constraint(updates[k], 1.0)

    # Compile training function
    train_fn = theano.function(inputs, loss, updates=updates,
                               on_unused_input='ignore')

    # Monitoring Labels
    monitor_labels = ["reconst. loss"]
    monitor_labels = [i for i, j in zip(monitor_labels, reconst_losses)
                      if j != 0]
    monitor_labels += ["loss. sup.", "total loss"]

    # Build and compile test function
    val_outputs = reconst_losses_det
    val_outputs = [i for i, j in zip(val_outputs, reconst_losses) if j != 0]
    val_outputs += [sup_loss_det, loss_det]

    # Compute accuracy and add it to monitoring list
    test_acc, test_pred = mh.define_test_functions(
        disc_nonlinearity, prediction_sup, prediction_sup_det,
        target_var_sup)
    monitor_labels.append("accuracy")
    val_outputs.append(test_acc)

    # Compile prediction function
    predict = theano.function([input_var_sup], test_pred)

    # Compile validation function
    val_fn = theano.function(inputs,
                             [prediction_sup_det] + val_outputs,
                             on_unused_input='ignore')

    # Finally, launch the training loop.
    print("Starting training...")

    # Some variables
    max_patience = 100
    patience = 0

    train_monitored = []
    valid_monitored = []
    train_loss = []

    # Pre-training monitoring
    print("Epoch 0 of {}".format(num_epochs))

    train_minibatches = mlh.iterate_minibatches(x_train, y_train, batch_size,
                                                shuffle=False)
    train_err = mlh.monitoring(train_minibatches, "train", val_fn,
                               monitor_labels, prec_recall_cutoff)

    valid_minibatches = mlh.iterate_minibatches(x_valid, y_valid, batch_size,
                                                shuffle=False)
    valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn,
                               monitor_labels, prec_recall_cutoff)

    # Training loop
    start_training = time.time()
    for epoch in range(num_epochs):
        start_time = time.time()
        print("Epoch {} of {}".format(epoch + 1, num_epochs))
        nb_minibatches = 0
        loss_epoch = 0

        # Train pass
        for batch in mlh.iterate_minibatches(x_train, training_labels,
                                             batch_size, shuffle=True):
            loss_epoch += train_fn(*batch)
            nb_minibatches += 1
        loss_epoch /= nb_minibatches
        train_loss += [loss_epoch]

        # Monitoring on the training set
        train_minibatches = mlh.iterate_minibatches(x_train, y_train,
                                                    batch_size, shuffle=False)
        train_err = mlh.monitoring(train_minibatches, "train", val_fn,
                                   monitor_labels, prec_recall_cutoff)
        train_monitored += [train_err]

        # Monitoring on the validation set
        valid_minibatches = mlh.iterate_minibatches(x_valid, y_valid,
                                                    batch_size, shuffle=False)
        valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn,
                                   monitor_labels, prec_recall_cutoff)
        valid_monitored += [valid_err]

        try:
            early_stop_val = valid_err[
                monitor_labels.index(early_stop_criterion)]
        except ValueError:
            raise ValueError("There is no monitored value by the name of %s" %
                             early_stop_criterion)

        # Early stopping
        if epoch == 0:
            best_valid = early_stop_val
        elif (early_stop_val > best_valid and
              early_stop_criterion == 'accuracy') or \
             (early_stop_val < best_valid and
              early_stop_criterion == 'loss. sup.'):
            best_valid = early_stop_val
            patience = 0

            # Save stuff
            np.savez(os.path.join(save_path, 'model_best.npz'),
                     *lasagne.layers.get_all_param_values(
                         filter(None, nets) + [discrim_net]))
            np.savez(save_path + "/errors_supervised_best.npz",
                     zip(*train_monitored), zip(*valid_monitored))
        else:
            patience += 1
            np.savez(os.path.join(save_path, 'model_last.npz'),
                     *lasagne.layers.get_all_param_values(
                         filter(None, nets) + [discrim_net]))
            np.savez(save_path + "/errors_supervised_last.npz",
                     zip(*train_monitored), zip(*valid_monitored))

        # End training
        if patience == max_patience or epoch == num_epochs - 1:
            print("Ending training")

            # Load best model
            if not os.path.exists(save_path + '/model_best.npz'):
                print("No saved model to test and/or to generate"
                      " the embedding!")
            else:
                with np.load(save_path + '/model_best.npz') as f:
                    param_values = [f['arr_%d' % i]
                                    for i in range(len(f.files))]
                    lasagne.layers.set_all_param_values(
                        filter(None, nets) + [discrim_net], param_values)

                # Training set results
                train_minibatches = mlh.iterate_minibatches(x_train, y_train,
                                                            batch_size,
                                                            shuffle=False)
                train_err = mlh.monitoring(train_minibatches, "train", val_fn,
                                           monitor_labels, prec_recall_cutoff)

                # Validation set results
                valid_minibatches = mlh.iterate_minibatches(x_valid, y_valid,
                                                            batch_size,
                                                            shuffle=False)
                valid_err = mlh.monitoring(valid_minibatches, "valid", val_fn,
                                           monitor_labels, prec_recall_cutoff)

                # Test set results
                if y_test is not None:
                    test_minibatches = mlh.iterate_minibatches(x_test, y_test,
                                                               batch_size,
                                                               shuffle=False)
                    test_err = mlh.monitoring(test_minibatches, "test",
                                              val_fn, monitor_labels,
                                              prec_recall_cutoff)
                else:
                    # Collect predictions over all test minibatches
                    test_predictions = []
                    for minibatch in mlh.iterate_testbatches(x_test,
                                                             batch_size,
                                                             shuffle=False):
                        test_predictions += [predict(minibatch)]
                    np.savez(os.path.join(save_path, 'test_predictions.npz'),
                             test_predictions)

            # Stop
            print(" epoch time:\t\t\t{:.3f}s \n".format(time.time() -
                                                        start_time))
            break

        print(" epoch time:\t\t\t{:.3f}s \n".format(time.time() - start_time))

        # Anneal the learning rate
        lr.set_value(float(lr.get_value() * learning_rate_annealing))

    # Print all final errors for train, validation and test
    print("Training time:\t\t\t{:.3f}s".format(time.time() - start_training))

    # Copy files to the save_copy location
    if save_path != save_copy:
        print('Copying model and other training files to {}'.format(
            save_copy))
        copy_tree(save_path, save_copy)
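# Sketch, assuming the errors_supervised_*.npz layout written above: the two
# positional arrays hold the training and validation monitoring curves, with
# one row per monitored quantity (in `monitor_labels` order) and one column
# per epoch, because the per-epoch value lists are transposed with zip(*...)
# before saving (this relies on Python 2's zip returning a list).
def load_error_curves(npz_path):
    import numpy as np
    with np.load(npz_path) as f:
        train_curves = np.asarray(f['arr_0'])
        valid_curves = np.asarray(f['arr_1'])
    return train_curves, valid_curves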