def train(): """ Performs training and evaluation of Regression model. """ print("Training started") # Set the random seeds for reproducibility np.random.seed(42) torch.manual_seed(42) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # Get number of units in each hidden layer if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] # convert dropout percentages dropout_percentages = [ int(perc) for perc in FLAGS.dropout_percentages.split(',') ] # check if length of dropout is equal to nr of hidden layers if len(dropout_percentages) != len(dnn_hidden_units): dropout_len = len(dropout_percentages) hidden_len = len(dnn_hidden_units) if dropout_len < hidden_len: for _ in range(hidden_len - dropout_len): dropout_percentages.append(0) else: dropout_percentages = dropout_percentages[:hidden_len] # use GPU if available device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print("Device :", device) # extract all data and divide into train, valid and split dataloaders with open(os.path.join(FLAGS.data_dir, "dataset.p"), "rb") as f: dataset = pkl.load(f) len_all = len(dataset) train_len, valid_len = int(0.7 * len_all), int(0.15 * len_all) test_len = len_all - train_len - valid_len splits = [train_len, valid_len, test_len] train_data, valid_data, test_data = random_split(dataset, splits) train_dl = DataLoader(train_data, batch_size=64, shuffle=True) valid_dl = DataLoader(valid_data, batch_size=64, shuffle=True, drop_last=True) test_dl = DataLoader(test_data, batch_size=64, shuffle=True, drop_last=True) # initialize MLP and loss function nn = Regression(5387, dnn_hidden_units, dropout_percentages, 1, FLAGS.neg_slope, FLAGS.batchnorm).to(device) loss_function = torch.nn.MSELoss() # initialize optimizer if FLAGS.optimizer == "SGD": optimizer = torch.optim.SGD(nn.parameters(), lr=FLAGS.learning_rate, weight_decay=FLAGS.weightdecay, momentum=FLAGS.momentum) elif FLAGS.optimizer == "Adam": optimizer = torch.optim.Adam(nn.parameters(), lr=FLAGS.learning_rate, amsgrad=FLAGS.amsgrad, weight_decay=FLAGS.weightdecay) elif FLAGS.optimizer == "AdamW": optimizer = torch.optim.AdamW(nn.parameters(), lr=FLAGS.learning_rate, amsgrad=FLAGS.amsgrad, weight_decay=FLAGS.weightdecay) elif FLAGS.optimizer == "RMSprop": optimizer = torch.optim.RMSprop(nn.parameters(), lr=FLAGS.learning_rate, weight_decay=FLAGS.weightdecay, momentum=FLAGS.momentum) # initialization for plotting and metrics training_losses = [] valid_losses = [] # construct name for saving models and figures variables_string = f"{FLAGS.optimizer}_{FLAGS.learning_rate}_{FLAGS.weightdecay}_{FLAGS.dnn_hidden_units}_{FLAGS.dropout_percentages}_{FLAGS.batchnorm}_{FLAGS.nr_epochs}" # training loop for epoch in range(FLAGS.nr_epochs): print(f"\nEpoch: {epoch}") batch_losses = [] nn.train() for batch, (x, y) in enumerate(train_dl): # append label to batch print("y", y.shape) onehot_y = torch.nn.functional.one_hot(y.squeeze().to(torch.int64), num_classes=11) print("onehot", onehot_y.shape) x = torch.cat((x.reshape(x.shape[0], -1), onehot_y), 1) # squeeze the input, and put on device x = x.reshape(x.shape[0], -1).to(device) y = y.reshape(y.shape[0], -1).to(device) optimizer.zero_grad() # forward pass pred = nn(x).to(device) # compute loss and backpropagate loss = loss_function(pred, y) loss.backward() # update the weights optimizer.step() # save training loss 
            batch_losses.append(loss.item())

        avg_epoch_loss = np.mean(batch_losses)
        training_losses.append(avg_epoch_loss)
        print(f"Average batch loss (epoch {epoch}): {avg_epoch_loss} "
              f"({len(batch_losses)} batches).")

        # Get the loss on the validation set, and save the model
        valid_losses.append(eval_on_test(nn, loss_function, valid_dl, device))
        torch.save(nn.state_dict(),
                   f"Models/Regression_{variables_string}.pt")

    # Compute the loss on the test set
    test_loss = eval_on_test(nn, loss_function, test_dl, device)
    print(f"Loss on test set: {test_loss}")

    plotting(training_losses, valid_losses, test_loss, variables_string)
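# `Regression` and `eval_on_test` are referenced above but defined elsewhere
# in the repository. The sketches below only illustrate what they could look
# like, inferred from how they are called here; they are assumptions, not the
# actual implementations. The Regression sketch follows the second version's
# float dropout probabilities (the first version passes integer percentages,
# which the real class would have to rescale), and for the first version the
# real eval_on_test would also have to append the one-hot label to each batch,
# as its training loop does.


class Regression(torch.nn.Module):
    """Sketch: MLP regressor matching the constructor calls above."""

    def __init__(self, n_inputs, hidden_units, dropout_probs, n_outputs,
                 neg_slope, batchnorm):
        super().__init__()
        layers = []
        in_features = n_inputs
        for units, prob in zip(hidden_units, dropout_probs):
            layers.append(torch.nn.Linear(in_features, units))
            if batchnorm:
                layers.append(torch.nn.BatchNorm1d(units))
            layers.append(torch.nn.LeakyReLU(neg_slope))
            layers.append(torch.nn.Dropout(prob))
            in_features = units
        layers.append(torch.nn.Linear(in_features, n_outputs))
        self.layers = torch.nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)


def eval_on_test(model, loss_function, dataloader, device, verbose=False,
                 return_preds=False):
    """Sketch: average the loss of `model` over `dataloader`."""
    model.eval()
    batch_losses, preds, trues = [], [], []
    with torch.no_grad():
        for x, y in dataloader:
            x = x.to(device)
            # Match the (batch, 1) shape of the predictions
            y = y.reshape(y.shape[0], -1).to(device)
            pred = model(x)
            loss = loss_function(pred, y)
            batch_losses.append(loss.item())
            if verbose:
                print(f"eval batch loss: {loss.item()}")
            if return_preds:
                preds.append(pred.cpu())
                trues.append(y.cpu())
    avg_loss = float(np.mean(batch_losses))
    if return_preds:
        return avg_loss, torch.cat(preds), torch.cat(trues)
    return avg_loss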
def train(): """ Performs training and evaluation of Regression model. """ # Set the random seeds for reproducibility np.random.seed(10) torch.manual_seed(10) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # Get number of units in each hidden layer if FLAGS.dnn_hidden_units: dnn_hidden_units = FLAGS.dnn_hidden_units.split(",") dnn_hidden_units = [ int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units ] else: dnn_hidden_units = [] # convert dropout percentages dropout_probs = [float(prob) for prob in FLAGS.dropout_probs.split(',')] # check if length of dropout is equal to nr of hidden layers if len(dropout_probs) != len(dnn_hidden_units): dropout_len = len(dropout_probs) hidden_len = len(dnn_hidden_units) if dropout_len < hidden_len: for _ in range(hidden_len - dropout_len): dropout_probs.append(0) else: dropout_probs = dropout_probs[:hidden_len] # use GPU if available device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print("Device :", device) # extract all data and divide into train, valid and split dataloaders dataset_filename = f"dataset_filename=MIMICS-Click.tsv_expanded=False_balance=True_impression={FLAGS.impression}_reduced_classes={FLAGS.reduced_classes}_embedder={FLAGS.embedder}.p" with open(os.path.join(FLAGS.data_dir, dataset_filename), "rb") as f: dataset = pkl.load(f) len_all = len(dataset) train_len, valid_len = int(0.7 * len_all), int(0.15 * len_all) test_len = len_all - train_len - valid_len splits = [train_len, valid_len, test_len] train_data, valid_data, test_data = random_split(dataset, splits) train_dl = DataLoader(train_data, batch_size=FLAGS.batch_size, shuffle=True, drop_last=True) valid_dl = DataLoader(valid_data, batch_size=FLAGS.batch_size, shuffle=True, drop_last=True) test_dl = DataLoader(test_data, batch_size=FLAGS.batch_size, shuffle=True, drop_last=True) with open(f"{FLAGS.data_dir}/test_dl.pt", "wb") as f: pkl.dump(test_dl, f) # initialize MLP and loss function input_size = iter(train_dl).next()[0].shape[1] # 5376 for BERT embeddings nn = Regression(input_size, dnn_hidden_units, dropout_probs, 1, FLAGS.neg_slope, FLAGS.batchnorm).to(device) loss_function = torch.nn.MSELoss() if FLAGS.verbose: print(f"neural net:\n {[param.data for param in nn.parameters()]}") # initialize optimizer if FLAGS.optimizer == "SGD": optimizer = torch.optim.SGD(nn.parameters(), lr=FLAGS.learning_rate, weight_decay=FLAGS.weightdecay, momentum=FLAGS.momentum) elif FLAGS.optimizer == "Adam": optimizer = torch.optim.Adam(nn.parameters(), lr=FLAGS.learning_rate, amsgrad=FLAGS.amsgrad, weight_decay=FLAGS.weightdecay) elif FLAGS.optimizer == "AdamW": optimizer = torch.optim.AdamW(nn.parameters(), lr=FLAGS.learning_rate, amsgrad=FLAGS.amsgrad, weight_decay=FLAGS.weightdecay) elif FLAGS.optimizer == "RMSprop": optimizer = torch.optim.RMSprop(nn.parameters(), lr=FLAGS.learning_rate, weight_decay=FLAGS.weightdecay, momentum=FLAGS.momentum) # initialization for plotting and metrics training_losses = [] valid_losses = [] initial_train_loss = eval_on_test(nn, loss_function, train_dl, device) training_losses.append(initial_train_loss) initial_valid_loss = eval_on_test(nn, loss_function, valid_dl, device) valid_losses.append(initial_valid_loss) # construct name for saving models and figures variables_string = 
f"regression_{FLAGS.embedder}_{FLAGS.impression}_{FLAGS.reduced_classes}_{FLAGS.optimizer}_{FLAGS.learning_rate}_{FLAGS.weightdecay}_{FLAGS.momentum}_{FLAGS.dnn_hidden_units}_{FLAGS.dropout_probs}_{FLAGS.batchnorm}_{FLAGS.nr_epochs}" overall_batch = 0 min_valid_loss = 10000 # training loop for epoch in range(FLAGS.nr_epochs): print(f"\nEpoch: {epoch}") for batch, (x, y) in enumerate(train_dl): nn.train() # squeeze the input, and put on device x = x.to(device) y = y.to(device) optimizer.zero_grad() # forward pass pred = nn(x).to(device) # compute loss and backpropagate loss = loss_function(pred, y) loss.backward() # update the weights optimizer.step() # save training loss training_losses.append(loss.item()) # print(f"batch loss ({batch}): {loss.item()}") # get loss on validation set and evaluate if overall_batch % FLAGS.eval_freq == 0 and overall_batch != 0: valid_loss = eval_on_test(nn, loss_function, valid_dl, device) valid_losses.append(valid_loss) print( f"Training loss: {loss.item()} / Valid loss: {valid_loss}") if valid_loss < min_valid_loss: print( f"Model is saved in epoch {epoch}, overall batch: {overall_batch}" ) torch.save(nn.state_dict(), f"Models/Regression_{variables_string}.pt") min_valid_loss = valid_loss optimal_batch = overall_batch overall_batch += 1 # Load the optimal model (with loweest validation loss, and evaluate on test set) optimal_nn = Regression(input_size, dnn_hidden_units, dropout_probs, 1, FLAGS.neg_slope, FLAGS.batchnorm).to(device) optimal_nn.load_state_dict( torch.load(f"Models/Regression_{variables_string}.pt")) test_loss, test_pred, test_true = eval_on_test(optimal_nn, loss_function, test_dl, device, verbose=FLAGS.verbose, return_preds=True) # save the test predictions of the regressor with open( f"Predictions/regression_test_preds{FLAGS.embedder}_{FLAGS.reduced_classes}_{FLAGS.impression}.pt", "wb") as f: pkl.dump(test_pred, f) print( f"Loss on test set of optimal model (batch {optimal_batch}): {test_loss}" ) significance_testing(test_pred, test_true, loss_function, FLAGS) if FLAGS.plotting: plotting(training_losses, valid_losses, test_loss, variables_string, optimal_batch, FLAGS)