Example #1
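Both examples assume module-level imports and project-specific helpers (FLAGS, Regression, eval_on_test, plotting, significance_testing) defined elsewhere in the repository. A minimal sketch of the standard imports they rely on:

import os
import pickle as pkl

import numpy as np
import torch
from torch.utils.data import DataLoader, random_split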
def train():
    """
    Performs training and evaluation of the Regression model.
    """
    print("Training started")
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Get number of units in each hidden layer
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # convert dropout percentages
    dropout_percentages = [
        int(perc) for perc in FLAGS.dropout_percentages.split(',')
    ]

    # pad or truncate the dropout list to match the number of hidden layers
    if len(dropout_percentages) != len(dnn_hidden_units):
        dropout_len = len(dropout_percentages)
        hidden_len = len(dnn_hidden_units)
        if dropout_len < hidden_len:
            for _ in range(hidden_len - dropout_len):
                dropout_percentages.append(0)
        else:
            dropout_percentages = dropout_percentages[:hidden_len]
    # use GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device :", device)

    # load the dataset and split it into train, valid, and test dataloaders
    with open(os.path.join(FLAGS.data_dir, "dataset.p"), "rb") as f:
        dataset = pkl.load(f)

    len_all = len(dataset)

    train_len, valid_len = int(0.7 * len_all), int(0.15 * len_all)
    test_len = len_all - train_len - valid_len
    splits = [train_len, valid_len, test_len]
    train_data, valid_data, test_data = random_split(dataset, splits)

    train_dl = DataLoader(train_data, batch_size=64, shuffle=True)
    valid_dl = DataLoader(valid_data,
                          batch_size=64,
                          shuffle=True,
                          drop_last=True)
    test_dl = DataLoader(test_data,
                         batch_size=64,
                         shuffle=True,
                         drop_last=True)

    # initialize MLP and loss function
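    # input dim 5387: presumably 5376 embedding features (see Example #2) plus an 11-class one-hot label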
    nn = Regression(5387, dnn_hidden_units, dropout_percentages, 1,
                    FLAGS.neg_slope, FLAGS.batchnorm).to(device)
    loss_function = torch.nn.MSELoss()

    # initialize optimizer
    if FLAGS.optimizer == "SGD":
        optimizer = torch.optim.SGD(nn.parameters(),
                                    lr=FLAGS.learning_rate,
                                    weight_decay=FLAGS.weightdecay,
                                    momentum=FLAGS.momentum)
    elif FLAGS.optimizer == "Adam":
        optimizer = torch.optim.Adam(nn.parameters(),
                                     lr=FLAGS.learning_rate,
                                     amsgrad=FLAGS.amsgrad,
                                     weight_decay=FLAGS.weightdecay)
    elif FLAGS.optimizer == "AdamW":
        optimizer = torch.optim.AdamW(nn.parameters(),
                                      lr=FLAGS.learning_rate,
                                      amsgrad=FLAGS.amsgrad,
                                      weight_decay=FLAGS.weightdecay)
    elif FLAGS.optimizer == "RMSprop":
        optimizer = torch.optim.RMSprop(nn.parameters(),
                                        lr=FLAGS.learning_rate,
                                        weight_decay=FLAGS.weightdecay,
                                        momentum=FLAGS.momentum)
    else:
        raise ValueError(f"Unknown optimizer: {FLAGS.optimizer}")

    # initialization for plotting and metrics
    training_losses = []
    valid_losses = []

    # construct name for saving models and figures
    variables_string = f"{FLAGS.optimizer}_{FLAGS.learning_rate}_{FLAGS.weightdecay}_{FLAGS.dnn_hidden_units}_{FLAGS.dropout_percentages}_{FLAGS.batchnorm}_{FLAGS.nr_epochs}"

    # training loop
    for epoch in range(FLAGS.nr_epochs):

        print(f"\nEpoch: {epoch}")
        batch_losses = []
        nn.train()

        for batch, (x, y) in enumerate(train_dl):

            # one-hot encode the label and append it to the input features
            onehot_y = torch.nn.functional.one_hot(y.squeeze().to(torch.int64),
                                                   num_classes=11)
            # cast to float so it can be concatenated with the float features
            x = torch.cat((x.reshape(x.shape[0], -1), onehot_y.float()), 1)

            # flatten the inputs and move the batch to the device
            x = x.reshape(x.shape[0], -1).to(device)
            y = y.reshape(y.shape[0], -1).to(device)

            optimizer.zero_grad()

            # forward pass
            pred = nn(x).to(device)

            # compute loss and backpropagate
            loss = loss_function(pred, y)
            loss.backward()

            # update the weights
            optimizer.step()

            # save training loss
            batch_losses.append(loss.item())

        avg_epoch_loss = np.mean(batch_losses)
        training_losses.append(avg_epoch_loss)
        print(
            f"Average batch loss (epoch {epoch}): {avg_epoch_loss} "
            f"({len(batch_losses)} batches)."
        )

        # get loss on validation set and evaluate
        valid_losses.append(eval_on_test(nn, loss_function, valid_dl, device))
        torch.save(nn.state_dict(), f"Models/Regression_{variables_string}.pt")

    # compute loss and accuracy on the test set
    test_loss = eval_on_test(nn, loss_function, test_dl, device)
    print(f"Loss on test set: {test_loss}")

    plotting(training_losses, valid_losses, test_loss, variables_string)
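Both examples call an eval_on_test helper that is defined elsewhere in the repository. Below is a minimal sketch consistent with how it is called above: it returns a scalar average loss by default, and additionally the predictions and targets when return_preds=True (the exact verbose behavior is an assumption). Note that Example #1 appends a one-hot label to the inputs during training, so its actual helper must apply the same preprocessing, which this sketch omits.

def eval_on_test(model, loss_function, dataloader, device,
                 verbose=False, return_preds=False):
    """Average the loss over a dataloader without updating any weights."""
    model.eval()
    losses, preds, trues = [], [], []
    with torch.no_grad():
        for x, y in dataloader:
            # flatten each batch and move it to the device, as in training
            x = x.reshape(x.shape[0], -1).to(device)
            y = y.reshape(y.shape[0], -1).to(device)
            pred = model(x)
            losses.append(loss_function(pred, y).item())
            if return_preds:
                preds.append(pred.cpu())
                trues.append(y.cpu())
    avg_loss = float(np.mean(losses))
    if verbose:
        print(f"eval loss over {len(losses)} batches: {avg_loss}")
    if return_preds:
        return avg_loss, torch.cat(preds), torch.cat(trues)
    return avg_loss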
Example #2
def train():
    """
    Performs training and evaluation of the Regression model.
    """
    # Set the random seeds for reproducibility
    np.random.seed(10)
    torch.manual_seed(10)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Get number of units in each hidden layer
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # parse dropout probabilities
    dropout_probs = [float(prob) for prob in FLAGS.dropout_probs.split(',')]

    # pad or truncate the dropout list to match the number of hidden layers
    if len(dropout_probs) != len(dnn_hidden_units):
        dropout_len = len(dropout_probs)
        hidden_len = len(dnn_hidden_units)
        if dropout_len < hidden_len:
            for _ in range(hidden_len - dropout_len):
                dropout_probs.append(0)
        else:
            dropout_probs = dropout_probs[:hidden_len]
    # use GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device :", device)

    # load the dataset and split it into train, valid, and test dataloaders
    dataset_filename = f"dataset_filename=MIMICS-Click.tsv_expanded=False_balance=True_impression={FLAGS.impression}_reduced_classes={FLAGS.reduced_classes}_embedder={FLAGS.embedder}.p"
    with open(os.path.join(FLAGS.data_dir, dataset_filename), "rb") as f:
        dataset = pkl.load(f)

    len_all = len(dataset)

    train_len, valid_len = int(0.7 * len_all), int(0.15 * len_all)
    test_len = len_all - train_len - valid_len
    splits = [train_len, valid_len, test_len]
    train_data, valid_data, test_data = random_split(dataset, splits)

    train_dl = DataLoader(train_data,
                          batch_size=FLAGS.batch_size,
                          shuffle=True,
                          drop_last=True)
    valid_dl = DataLoader(valid_data,
                          batch_size=FLAGS.batch_size,
                          shuffle=True,
                          drop_last=True)
    test_dl = DataLoader(test_data,
                         batch_size=FLAGS.batch_size,
                         shuffle=True,
                         drop_last=True)

    with open(f"{FLAGS.data_dir}/test_dl.pt", "wb") as f:
        pkl.dump(test_dl, f)

    # initialize MLP and loss function
    input_size = next(iter(train_dl))[0].shape[1]  # 5376 for BERT embeddings
    nn = Regression(input_size, dnn_hidden_units, dropout_probs, 1,
                    FLAGS.neg_slope, FLAGS.batchnorm).to(device)
    loss_function = torch.nn.MSELoss()

    if FLAGS.verbose:
        print(f"neural net:\n {[param.data for param in nn.parameters()]}")

    # initialize optimizer
    if FLAGS.optimizer == "SGD":
        optimizer = torch.optim.SGD(nn.parameters(),
                                    lr=FLAGS.learning_rate,
                                    weight_decay=FLAGS.weightdecay,
                                    momentum=FLAGS.momentum)
    elif FLAGS.optimizer == "Adam":
        optimizer = torch.optim.Adam(nn.parameters(),
                                     lr=FLAGS.learning_rate,
                                     amsgrad=FLAGS.amsgrad,
                                     weight_decay=FLAGS.weightdecay)
    elif FLAGS.optimizer == "AdamW":
        optimizer = torch.optim.AdamW(nn.parameters(),
                                      lr=FLAGS.learning_rate,
                                      amsgrad=FLAGS.amsgrad,
                                      weight_decay=FLAGS.weightdecay)
    elif FLAGS.optimizer == "RMSprop":
        optimizer = torch.optim.RMSprop(nn.parameters(),
                                        lr=FLAGS.learning_rate,
                                        weight_decay=FLAGS.weightdecay,
                                        momentum=FLAGS.momentum)
    else:
        raise ValueError(f"Unknown optimizer: {FLAGS.optimizer}")

    # initialization for plotting and metrics
    training_losses = []
    valid_losses = []

    initial_train_loss = eval_on_test(nn, loss_function, train_dl, device)
    training_losses.append(initial_train_loss)
    initial_valid_loss = eval_on_test(nn, loss_function, valid_dl, device)
    valid_losses.append(initial_valid_loss)

    # construct name for saving models and figures
    variables_string = f"regression_{FLAGS.embedder}_{FLAGS.impression}_{FLAGS.reduced_classes}_{FLAGS.optimizer}_{FLAGS.learning_rate}_{FLAGS.weightdecay}_{FLAGS.momentum}_{FLAGS.dnn_hidden_units}_{FLAGS.dropout_probs}_{FLAGS.batchnorm}_{FLAGS.nr_epochs}"

    overall_batch = 0
    # initialize so the final report is defined even if validation never improves
    optimal_batch = 0
    min_valid_loss = float("inf")

    # training loop
    for epoch in range(FLAGS.nr_epochs):

        print(f"\nEpoch: {epoch}")

        for batch, (x, y) in enumerate(train_dl):
            nn.train()

            # move the batch to the device
            x = x.to(device)
            y = y.to(device)

            optimizer.zero_grad()

            # forward pass
            pred = nn(x).to(device)

            # compute loss and backpropagate
            loss = loss_function(pred, y)
            loss.backward()

            # update the weights
            optimizer.step()

            # save training loss
            training_losses.append(loss.item())

            # print(f"batch loss ({batch}): {loss.item()}")

            # get loss on validation set and evaluate
            if overall_batch % FLAGS.eval_freq == 0 and overall_batch != 0:
                valid_loss = eval_on_test(nn, loss_function, valid_dl, device)
                valid_losses.append(valid_loss)
                print(
                    f"Training loss: {loss.item()} / Valid loss: {valid_loss}")
                if valid_loss < min_valid_loss:
                    print(
                        f"Model is saved in epoch {epoch}, overall batch: {overall_batch}"
                    )
                    torch.save(nn.state_dict(),
                               f"Models/Regression_{variables_string}.pt")
                    min_valid_loss = valid_loss
                    optimal_batch = overall_batch

            overall_batch += 1

    # Load the optimal model (with the lowest validation loss) and evaluate on the test set
    optimal_nn = Regression(input_size, dnn_hidden_units, dropout_probs, 1,
                            FLAGS.neg_slope, FLAGS.batchnorm).to(device)
    optimal_nn.load_state_dict(
        torch.load(f"Models/Regression_{variables_string}.pt"))

    test_loss, test_pred, test_true = eval_on_test(optimal_nn,
                                                   loss_function,
                                                   test_dl,
                                                   device,
                                                   verbose=FLAGS.verbose,
                                                   return_preds=True)

    # save the test predictions of the regressor
    with open(
            f"Predictions/regression_test_preds{FLAGS.embedder}_{FLAGS.reduced_classes}_{FLAGS.impression}.pt",
            "wb") as f:
        pkl.dump(test_pred, f)

    print(
        f"Loss on test set of optimal model (batch {optimal_batch}): {test_loss}"
    )

    significance_testing(test_pred, test_true, loss_function, FLAGS)

    if FLAGS.plotting:
        plotting(training_losses, valid_losses, test_loss, variables_string,
                 optimal_batch, FLAGS)
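The Regression model is also defined outside these snippets. A minimal sketch matching the constructor signature used in both examples (inputs, hidden units, per-layer dropout, outputs, LeakyReLU negative slope, batchnorm flag); the exact layer ordering is an assumption, and dropout values are taken to be probabilities in [0, 1] as in Example #2 (Example #1 parses them as integer percentages):

class Regression(torch.nn.Module):
    """MLP regressor: hidden layers with LeakyReLU, optional batchnorm and dropout."""

    def __init__(self, n_inputs, n_hidden, dropout_probs, n_outputs,
                 neg_slope, batchnorm):
        super().__init__()
        layers = []
        in_features = n_inputs
        for units, p in zip(n_hidden, dropout_probs):
            layers.append(torch.nn.Linear(in_features, units))
            if batchnorm:
                layers.append(torch.nn.BatchNorm1d(units))
            layers.append(torch.nn.LeakyReLU(neg_slope))
            if p > 0:
                layers.append(torch.nn.Dropout(p))
            in_features = units
        # final linear layer maps to the (single) regression output
        layers.append(torch.nn.Linear(in_features, n_outputs))
        self.net = torch.nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)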