def objective(trial):
    # Hyperparameters to search
    out_channels = int(trial.suggest_discrete_uniform('out_channels', 50, 200, 50))
    drop_rate = trial.suggest_discrete_uniform('drop_rate', 0.0, 0.5, 0.1)
    learning_rate = trial.suggest_loguniform('learning_rate', 5e-4, 5e-2)
    momentum = trial.suggest_discrete_uniform('momentum', 0.5, 0.9, 0.1)
    batch_size = int(trial.suggest_discrete_uniform('batch_size', 16, 128, 16))

    # Fixed settings
    VOCAB_SIZE = len(set(word_to_id.values())) + 1
    PADDING_IDX = len(set(word_to_id.values()))
    EMB_SIZE = 300
    KERNEL_HEIGHTS = 3
    STRIDE = 1
    PADDING = 1
    OUTPUT_SIZE = 4
    CONV_PARAMS = [[2, 0], [3, 1], [4, 2]]
    NUM_EPOCHS = 10

    # Model, loss, and optimizer for this trial
    model = CNN(VOCAB_SIZE, EMB_SIZE, PADDING_IDX, OUTPUT_SIZE, out_channels,
                KERNEL_HEIGHTS, STRIDE, PADDING, emb_weights=weights)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    device = torch.device('cuda')

    # Train, then score the trial by its dev-set loss
    log = train_model(dataset_train, dataset_dev, batch_size, model, criterion,
                      optimizer, NUM_EPOCHS, collate_fn=Padsequence(PADDING_IDX), device=device)
    loss_dev, _ = calculate_loss_and_acc(model, dataset_dev, criterion=criterion, device=device)

    return loss_dev
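
# How the objective above would typically be driven: a minimal sketch using
# Optuna's create_study/optimize API, assuming the study minimizes the returned
# dev loss; n_trials=30 is illustrative, not a value from the original.
import optuna

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

print(f'Best trial : {study.best_trial.number}')
print(f'Best loss  : {study.best_trial.value:.4f}')
print(f'Best params: {study.best_params}')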
# Build the test dataset and train a bidirectional two-layer RNN
test_set = CreateDataset(x_test, y_test, test_text2id)

VOCAB_SIZE = len(set(tr_word2id.values())) + 1
EMB_SIZE = 300
PADDING_IDX = len(set(tr_word2id.values()))
OUTPUT_SIZE = 4
HIDDEN_SIZE = 50
NUM_LAYERS = 2
LEARNING_RATE = 5e-2
BATCH_SIZE = 32
NUM_EPOCHS = 10

model = RNN(VOCAB_SIZE, EMB_SIZE, PADDING_IDX, OUTPUT_SIZE, HIDDEN_SIZE,
            NUM_LAYERS, bidirectional=True)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

log = train_model(train_set, valid_set, BATCH_SIZE, model, criterion, optimizer,
                  NUM_EPOCHS, collate_fn=Padsequence(PADDING_IDX))
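
# With training done, the held-out test set built above can be scored. A sketch
# reusing calculate_loss_and_acc from the Optuna objective; calling it without
# a device argument is an assumption here:
loss_test, acc_test = calculate_loss_and_acc(model, test_set, criterion=criterion)
print(f'loss_test: {loss_test:.4f}, accuracy_test: {acc_test:.4f}')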
# Train a single-layer unidirectional RNN baseline
OUTPUT_SIZE = 4
HIDDEN_SIZE = 50
LEARNING_RATE = 1e-3
BATCH_SIZE = 32
NUM_EPOCHS = 10

rnn = RNN(VOCAB_SIZE, EMB_SIZE, PADDING_IDX, OUTPUT_SIZE, HIDDEN_SIZE)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(rnn.parameters(), lr=LEARNING_RATE)

log = train_model(dataset_train, dataset_dev, BATCH_SIZE, rnn, criterion, optimizer,
                  NUM_EPOCHS, collate_fn=Padsequence(PADDING_IDX), device=device)
"""
epoch: 1, loss_train: 1.0687, accuracy_train: 0.6129, loss_dev: 1.1163, accuracy_dev: 0.5926
epoch: 2, loss_train: 1.0061, accuracy_train: 0.6321, loss_dev: 1.0401, accuracy_dev: 0.6053
epoch: 3, loss_train: 0.9022, accuracy_train: 0.6654, loss_dev: 0.9739, accuracy_dev: 0.6148
epoch: 4, loss_train: 0.8375, accuracy_train: 0.6977, loss_dev: 0.9593, accuracy_dev: 0.6370
epoch: 5, loss_train: 0.8625, accuracy_train: 0.6774, loss_dev: 0.9510, accuracy_dev: 0.6233
epoch: 6, loss_train: 0.7032, accuracy_train: 0.7437, loss_dev: 0.8379, accuracy_dev: 0.6794
epoch: 7, loss_train: 0.6693, accuracy_train: 0.7564, loss_dev: 0.8294, accuracy_dev: 0.6815
epoch: 8, loss_train: 0.5748, accuracy_train: 0.7857, loss_dev: 0.7458, accuracy_dev: 0.7090
epoch: 9, loss_train: 0.5543, accuracy_train: 0.7920, loss_dev: 0.7401, accuracy_dev: 0.7026
epoch: 10, loss_train: 0.5499, accuracy_train: 0.7921, loss_dev: 0.7409, accuracy_dev: 0.7037
"""
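
# All runs above rely on Padsequence to batch variable-length inputs. A minimal
# sketch of what such a collate function typically looks like, assuming each
# dataset item is a dict with 'inputs' (a 1-D LongTensor of token ids) and
# 'labels'; the actual implementation may differ.
import torch
from torch.nn.utils.rnn import pad_sequence

class Padsequence:
    """Pad every sequence in a batch to the length of the longest one."""
    def __init__(self, padding_idx):
        self.padding_idx = padding_idx

    def __call__(self, batch):
        # Sort by sequence length, longest first, so the batch is also usable
        # with pack_padded_sequence downstream
        sorted_batch = sorted(batch, key=lambda x: x['inputs'].shape[0], reverse=True)
        sequences = [x['inputs'] for x in sorted_batch]
        inputs = pad_sequence(sequences, batch_first=True, padding_value=self.padding_idx)
        labels = torch.LongTensor([x['labels'] for x in sorted_batch])
        return {'inputs': inputs, 'labels': labels}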