Example 1
def objective(trial):
    # Architecture search: number of stacked layers and directionality.
    # d_w, d_h, L, emb, train, valid, and device are globals set up as in Example 3.
    nl = trial.suggest_int("num_layers", 1, 2)
    bi = trial.suggest_categorical("bidirectional", [False, True])
    model = RNN(d_w, d_h, L, emb, num_layers=nl, bidirectional=bi)

    # Training hyperparameters: log-uniform learning rate and batch size
    # (-1 presumably selects full-batch training).
    lr = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    bs = trial.suggest_categorical("batch_size", [32, -1])
    model = run_train(train,
                      valid,
                      model,
                      epochs=11,
                      lr=lr,
                      batch_size=bs,
                      device=device)

    # Return validation accuracy for Optuna to maximize.
    loss_eval, acc_eval = run_eval(model, valid, device=device)
    return acc_eval
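
The function above only defines the search objective; a minimal driver sketch for it (not part of the original snippet), assuming the globals are already set up as in Example 3 and an arbitrary trial budget of 20:

import optuna

# Maximize the validation accuracy returned by objective().
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

print("Best accuracy:", study.best_value)
print("Best hyperparameters:", study.best_params)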
Example 2
import joblib
import torch
from torch import nn

if __name__ == "__main__":
    # Features are plain text files; labels are pickled arrays.
    x_train = get_feature(open('train2.feature.txt'))
    y_train = joblib.load('train_label.pkl').tolist()
    x_valid = get_feature(open('valid2.feature.txt'))
    y_valid = joblib.load('valid_label.pkl').tolist()
    x_test = get_feature(open('test2.feature.txt'))
    y_test = joblib.load('test_label.pkl').tolist()

    # Map each text to a sequence of word IDs. Note that each split builds
    # its own vocabulary here; for IDs consistent with the model (sized from
    # tr_word2id below), valid/test should reuse the training vocabulary.
    tr_text2id, tr_word2id = word2ids(x_train)
    train_set = CreateDataset(x_train, y_train, tr_text2id)
    val_text2id, val_word2id = word2ids(x_valid)
    valid_set = CreateDataset(x_valid, y_valid, val_text2id)
    test_text2id, test_word2id = word2ids(x_test)
    test_set = CreateDataset(x_test, y_test, test_text2id)

    VOCAB_SIZE = len(set(tr_word2id.values())) + 1  # +1 for the padding slot
    EMB_SIZE = 300
    PADDING_IDX = len(set(tr_word2id.values()))  # padding gets the last ID
    OUTPUT_SIZE = 4
    HIDDEN_SIZE = 50
    LEARNING_RATE = 1e-3
    BATCH_SIZE = 1
    NUM_EPOCHS = 10

    model = RNN(VOCAB_SIZE, EMB_SIZE, PADDING_IDX, OUTPUT_SIZE, HIDDEN_SIZE)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

    log = train_model(train_set, valid_set, BATCH_SIZE, model, criterion, optimizer, NUM_EPOCHS)
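
get_feature, word2ids, CreateDataset, train_model, and RNN are defined elsewhere in this project. A minimal sketch of a model matching the constructor call above (an assumption, not the author's definition): a single-layer nn.RNN over an embedding lookup, with the last hidden state fed to a linear classifier.

import torch
from torch import nn

class RNN(nn.Module):
    def __init__(self, vocab_size, emb_size, padding_idx, output_size, hidden_size):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_size, padding_idx=padding_idx)
        self.rnn = nn.RNN(emb_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: (batch, seq_len) tensor of word IDs
        e = self.emb(x)               # (batch, seq_len, emb_size)
        _, h = self.rnn(e)            # h: (1, batch, hidden_size)
        return self.fc(h.squeeze(0))  # (batch, output_size) logits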
Example 3
import logging
import os
import sys

import torch

from knock82 import run_eval, run_train

sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
from kiyuna.utils.pickle import load  # noqa: E402 isort:skip

logging.basicConfig(level=logging.DEBUG)


# RNN, get_V, and _list_valid_words come from the surrounding project modules.
d_w = 300    # word-embedding dimension
d_h = 50     # hidden-state dimension
V = get_V()  # vocabulary size
L = 4        # number of output classes


if __name__ == "__main__":
    train = torch.load("./data/train.pt")
    valid = torch.load("./data/valid.pt")
    test = torch.load("./data/test.pt")
    device = torch.device("cuda:6")

    # Randomly initialize a (d_w, V) embedding matrix, then overwrite the
    # columns of words that have pretrained vectors.
    emb = torch.Tensor(d_w, V).normal_()
    wv = load("chap07-embeddings")
    for i, word in enumerate(_list_valid_words()):
        if word in wv:
            wv_word = wv[word]
            # torch.from_numpy() requires a writable array.
            wv_word.flags["WRITEABLE"] = True
            emb[:, i] = torch.from_numpy(wv_word)

    # Two-layer bidirectional RNN trained for 11 epochs, then scored on test.
    rnn = RNN(d_w, d_h, L, emb, num_layers=2, bidirectional=True)
    rnn = run_train(train, valid, rnn, epochs=11, lr=1e-1, batch_size=32, device=device)
    loss, acc = run_eval(rnn, test, device=device)
    print(f"Accuracy (test): {acc:f}, Loss (test): {loss:f}")
Example 4
import logging
import torch

logging.basicConfig(level=logging.DEBUG)

# Same project helpers as in Example 3 (RNN, run_train, run_eval, load,
# get_V, _list_valid_words) are assumed to be imported here as well.
d_w = 300    # word-embedding dimension
d_h = 50     # hidden-state dimension
V = get_V()  # vocabulary size
L = 4        # number of output classes

if __name__ == "__main__":
    train = torch.load("./data/train.pt")
    valid = torch.load("./data/valid.pt")
    test = torch.load("./data/test.pt")
    device = torch.device("cuda:7")
    # Same pretrained-embedding initialization as Example 3.
    emb = torch.Tensor(d_w, V).normal_()
    wv = load("chap07-embeddings")
    for i, word in enumerate(_list_valid_words()):
        if word in wv:
            wv_word = wv[word]
            wv_word.flags["WRITEABLE"] = True  # torch.from_numpy() needs a writable array
            emb[:, i] = torch.from_numpy(wv_word)
    # Plain configuration (Example 3 additionally passes num_layers=2,
    # bidirectional=True).
    rnn = RNN(d_w, d_h, L, emb)
    rnn = run_train(train,
                    valid,
                    rnn,
                    epochs=11,
                    lr=1e-1,
                    batch_size=32,
                    device=device)
    loss, acc = run_eval(rnn, test, device=device)
    print(f"Accuracy (test): {acc:f}, Loss (test): {loss:f}")