Example #1
import pickle
import time

import torch
import torch.nn as nn

# Model, Dataset, run_batch, run_epoch_eval, get_script_short_name and hparams
# are defined elsewhere in the project this example comes from.


def run():
    print('Loading data...')
    with open('data/data_emb', 'rb') as f:
        all_sets, embs, word2idx = pickle.load(f)
    emb_layer = nn.Embedding(embs.shape[0], embs.shape[1])
    emb_layer.weight = nn.Parameter(torch.from_numpy(embs))
    model = Model(emb_layer).cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=hparams['learning_rate'])
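    # Build the train/dev/test splits; 'q1' and 'q2' are the padded fields, and
    # only the training split is shuffled.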
    train_set = Dataset(all_sets[0], shuffle=True, pad_keys=('q1', 'q2'))
    dev_set = Dataset(all_sets[1], shuffle=False, pad_keys=('q1', 'q2'))
    test_set = Dataset(all_sets[2], shuffle=False, pad_keys=('q1', 'q2'))
    step = 0
    sum_loss = 0
    dev_best = 0
    test_score = 0
    print("Starting training...")
    print(hparams)
    start_time = time.time()
    for epoch in range(hparams['max_epoch']):
        batches, batch_lengths = train_set.get_batches(hparams['batch_size'],
                                                       ('q1', 'q2', 'y'))
        for b_data, b_lengths in zip(batches, batch_lengths):
            sum_loss += run_batch(b_data, b_lengths, model, optimizer)
            step += 1
            if step % hparams['display_step'] == 0:
                avg_loss = sum_loss / hparams['display_step']
                sum_loss = 0
                dev_score = run_epoch_eval(dev_set, model)
                out_str = f'Epoch {epoch} iter {step} took {time.time() - start_time:.1f}s\n' \
                          f'loss:\t{avg_loss:.5f}\tdev score:\t{dev_score:.4f}'
                if dev_score > dev_best:
                    dev_best = dev_score
                    output_file = f'pred/{get_script_short_name(__file__)}.pred'
                    test_score = run_epoch_eval(test_set, model, output_file)
                    out_str += f'\t*** New best dev ***\ttest score:\t{test_score:.4f}'
                print(out_str)
                start_time = time.time()
    print('Best model on dev: dev:{:.4f}\ttest:{:.4f}'.format(
        dev_best, test_score))
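
run_batch and run_epoch_eval are helpers from the surrounding project and are not shown here. The following is only a minimal sketch of what run_batch might look like, assuming the batch dictionary holds padded index arrays under 'q1'/'q2' and binary labels under 'y', and that Model returns a duplicate probability per pair; these names and shapes are assumptions, not taken from the project.

def run_batch(b_data, b_lengths, model, optimizer):
    # Move the padded question pairs and their labels onto the GPU the model uses.
    q1 = torch.from_numpy(b_data['q1']).long().cuda()
    q2 = torch.from_numpy(b_data['q2']).long().cuda()
    y = torch.from_numpy(b_data['y']).float().cuda()

    model.train()
    optimizer.zero_grad()

    # Forward pass: score how likely each question pair is a duplicate.
    probs = model(q1, q2, b_lengths)
    loss = nn.functional.binary_cross_entropy(probs, y)

    # Backward pass and parameter update.
    loss.backward()
    optimizer.step()

    # Return the scalar loss so the caller can accumulate it.
    return loss.item()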
Example #2

import timeit

import torch
import torch.nn as nn
import torch.optim as optim

# Dataset, Config, SiameseLSTM and the pretrained embedding `model` are defined
# elsewhere in the MaLSTM project this example comes from.

train_data = Dataset(
    'C:\\Users\\sanujb\\PycharmProjects\\CS585_FinalProject\\MaLSTM\\data\\train.csv',
    model)
config = Config(len(train_data.vocab))
rnn = SiameseLSTM(config, train_data.model)

loss_function = nn.BCELoss()
optimizer = optim.Adadelta(rnn.encoder_params())
num_epochs = 100
train_size = 80000

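# Each epoch iterates over train_size question pairs in batches of
# config.batch_size, clipping gradients before every optimizer step.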
for epoch in range(num_epochs):
    start_time = timeit.default_timer()
    epoch_loss = 0
    batch_num = 1
    for batch, targets in train_data.get_batches(config.batch_size,
                                                 train_size):
        preds = rnn(batch)
        batch_loss = loss_function(preds, targets)
        epoch_loss += batch_loss.item()  # .item() keeps the graph from being retained across batches

        optimizer.zero_grad()  # reset the gradients from the last batch
        batch_loss.backward()  # backpropagate to compute gradients for this batch
        torch.nn.utils.clip_grad_norm_(rnn.encoder_params(), 0.25)  # clip the gradient norm to 0.25
        optimizer.step()  # update parameters using the clipped gradients

        if batch_num % 100 == 0:
            print('Batch number: {}, batch loss: {:.5f}, epoch loss: {:.5f}'.format(
                batch_num, batch_loss.item(), epoch_loss))
        batch_num += 1

    print(epoch, epoch_loss)
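
SiameseLSTM, Config, and Dataset above come from the MaLSTM project referenced in the training-data path and are not reproduced here. As a hedged illustration of the usual MaLSTM idea rather than that project's actual class, the sketch below runs both questions through one shared LSTM encoder and scores each pair with exp(-L1 distance) between the final hidden states.

class SiameseLSTMSketch(nn.Module):
    """Illustrative Siamese LSTM; the project's SiameseLSTM class may differ."""

    def __init__(self, vocab_size, emb_dim=300, hidden_dim=50):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        # One LSTM encoder is shared by both questions (the Siamese part).
        self.encoder = nn.LSTM(emb_dim, hidden_dim, batch_first=True)

    def encode(self, question):
        # question: LongTensor of shape (batch, seq_len) holding padded word indices.
        _, (h_n, _) = self.encoder(self.embedding(question))
        return h_n.squeeze(0)  # final hidden state, shape (batch, hidden_dim)

    def forward(self, q1, q2):
        h1, h2 = self.encode(q1), self.encode(q2)
        # Manhattan-distance similarity: 1 for identical encodings, approaching 0 as they diverge.
        return torch.exp(-torch.sum(torch.abs(h1 - h2), dim=1))

The exponential of the negative Manhattan (L1) distance is what gives MaLSTM its name, and since it lies in (0, 1] the nn.BCELoss used in the loop above can be applied to the similarity directly, without a sigmoid.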