import pickle
import time

import torch
import torch.nn as nn

# Model, Dataset, hparams, run_batch, run_epoch_eval, and
# get_script_short_name are defined elsewhere in this module.


def run():
    print('Loading data...')
    with open('data/data_emb', 'rb') as f:
        all_sets, embs, word2idx = pickle.load(f)

    # Initialize the embedding layer from the pretrained embedding matrix.
    emb_layer = nn.Embedding(embs.shape[0], embs.shape[1])
    emb_layer.weight = nn.Parameter(torch.from_numpy(embs))

    model = Model(emb_layer).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=hparams['learning_rate'])

    # all_sets holds (train, dev, test); question pairs are padded per batch.
    train_set = Dataset(all_sets[0], shuffle=True, pad_keys=('q1', 'q2'))
    dev_set = Dataset(all_sets[1], shuffle=False, pad_keys=('q1', 'q2'))
    test_set = Dataset(all_sets[2], shuffle=False, pad_keys=('q1', 'q2'))

    step = 0
    sum_loss = 0
    dev_best = 0
    test_score = 0

    print('Starting training...')
    print(hparams)
    start_time = time.time()

    for epoch in range(hparams['max_epoch']):
        batches, batch_lengths = train_set.get_batches(
            hparams['batch_size'], ('q1', 'q2', 'y'))
        for b_data, b_lengths in zip(batches, batch_lengths):
            sum_loss += run_batch(b_data, b_lengths, model, optimizer)
            step += 1

            if step % hparams['display_step'] == 0:
                avg_loss = sum_loss / hparams['display_step']
                sum_loss = 0
                dev_score = run_epoch_eval(dev_set, model)
                out_str = f'Epoch {epoch} iter {step} took {time.time() - start_time:.1f}s\n' \
                          f'loss:\t{avg_loss:.5f}\tdev score:\t{dev_score:.4f}'
                # Evaluate on test only when the dev score improves.
                if dev_score > dev_best:
                    dev_best = dev_score
                    output_file = f'pred/{get_script_short_name(__file__)}.pred'
                    test_score = run_epoch_eval(test_set, model, output_file)
                    out_str += f'\t*** New best dev ***\ttest score:\t{test_score:.4f}'
                print(out_str)
                start_time = time.time()

    print('Best model on dev: dev:{:.4f}\ttest:{:.4f}'.format(
        dev_best, test_score))
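# ---------------------------------------------------------------------------
# For reference, a minimal sketch of what the run_batch helper used above
# might look like; the real implementation lives elsewhere in this script,
# and Model.forward returning the batch loss directly is an assumption,
# not taken from the source.
def run_batch_sketch(b_data, b_lengths, model, optimizer):
    model.train()
    loss = model(b_data, b_lengths)  # assumed: forward returns a scalar loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()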
import timeit

import torch
import torch.nn as nn
import torch.optim as optim

# Dataset, Config, and SiameseLSTM are defined elsewhere in this repo;
# `model` is assumed to be the pretrained word-embedding model loaded above.

train_data = Dataset(
    'C:\\Users\\sanujb\\PycharmProjects\\CS585_FinalProject\\MaLSTM\\data\\train.csv',
    model)
config = Config(len(train_data.vocab))
rnn = SiameseLSTM(config, train_data.model)
loss_function = nn.BCELoss()
optimizer = optim.Adadelta(rnn.encoder_params())

num_epochs = 100
train_size = 80000

for epoch in range(num_epochs):
    start_time = timeit.default_timer()
    epoch_loss = 0
    batch_num = 1
    for batch, targets in train_data.get_batches(config.batch_size, train_size):
        preds = rnn(batch)
        batch_loss = loss_function(preds, targets)
        # Accumulate with .item() so the running total does not keep
        # the autograd graph of every batch alive in memory.
        epoch_loss += batch_loss.item()

        optimizer.zero_grad()  # reset the gradients from the last batch
        batch_loss.backward()  # backpropagate through the Siamese encoder
        torch.nn.utils.clip_grad_norm_(rnn.encoder_params(), 0.25)  # clip exploding gradients
        optimizer.step()  # update parameters using the gradients

        if batch_num % 100 == 0:
            print('Batch number: {}, batch loss: {}, epoch loss {}'.format(
                batch_num, batch_loss.item(), epoch_loss))
        batch_num += 1
    print(epoch, epoch_loss)
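# ---------------------------------------------------------------------------
# SiameseLSTM paired with BCELoss matches the standard MaLSTM setup
# (Mueller & Thyagarajan, 2016), where the similarity of two encoded
# questions is exp(-||h1 - h2||_1), which lies in (0, 1]. A minimal sketch
# of that forward pass follows; the layer sizes and the assumption that the
# inputs arrive as padded embedding tensors are illustrative, not taken
# from this repo.
class MaLSTMSketch(nn.Module):
    """Illustrative weight-tied encoder with Manhattan-distance similarity."""

    def __init__(self, emb_dim=300, hidden_dim=50):
        super().__init__()
        self.encoder = nn.LSTM(emb_dim, hidden_dim, batch_first=True)

    def forward(self, q1_emb, q2_emb):
        # Encode both questions with the same LSTM (shared weights).
        _, (h1, _) = self.encoder(q1_emb)
        _, (h2, _) = self.encoder(q2_emb)
        # exp(-L1 distance) yields a similarity in (0, 1], so BCELoss can
        # be applied directly against the binary duplicate labels.
        return torch.exp(-torch.sum(torch.abs(h1[-1] - h2[-1]), dim=1))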