import random
from random import shuffle
from typing import List

import torch
import torch.nn as nn
import torch.optim as optim

# Project-local classes, configs, and helpers (RNN, FFNN, RNNEncoder, RNNDecoder,
# RNNSeq2Seq, SentimentExample, WordEmbedding, sentiment_conf, parser_conf,
# common_conf, get_batch, get_xy*, evaluate*, write_sentiment_examples) are
# assumed importable from the surrounding repo.


def train_sentiment_rnn(train_data: List[SentimentExample],
                        dev_data: List[SentimentExample],
                        word_embed: WordEmbedding):
    model = RNN(conf=sentiment_conf, word_embed=word_embed)
    acc = 0.0  # best dev accuracy seen so far
    lr = sentiment_conf.initial_lr
    epochs = sentiment_conf.epochs
    batch_size = sentiment_conf.batch_size
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_function = nn.BCELoss()
    # Index and right-pad the whole training set once, up front.
    x_padded, y_padded = get_xy_padded(data=train_data, word_embed=word_embed)
    # x, y = get_xy(data=train_data)
    for epoch in range(epochs):
        # shuffle(train_data)
        # shuffle(x_padded, y_padded)
        total_loss = 0.0
        no_of_batch = 0
        for start_ix in range(0, len(train_data), batch_size):
            x_batch = get_batch(data=x_padded, start_ix=start_ix, batch_size=batch_size)
            y_batch = get_batch(data=y_padded, start_ix=start_ix, batch_size=batch_size)
            model.zero_grad()
            probs = model(x_batch)
            loss = loss_function(probs, y_batch)
            # .item() detaches the scalar so per-batch graphs are not kept
            # alive across the whole epoch.
            total_loss += loss.item() / batch_size
            no_of_batch += 1
            loss.backward()
            optimizer.step()
            # print('Current Batch Loss is: {}'.format(loss/batch_size))
        print("Loss on epoch %i: %f" % (epoch, total_loss / no_of_batch))
        _, metrics = evaluate_sentiment(model=model, data=dev_data,
                                        word_embedding=word_embed, model_type='RNN')
        print(" ======== Performance after epoch {} is ====== ".format(epoch))
        print("New Accuracy = ", metrics.accuracy)
        # Checkpoint whenever dev accuracy improves.
        if metrics.accuracy > acc:
            acc = metrics.accuracy
            print("==== saving model ====")
            torch.save(model.state_dict(), sentiment_conf.model_path)
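# The loop above leans on get_xy_padded/get_batch. As a rough illustration only,
# here is a minimal sketch of what such helpers could look like; the _sketch
# names are hypothetical and the repo's real implementations may differ in
# signature and behavior.

def get_xy_padded_sketch(data, pad_ix=0):
    """Index every example, right-pad to the longest sequence, and stack."""
    seqs = [torch.tensor(ex.indexed_words, dtype=torch.long) for ex in data]
    labels = torch.tensor([float(ex.label) for ex in data])
    max_len = max(s.size(0) for s in seqs)
    padded = torch.full((len(seqs), max_len), pad_ix, dtype=torch.long)
    for i, s in enumerate(seqs):
        padded[i, :s.size(0)] = s
    return padded, labels


def get_batch_sketch(data, start_ix, batch_size):
    """Slice one contiguous batch; the final batch may be shorter."""
    return data[start_ix:start_ix + batch_size]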
def train_sentiment_ffnn(train_data: List[SentimentExample],
                         dev_data: List[SentimentExample],
                         word_embed: WordEmbedding):
    model = FFNN(sentiment_conf)
    acc = 0.0  # best dev accuracy seen so far
    lr = sentiment_conf.initial_lr
    optimizer = optim.Adam(model.parameters(), lr=lr)
    epochs = sentiment_conf.epochs
    batch_size = sentiment_conf.batch_size
    loss_function = nn.BCELoss()
    for epoch in range(epochs):
        shuffle(train_data)
        total_loss = 0.0
        for start_ix in range(0, len(train_data), batch_size):
            train_batch = get_batch(data=train_data, start_ix=start_ix, batch_size=batch_size)
            # Embed each batch on the fly: the FFNN consumes fixed-size features.
            x_batch, y_batch = get_xy_embedded(data=train_batch, word_embed=word_embed)
            model.zero_grad()
            probs = model(x_batch)
            loss = loss_function(probs, y_batch)
            total_loss += loss.item() / batch_size
            loss.backward()
            optimizer.step()
        print("Loss on epoch %i: %f" % (epoch, total_loss))
        _, metrics = evaluate_sentiment(model=model, data=dev_data,
                                        word_embedding=word_embed, model_type='FFNN')
        print(" ======== Performance after epoch {} is ====== ".format(epoch))
        print("New Accuracy = ", metrics.accuracy)
        if metrics.accuracy > acc:
            acc = metrics.accuracy
            print("==== saving model ====")
            torch.save(model.state_dict(), sentiment_conf.model_path)
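# get_xy_embedded is what turns a variable-length example into the fixed-size
# input the FFNN expects. A plausible sketch, assuming a deep-averaging setup
# where each example is represented by the mean of its word vectors; the
# accessor name get_embedding_by_index and the _sketch name are hypothetical.

def get_xy_embedded_sketch(data, word_embed):
    xs, ys = [], []
    for ex in data:
        vecs = [torch.as_tensor(word_embed.get_embedding_by_index(ix))
                for ix in ex.indexed_words]
        xs.append(torch.stack(vecs).mean(dim=0))  # average over the sentence
        ys.append(float(ex.label))
    return torch.stack(xs).float(), torch.tensor(ys)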
# Later variant of train_sentiment_rnn: adds a test set, a hand-rolled
# learning-rate schedule, and a lighter-weight evaluator. If both variants
# live in one module, this definition shadows the earlier one.
def train_sentiment_rnn(train_data: List[SentimentExample],
                        dev_data: List[SentimentExample],
                        test_data: List[SentimentExample],
                        word_embed: WordEmbedding):
    model = RNN(conf=sentiment_conf, word_embed=word_embed)
    acc = 0.0
    last_epoch_acc = 0.0
    lr = sentiment_conf.initial_lr
    lr_decay = sentiment_conf.lr_decay          # only used by the Adagrad line below
    weight_decay = sentiment_conf.weight_decay  # only used by the Adagrad line below
    epochs = sentiment_conf.epochs
    batch_size = sentiment_conf.batch_size
    # optimizer = optim.Adam(model.parameters(), lr=lr)
    # optimizer = torch.optim.Adagrad(model.parameters(), lr=lr,
    #                                 lr_decay=lr_decay, weight_decay=weight_decay)
    loss_function = nn.BCELoss()
    x_padded, y_padded = get_xy_padded(data=train_data, word_embed=word_embed)
    # x, y = get_xy(data=train_data)
    for epoch in range(epochs):
        # if (epoch + 1) % 5 == 0:
        #     lr = sentiment_conf.initial_lr
        print('Learning Rate is: {}'.format(lr))
        # Rebuilding the optimizer each epoch makes the manual lr updates below
        # take effect; note it also resets Adam's moment estimates every epoch.
        optimizer = optim.Adam(model.parameters(), lr=lr)
        # shuffle(train_data)
        # shuffle(x_padded, y_padded)
        total_loss = 0.0
        no_of_batch = 0
        for start_ix in range(0, len(train_data), batch_size):
            x_batch = get_batch(data=x_padded, start_ix=start_ix, batch_size=batch_size)
            y_batch = get_batch(data=y_padded, start_ix=start_ix, batch_size=batch_size)
            probs = model(x_batch)
            loss = loss_function(probs, y_batch)
            total_loss += loss.item() / batch_size
            no_of_batch += 1
            model.zero_grad()
            loss.backward()
            optimizer.step()
            # print('Current Batch Loss is: {}'.format(loss/batch_size))
        print("Loss on epoch %i: %f" % (epoch, total_loss / no_of_batch))
        # _, metrics = evaluate_sentiment(model=model, data=dev_data,
        #                                 word_embedding=word_embed, model_type='RNN')
        _, accuracy = evaluate_sentiment_simple(model=model, data=dev_data,
                                                word_embedding=word_embed, model_type='RNN')
        print(" ======== Performance after epoch {} is ====== ".format(epoch))
        print("New Accuracy = ", accuracy)
        if accuracy > acc:
            # New best: checkpoint, regenerate test predictions, halve the lr.
            acc = accuracy
            print("==== saving model ====")
            torch.save(model.state_dict(), sentiment_conf.model_path)
            y_pred, _ = evaluate_sentiment_simple(model=model, model_type='RNN',
                                                  word_embedding=word_embed, data=test_data)
            test_predicted = []
            for pred, data_point in zip(y_pred, test_data):
                test_predicted.append(SentimentExample(label=int(pred),
                                                       indexed_words=data_point.indexed_words))
            # Write the test set output
            print('writing Test Output')
            write_sentiment_examples(test_predicted, sentiment_conf.output_path,
                                     word_embed.word_ix)
            print('Done Writing Test Output')
            lr = lr / 2
        elif (accuracy - last_epoch_acc) < 0:
            # Dev accuracy dropped: restart from a much smaller lr.
            lr = sentiment_conf.initial_lr / 10
        elif lr > sentiment_conf.initial_lr / 100:
            # Plateau: keep halving down to a floor of initial_lr / 100.
            lr = lr / 2
        last_epoch_acc = accuracy
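# The schedule above rebuilds Adam every epoch so its lr can change. An
# alternative sketch (not what this repo does): keep one optimizer and let
# torch.optim.lr_scheduler.ReduceLROnPlateau halve the lr in place when dev
# accuracy plateaus. train_one_epoch is a hypothetical callable that runs one
# epoch of the batch loop above.

def train_with_plateau_scheduler_sketch(model, train_one_epoch, dev_data, word_embed):
    optimizer = optim.Adam(model.parameters(), lr=sentiment_conf.initial_lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=1)
    for epoch in range(sentiment_conf.epochs):
        train_one_epoch(model, optimizer)
        _, accuracy = evaluate_sentiment_simple(model=model, data=dev_data,
                                                word_embedding=word_embed,
                                                model_type='RNN')
        scheduler.step(accuracy)  # mode='max': halve lr when accuracy stalls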
def train(self):
    acc = 0.0             # unused in this trainer
    last_epoch_acc = 0.0  # unused in this trainer
    lr = parser_conf.initial_lr
    lr_decay = parser_conf.lr_decay  # unused with Adam
    weight_decay = parser_conf.weight_decay
    epochs = parser_conf.epochs
    batch_size = parser_conf.batch_size
    encoder = RNNEncoder(conf=parser_conf, word_embed=self.encoder_embed)
    decoder = RNNDecoder(conf=parser_conf, word_embed=self.decoder_embed)
    model = RNNSeq2Seq(encoder=encoder, decoder=decoder)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    # Mask padding positions out of the loss. NOTE: the index comes from the
    # encoder vocabulary, which only masks target padding correctly if the
    # encoder and decoder PAD indices coincide.
    loss_function = nn.CrossEntropyLoss(
        ignore_index=self.encoder_embed.word_ix.add_and_get_index(common_conf.PAD_TOKEN))
    for epoch in range(epochs):
        random.shuffle(self.train_data)
        x_padded, y_padded = get_xy(
            data=self.train_data,
            enc_pad_ix=self.input_ix.objs_to_ints[common_conf.PAD_TOKEN],
            dec_pad_ix=self.output_ix.objs_to_ints[common_conf.PAD_TOKEN])
        epoch_loss = 0
        for start_ix in range(0, len(self.train_data), batch_size):
            # for data_point in self.train_data:
            #     x, y = get_xy([data_point])
            x = get_batch(data=x_padded, start_ix=start_ix, batch_size=batch_size)
            y = get_batch(data=y_padded, start_ix=start_ix, batch_size=batch_size)
            optimizer.zero_grad()
            y_pred = model(x=x, y=y, teacher_forcing=parser_conf.teacher_force_train)
            # Flatten (batch, seq_len, vocab) -> (batch * seq_len, vocab) for the loss.
            y_pred = y_pred.view(-1, y_pred.shape[-1])
            y_true = y.view(-1)
            loss = loss_function(y_pred, y_true)
            loss.backward()
            # Clip the gradient norm to 1 to keep the RNN updates stable.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
            epoch_loss += loss.item()
        print(epoch_loss / len(self.train_data))
        print('current dev acc')
        pred_deriv = self.decode(test_data=self.dev_data, model=model)
        evaluate(dev_data=self.dev_data, pred_derivations=pred_deriv)
        # Halve the lr each epoch and restore it every fifth epoch. The new
        # value must be pushed into the optimizer; updating the local variable
        # alone (as the original code did) had no effect on Adam.
        if (epoch + 1) % 5 == 0:
            lr = parser_conf.initial_lr
        else:
            lr = lr / 2
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    return model
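# The heart of the loop above is model(x=x, y=y, teacher_forcing=...). A
# hypothetical sketch of what a teacher-forced forward pass can look like;
# the real RNNSeq2Seq/encoder/decoder interfaces in this repo may differ,
# and decoder.vocab_size is an assumed attribute.

class Seq2SeqForwardSketch(nn.Module):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x, y, teacher_forcing=0.5):
        hidden = self.encoder(x)  # encode the source batch into a hidden state
        batch, out_len = y.size(0), y.size(1)
        outputs = torch.zeros(batch, out_len, self.decoder.vocab_size)
        token = y[:, 0]  # start-of-sequence column of the gold targets
        for t in range(1, out_len):
            logits, hidden = self.decoder(token, hidden)
            outputs[:, t] = logits
            # With probability teacher_forcing, feed the gold token at step t;
            # otherwise feed the model's own greedy prediction.
            use_gold = random.random() < teacher_forcing
            token = y[:, t] if use_gold else logits.argmax(dim=-1)
        return outputs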