def test_ranking(model, test_batches):
    num_batches = len(test_batches)
    map, ndcg_1, ndcg_3, ndcg_10 = 0, 0, 0, 0
    for batch_no in range(1, num_batches + 1):
        test_sessions, length, test_clicks, click_labels = helper.session_to_tensor(
            test_batches[batch_no - 1], model.dictionary)
        if model.config.cuda:
            test_sessions = test_sessions.cuda()
            test_clicks = test_clicks.cuda()
            click_labels = click_labels.cuda()

        ret_val = compute_ranking_performance(model, test_sessions, test_clicks, click_labels)
        map += ret_val[0]
        ndcg_1 += ret_val[1]
        ndcg_3 += ret_val[2]
        ndcg_10 += ret_val[3]

    map = map / num_batches
    ndcg_1 = ndcg_1 / num_batches
    ndcg_3 = ndcg_3 / num_batches
    ndcg_10 = ndcg_10 / num_batches

    print('MAP - ', map)
    print('NDCG@1 - ', ndcg_1)
    print('NDCG@3 - ', ndcg_3)
    print('NDCG@10 - ', ndcg_10)

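# `compute_ranking_performance` is defined elsewhere in the repo. For
# reference, a minimal sketch of the standard metric arithmetic it reports
# (per-query average precision and NDCG@k over binary relevance labels) is
# below; the function names here are illustrative, not the repo's helpers.
import math

def average_precision(ranked_labels):
    # ranked_labels: binary relevance labels sorted by descending model score.
    hits, precision_sum = 0, 0.0
    for rank, label in enumerate(ranked_labels, start=1):
        if label:
            hits += 1
            precision_sum += float(hits) / rank
    return precision_sum / max(hits, 1)

def ndcg_at_k(ranked_labels, k):
    # Binary-gain DCG at cutoff k, normalized by the ideal ordering.
    dcg = sum(label / math.log(rank + 1, 2)
              for rank, label in enumerate(ranked_labels[:k], start=1))
    ideal = sorted(ranked_labels, reverse=True)
    idcg = sum(label / math.log(rank + 1, 2)
               for rank, label in enumerate(ideal[:k], start=1))
    return dcg / idcg if idcg > 0 else 0.0
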
def evaluate(model, dictionary, session_queries):
    sess = Session()
    sess.queries = session_queries
    session_tensor, query_lengths = helper.session_to_tensor([sess], dictionary, True)
    if args.cuda:
        session_tensor = session_tensor.cuda()
        query_lengths = query_lengths.cuda()
    return suggest_next_query(model, dictionary, session_tensor, query_lengths)

def evaluate(model, dictionary, session_queries):
    session = Session()
    session.queries = session_queries
    session_queries, session_query_length, rel_docs, rel_docs_length, doc_labels = helper.session_to_tensor(
        [session], dictionary, iseval=True)
    if model.config.cuda:
        session_queries = session_queries.cuda()
        session_query_length = session_query_length.cuda()
    return suggest_next_query(model, session_queries, session_query_length, dictionary)

def evaluate(model, dictionary, session_queries):
    session_queries, session_query_length, rel_docs, rel_docs_length, doc_labels = helper.session_to_tensor(
        [session_queries], dictionary, True)
    if model.config.cuda:
        session_queries = session_queries.cuda()
        session_query_length = session_query_length.cuda()
    output_words = suggest_next_query(model, session_queries, session_query_length)
    return " ".join(output_words[:-1])

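# `suggest_next_query` is implemented elsewhere and its signature differs
# across the evaluate() variants above. As a rough sketch of the greedy
# decoding loop presumably behind it: `step_fn` below stands in for one
# decoder step, (previous token, state) -> (vocabulary scores, new state);
# all names here are hypothetical, not the repo's API.
def greedy_decode(step_fn, start_idx, eos_idx, idx2word, max_length=20):
    state = None
    prev = start_idx
    output_words = []
    for _ in range(max_length):
        scores, state = step_fn(prev, state)
        # Greedy choice: pick the highest-scoring next token.
        prev = max(range(len(scores)), key=scores.__getitem__)
        output_words.append(idx2word[prev])
        if prev == eos_idx:
            break
    return output_words
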
def train(self, train_corpus):
    # Turn on training mode which enables dropout.
    self.model.train()

    # splitting the data in batches
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
    print('number of train batches = ', len(train_batches))

    start = time.time()
    print_loss_total = 0
    plot_loss_total = 0

    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        train_sessions, length, train_clicks, click_labels = helper.session_to_tensor(
            train_batches[batch_no - 1], self.dictionary)

        if self.config.cuda:
            # batch_size x session_length x max_query_length
            train_sessions = train_sessions.cuda()
            # batch_size x session_length x num_clicks_per_query x max_document_length
            train_clicks = train_clicks.cuda()
            # batch_size x session_length
            length = length.cuda()
            # batch_size x session_length x num_clicks_per_query
            click_labels = click_labels.cuda()

        loss = self.model(train_sessions, length, train_clicks, click_labels)
        # Important if we are using nn.DataParallel()
        if loss.size(0) > 1:
            loss = loss.mean()
        loss.backward()
        self.optimizer.step()

        print_loss_total += loss.data[0]
        plot_loss_total += loss.data[0]

        if batch_no % self.config.print_every == 0:
            print_loss_avg = print_loss_total / self.config.print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (
                helper.show_progress(start, batch_no / num_batches), batch_no,
                batch_no / num_batches * 100, print_loss_avg))

        if batch_no % self.config.plot_every == 0:
            plot_loss_avg = plot_loss_total / self.config.plot_every
            self.train_losses.append(plot_loss_avg)
            plot_loss_total = 0

def train(self, train_corpus):
    # Turn on training mode which enables dropout.
    self.model.train()

    # splitting the data in batches
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
    print('number of train batches = ', len(train_batches))

    start = time.time()
    print_loss_total = 0
    plot_loss_total = 0

    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        session_queries, session_query_length = helper.session_to_tensor(
            train_batches[batch_no - 1], self.dictionary)

        if self.config.cuda:
            # batch_size x session_length x max_query_length
            session_queries = session_queries.cuda()
            # batch_size x session_length
            session_query_length = session_query_length.cuda()

        loss = self.model(session_queries, session_query_length)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        clip_grad_norm(
            filter(lambda p: p.requires_grad, self.model.parameters()),
            self.config.max_norm)
        self.optimizer.step()

        print_loss_total += loss.data[0]
        plot_loss_total += loss.data[0]

        if batch_no % self.config.print_every == 0:
            print_loss_avg = print_loss_total / self.config.print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (
                helper.show_progress(start, batch_no / num_batches), batch_no,
                batch_no / num_batches * 100, print_loss_avg))

        if batch_no % self.config.plot_every == 0:
            plot_loss_avg = plot_loss_total / self.config.plot_every
            self.train_losses.append(plot_loss_avg)
            plot_loss_total = 0

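# `helper.batchify` is part of the repo's helper module. Judging from the
# loop over test_corpus.data.items() later in this file set, corpus data is
# keyed by session length; a plausible sketch (an assumption, not the actual
# implementation) groups equal-length sessions into fixed-size batches:
def batchify(data, batch_size):
    # data: dict mapping session length -> list of sessions.
    batches = []
    for sess_len, sessions in data.items():
        for i in range(0, len(sessions), batch_size):
            batches.append(sessions[i:i + batch_size])
    return batches
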
def test_loss(model, test_batches):
    num_batches = len(test_batches)
    test_loss = 0
    for batch_no in range(1, num_batches + 1):
        test_sessions, length, test_clicks, click_labels = helper.session_to_tensor(
            test_batches[batch_no - 1], model.dictionary)
        if model.config.cuda:
            test_sessions = test_sessions.cuda()
            test_clicks = test_clicks.cuda()
            length = length.cuda()
            click_labels = click_labels.cuda()

        loss = model(test_sessions, length, test_clicks, click_labels)
        if loss.size(0) > 1:
            loss = torch.mean(loss)
        test_loss += loss.data[0]

    print('test loss - ', (test_loss / num_batches))

def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()

    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))

    dev_loss = 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        session_queries, session_query_length = helper.session_to_tensor(
            dev_batches[batch_no - 1], self.dictionary, True)
        if self.config.cuda:
            session_queries = session_queries.cuda()
            session_query_length = session_query_length.cuda()

        loss = self.model(session_queries, session_query_length)
        dev_loss += loss.data[0]

    return dev_loss / num_batches

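# `helper.session_to_tensor` appears throughout with 2-, 4- and 5-tuple
# return signatures depending on the model variant. A minimal sketch of the
# two-tuple form used here (padded query-term indices plus per-query lengths),
# written against the PyTorch 0.3-era Variable API this code uses;
# `dictionary.word2idx` is an assumed attribute, not confirmed by the repo:
import torch
from torch.autograd import Variable

def session_to_tensor(sessions, dictionary, iseval=False):
    session_length = len(sessions[0].queries)
    max_query_length = max(len(query.query_terms)
                           for session in sessions for query in session.queries)
    queries = torch.LongTensor(len(sessions), session_length, max_query_length).zero_()
    lengths = torch.LongTensor(len(sessions), session_length).zero_()
    for b, session in enumerate(sessions):
        for s, query in enumerate(session.queries):
            lengths[b, s] = len(query.query_terms)
            for w, term in enumerate(query.query_terms):
                queries[b, s, w] = dictionary.word2idx[term]
    # volatile=True was the 0.3-era way to disable autograd at eval time.
    return Variable(queries, volatile=iseval), Variable(lengths, volatile=iseval)
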
def validate(self, dev_batches):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()

    dev_loss = 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        dev_sessions, length = helper.session_to_tensor(
            dev_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            dev_sessions = dev_sessions.cuda()
            length = length.cuda()

        loss = self.model(dev_sessions, length)
        if loss.size(0) > 1:
            loss = torch.mean(loss)
        dev_loss += loss.data[0]

    # Turn on training mode at the end of validation.
    self.model.train()

    return dev_loss / num_batches

def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()

    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))

    dev_loss = 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        dev_sessions, length, dev_clicks, click_labels = helper.session_to_tensor(
            dev_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            dev_sessions = dev_sessions.cuda()
            dev_clicks = dev_clicks.cuda()
            length = length.cuda()
            click_labels = click_labels.cuda()

        loss = self.model(dev_sessions, length, dev_clicks, click_labels)
        if loss.size(0) > 1:
            loss = loss.mean()
        dev_loss += loss.data[0]

    return dev_loss / num_batches

def train(self, train_corpus):
    # Turn on training mode which enables dropout.
    self.model.train()

    # splitting the data in batches
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
    print('number of train batches = ', len(train_batches))

    start = time.time()
    plot_loss_total = 0
    print_click_loss, print_decoding_loss = 0, 0

    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        session_queries, session_query_length, rel_docs, rel_docs_length, doc_labels = helper.session_to_tensor(
            train_batches[batch_no - 1], self.dictionary)

        if self.config.cuda:
            # batch_size x session_length x max_query_length
            session_queries = session_queries.cuda()
            # batch_size x session_length
            session_query_length = session_query_length.cuda()
            # batch_size x session_length x num_rel_docs_per_query x max_doc_length
            rel_docs = rel_docs.cuda()
            # batch_size x session_length x num_rel_docs_per_query
            rel_docs_length = rel_docs_length.cuda()
            # batch_size x session_length x num_rel_docs_per_query
            doc_labels = doc_labels.cuda()

        click_loss, decoding_loss = self.model(session_queries, session_query_length,
                                               rel_docs, rel_docs_length, doc_labels)
        loss = click_loss + decoding_loss
        if click_loss.size(0) > 1:
            click_loss = click_loss.mean()
        if decoding_loss.size(0) > 1:
            decoding_loss = decoding_loss.mean()
        print_click_loss += click_loss.data[0]
        print_decoding_loss += decoding_loss.data[0]

        # Important if we are using nn.DataParallel()
        if loss.size(0) > 1:
            loss = loss.mean()
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        clip_grad_norm(
            filter(lambda p: p.requires_grad, self.model.parameters()),
            self.config.max_norm)
        self.optimizer.step()

        plot_loss_total += loss.data[0]

        if batch_no % self.config.print_every == 0:
            click_loss_avg = print_click_loss / self.config.print_every
            decoding_loss_avg = print_decoding_loss / self.config.print_every
            print_click_loss, print_decoding_loss = 0, 0
            print('%s (%d %d%%) %.4f %.4f' % (
                helper.show_progress(start, batch_no / num_batches), batch_no,
                batch_no / num_batches * 100, click_loss_avg, decoding_loss_avg))

        if batch_no % self.config.plot_every == 0:
            plot_loss_avg = plot_loss_total / self.config.plot_every
            self.train_losses.append(plot_loss_avg)
            plot_loss_total = 0

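# `helper.show_progress(start, fraction)` produces the progress prefix in the
# log line above. A plausible sketch -- elapsed time plus estimated time
# remaining, in the style of the classic PyTorch tutorials -- is below; this
# is an assumption, not the repo's actual helper.
import math
import time

def show_progress(start, fraction):
    def as_minutes(seconds):
        minutes = math.floor(seconds / 60)
        return '%dm %ds' % (minutes, seconds - minutes * 60)
    elapsed = time.time() - start
    remaining = elapsed / fraction - elapsed
    return '%s (- %s)' % (as_minutes(elapsed), as_minutes(remaining))
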
def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()

    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))

    dev_loss, dev_click_loss, dev_decoding_loss = 0, 0, 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        session_queries, session_query_length, rel_docs, rel_docs_length, doc_labels = helper.session_to_tensor(
            dev_batches[batch_no - 1], self.dictionary, True)
        if self.config.cuda:
            session_queries = session_queries.cuda()
            session_query_length = session_query_length.cuda()
            rel_docs = rel_docs.cuda()
            rel_docs_length = rel_docs_length.cuda()
            doc_labels = doc_labels.cuda()

        click_loss, decoding_loss = self.model(session_queries, session_query_length,
                                               rel_docs, rel_docs_length, doc_labels)
        if click_loss.size(0) > 1:
            click_loss = click_loss.mean()
        if decoding_loss.size(0) > 1:
            decoding_loss = decoding_loss.mean()
        dev_click_loss += click_loss.data[0]
        dev_decoding_loss += decoding_loss.data[0]
        dev_loss += click_loss.data[0] + decoding_loss.data[0]

    print('validation loss = %.4f %.4f' % (
        (dev_click_loss / num_batches), (dev_decoding_loss / num_batches)))
    return dev_loss / num_batches

def test_ranking(model, test_batches, dictionary):
    num_batches = len(test_batches)
    map, mrr, ndcg_1, ndcg_3, ndcg_5, ndcg_10 = 0, 0, 0, 0, 0, 0
    for batch_no in range(1, num_batches + 1):
        session_queries, session_query_length, rel_docs, rel_docs_length, doc_labels = helper.session_to_tensor(
            test_batches[batch_no - 1], dictionary, True)
        if model.config.cuda:
            session_queries = session_queries.cuda()
            session_query_length = session_query_length.cuda()
            rel_docs = rel_docs.cuda()
            rel_docs_length = rel_docs_length.cuda()
            doc_labels = doc_labels.cuda()

        ret_val = compute_ranking_performance(model, session_queries, session_query_length,
                                              rel_docs, rel_docs_length, doc_labels)
        map += ret_val[0]
        mrr += ret_val[1]
        ndcg_1 += ret_val[2]
        ndcg_3 += ret_val[3]
        ndcg_5 += ret_val[4]
        ndcg_10 += ret_val[5]

    _map = map / num_batches
    mrr = mrr / num_batches
    ndcg_1 = ndcg_1 / num_batches
    ndcg_3 = ndcg_3 / num_batches
    ndcg_5 = ndcg_5 / num_batches
    ndcg_10 = ndcg_10 / num_batches

    print('MAP - ', _map)
    print('MRR - ', mrr)
    print('NDCG@1 - ', ndcg_1)
    print('NDCG@3 - ', ndcg_3)
    print('NDCG@5 - ', ndcg_5)
    print('NDCG@10 - ', ndcg_10)

for sess_len, sessions in test_corpus.data.items():
    for session in sessions:
        anchor_query_text = ' '.join(session.queries[sess_len - 2].query_terms[1:-1])
        if anchor_query_text not in candidate_map:
            continue
        cands = []
        for query_text in candidate_map[anchor_query_text]:
            query = data.Query()
            query.add_text(query_text, args.tokenize, args.max_query_length)
            cands.append(query)
        # The ground-truth next query is appended as the last candidate.
        cands.append(session.queries[sess_len - 1])

        scores = []
        session_queries, session_query_length = helper.session_to_tensor(
            [session], dictionary)
        if model.config.cuda:
            session_queries = session_queries.cuda()  # 1 x session_length x max_query_length
            session_query_length = session_query_length.cuda()  # 1 x session_length

        for cand in cands:
            next_query_tensor = helper.sequence_to_tensors(
                cand.query_terms, len(cand.query_terms), dictionary)
            next_query_tensor = Variable(next_query_tensor).unsqueeze(0)
            if model.config.cuda:
                next_query_tensor = next_query_tensor.cuda()  # 1 x max_query_length
            score = suggest_next_query(model, session_queries, session_query_length,
                                       next_query_tensor)
            scores.append(score)

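# The fragment above ends once every candidate is scored. Since the
# ground-truth next query is appended last to `cands`, one plausible
# follow-up (an assumption, not code from the repo) is to rank the
# candidates by score and measure the reciprocal rank of the ground truth:
def reciprocal_rank_of_truth(scores):
    # scores: one model score per candidate; the ground truth is last.
    truth_score = scores[-1]
    rank = 1 + sum(1 for s in scores[:-1] if s > truth_score)
    return 1.0 / rank
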
def train(self, train_batches, dev_batches, epoch_no):
    # Turn on training mode which enables dropout.
    self.model.train()

    start = time.time()
    print_loss_total = 0
    plot_loss_total = 0

    num_batches = len(train_batches)
    print('epoch %d started' % epoch_no)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        train_sessions, length = helper.session_to_tensor(
            train_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            train_sessions = train_sessions.cuda()
            length = length.cuda()

        loss = self.model(train_sessions, length)
        # Important if we are using nn.DataParallel()
        if loss.size(0) > 1:
            loss = torch.mean(loss)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        clip_grad_norm(self.model.parameters(), self.config.clip)
        self.optimizer.step()

        print_loss_total += loss.data[0]
        plot_loss_total += loss.data[0]

        if batch_no % self.config.print_every == 0:
            print_loss_avg = print_loss_total / self.config.print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (
                helper.show_progress(start, batch_no / num_batches), batch_no,
                batch_no / num_batches * 100, print_loss_avg))

        if batch_no % self.config.plot_every == 0:
            plot_loss_avg = plot_loss_total / self.config.plot_every
            self.train_losses.append(plot_loss_avg)
            plot_loss_total = 0

        if batch_no % self.config.dev_every == 0:
            dev_loss = self.validate(dev_batches)
            self.dev_losses.append(dev_loss)
            print('validation loss = %.4f' % dev_loss)
            if self.best_dev_loss == -1 or self.best_dev_loss > dev_loss:
                self.best_dev_loss = dev_loss
                helper.save_checkpoint({
                    'epoch': epoch_no,
                    'state_dict': self.model.state_dict(),
                    'best_loss': self.best_dev_loss,
                    'optimizer': self.optimizer.state_dict(),
                }, self.config.save_path + 'model_best.pth.tar')
            else:
                self.times_no_improvement += 1
                # no improvement in validation loss for last n times, so stop training
                if self.times_no_improvement == 20:
                    self.stop = True
                    break

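# `helper.save_checkpoint` lives in the repo's helper module; a common
# implementation (assumed here, not confirmed) simply serializes the state
# dict with torch.save:
import torch

def save_checkpoint(state, filename):
    # state: dict holding 'epoch', 'state_dict', 'best_loss' and 'optimizer'.
    torch.save(state, filename)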