def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))
    dev_loss = 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        queries, docs, click_labels = helper.batch_to_tensor(
            dev_batches[batch_no - 1], self.dictionary, self.config)
        if self.config.cuda:
            # batch_size x max_query_length
            queries = queries.cuda()
            # batch_size x num_clicks_per_query x max_document_length
            docs = docs.cuda()
            # batch_size x num_clicks_per_query
            click_labels = click_labels.cuda()
        score = self.model(queries, docs)
        loss = self.compute_loss(score, click_labels)
        dev_loss += loss.data[0]
    return dev_loss / num_batches
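# All of the snippets in this collection lean on helper.batchify(data, batch_size)
# to split a list of examples into fixed-size batches. The repo's own implementation
# is not reproduced here; the sketch below is only a minimal illustration of the
# assumed behavior (the optional `shuffle` flag mirrors the three-argument call used
# by some train() variants further down and is an assumption, not a confirmed signature).
import random

def batchify_sketch(data, batch_size, shuffle=False):
    """Split `data` (a list of examples) into consecutive batches of at most `batch_size`."""
    if shuffle:
        random.shuffle(data)
    # The final batch may be smaller than batch_size, which is why the training loops
    # normalize accuracy by len(batch) instead of by config.batch_size.
    return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]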
def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))
    dev_loss = 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        videos, video_len, descriptions, des_len = helper.videos_to_tensor(
            dev_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            videos = videos.cuda()  # batch_size x max_images_per_video x num_image_features
            descriptions = descriptions.cuda()  # batch_size x max_description_length
            des_len = des_len.cuda()  # batch_size
        loss = self.model(videos, video_len, descriptions, des_len)
        if loss.size(0) > 1:
            loss = loss.mean()
        dev_loss += loss.data[0]
    return dev_loss / num_batches
def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))
    num_batches = len(dev_batches)
    n_correct, n_total = 0, 0
    for batch_no in range(1, num_batches + 1):
        dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels, pos_sentences1, pos_sentences2 = \
            helper.batch_to_tensors(dev_batches[batch_no - 1], self.dictionary,
                                    iseval=True, pos=self.config.pos)
        if self.config.cuda and torch.cuda.is_available():
            dev_sentences1 = dev_sentences1.cuda()
            dev_sentences2 = dev_sentences2.cuda()
            pos_sentences1 = pos_sentences1.cuda()
            pos_sentences2 = pos_sentences2.cuda()
            dev_labels = dev_labels.cuda()
        assert dev_sentences1.size(0) == dev_sentences2.size(0)
        if self.config.pos:
            score = self.model(dev_sentences1, sent_len1, dev_sentences2, sent_len2,
                               pos_sentences1, pos_sentences2)
        else:
            score = self.model(dev_sentences1, sent_len1, dev_sentences2, sent_len2)
        n_correct += (torch.max(score, 1)[1].view(dev_labels.size()).data == dev_labels.data).sum()
        n_total += len(dev_batches[batch_no - 1])
    return 100. * n_correct / n_total
def train(self, train_corpus):
    # Turn on training mode which enables dropout.
    self.model.train()
    # Splitting the data in batches
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
    print('number of train batches = ', len(train_batches))
    start = time.time()
    print_acc_total = 0
    plot_acc_total = 0
    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = \
            helper.batch_to_tensors(train_batches[batch_no - 1], self.dictionary)
        if self.config.cuda and torch.cuda.is_available():
            train_sentences1 = train_sentences1.cuda()
            train_sentences2 = train_sentences2.cuda()
            train_labels = train_labels.cuda()
        assert train_sentences1.size(0) == train_sentences2.size(0)
        score = self.model(train_sentences1, sent_len1, train_sentences2, sent_len2)
        n_correct = (torch.max(score, 1)[1].view(train_labels.size()).data == train_labels.data).sum()
        loss = self.criterion(score, train_labels)
        # Important if we are using nn.DataParallel()
        if loss.size(0) > 1:
            loss = loss.mean()
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        clip_grad_norm(filter(lambda p: p.requires_grad, self.model.parameters()),
                       self.config.max_norm)
        self.optimizer.step()
        print_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])
        plot_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])
        if batch_no % self.config.print_every == 0:
            print_acc_avg = print_acc_total / self.config.print_every
            print_acc_total = 0
            print('%s (%d %d%%) %.2f' % (helper.show_progress(start, batch_no / num_batches),
                                         batch_no, batch_no / num_batches * 100, print_acc_avg))
        if batch_no % self.config.plot_every == 0:
            plot_acc_avg = plot_acc_total / self.config.plot_every
            self.train_accuracies.append(plot_acc_avg)
            plot_acc_total = 0
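# helper.batch_to_tensors drives most of the sentence-pair loops above and below.
# Its real definition lives in the repo's helper module and is not part of these
# snippets; the sketch below only illustrates the usual index-and-pad pattern.
# The example attributes `sentence1`, `sentence2`, `label` and the '<unk>' token
# are assumptions made for illustration.
import torch

def batch_to_tensors_sketch(batch, dictionary):
    len1 = [len(item.sentence1) for item in batch]
    len2 = [len(item.sentence2) for item in batch]
    sent1 = torch.zeros(len(batch), max(len1)).long()  # zero-padded word indices
    sent2 = torch.zeros(len(batch), max(len2)).long()
    labels = torch.LongTensor([item.label for item in batch])
    unk = dictionary.word2idx.get('<unk>', 0)
    for i, item in enumerate(batch):
        for j, word in enumerate(item.sentence1):
            sent1[i, j] = dictionary.word2idx.get(word, unk)
        for j, word in enumerate(item.sentence2):
            sent2[i, j] = dictionary.word2idx.get(word, unk)
    return sent1, len1, sent2, len2, labels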
def train(self):
    # Turn on training mode which enables dropout.
    self.model.train()
    # Splitting the data in batches
    batches, batch_labels = [], []
    for task_name, task in self.train_corpus.items():
        train_batches = helper.batchify(task.data, self.config.batch_size)
        batches.extend(train_batches)
        batch_labels.extend([task_name] * len(train_batches))
    combined = list(zip(batches, batch_labels))
    numpy.random.shuffle(combined)
    batches[:], batch_labels[:] = zip(*combined)
    print('number of train batches = ', len(batches))
    start = time.time()
    print_acc_total = 0
    plot_acc_total = 0
    num_back = 0
    num_batches = len(batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        if self.config.use_elmo:
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = \
                helper.batch_to_elmo_tensors(batches[batch_no - 1], self.dictionary)
        else:
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = \
                helper.batch_to_tensors(batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            train_sentences1 = train_sentences1.cuda()
            train_sentences2 = train_sentences2.cuda()
            train_labels = train_labels.cuda()
        assert train_sentences1.size(0) == train_sentences2.size(0)
        score = self.model(train_sentences1, sent_len1, train_sentences2, sent_len2,
                           batch_labels[batch_no - 1])
        n_correct = (torch.max(score, 1)[1].view(train_labels.size()).data == train_labels.data).sum()
        loss = self.criterion(score, train_labels)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        clip_grad_norm(filter(lambda p: p.requires_grad, self.model.parameters()),
                       self.config.max_norm)
        self.optimizer.step()
        print_acc_total += 100. * n_correct / len(batches[batch_no - 1])
        plot_acc_total += 100. * n_correct / len(batches[batch_no - 1])
        if batch_no % self.config.print_every == 0:
            sys.stdout.write("\b" * num_back)
            sys.stdout.write(" " * num_back)
            sys.stdout.write("\b" * num_back)
            log_info = '%s (%d %d%%) %.2f' % (helper.show_progress(start, batch_no / num_batches),
                                              batch_no, batch_no / num_batches * 100,
                                              print_acc_total / batch_no)
            sys.stdout.write(log_info)
            sys.stdout.flush()
            num_back = len(log_info)
def train(self, train_corpus):
    # Turn on training mode which enables dropout.
    self.model.train()
    # splitting the data in batches
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
    print('number of train batches = ', len(train_batches))
    start = time.time()
    print_loss_total = 0
    plot_loss_total = 0
    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        session_queries, session_query_length, rel_docs, rel_docs_length, doc_labels = \
            helper.session_to_tensor(train_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            # batch_size x session_length x max_query_length
            session_queries = session_queries.cuda()
            # batch_size x session_length
            session_query_length = session_query_length.cuda()
            # batch_size x session_length x num_rel_docs_per_query x max_doc_length
            rel_docs = rel_docs.cuda()
            # batch_size x session_length x num_rel_docs_per_query
            rel_docs_length = rel_docs_length.cuda()
            # batch_size x session_length x num_rel_docs_per_query
            doc_labels = doc_labels.cuda()
        loss = self.model(session_queries, session_query_length, rel_docs, rel_docs_length, doc_labels)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        clip_grad_norm(filter(lambda p: p.requires_grad, self.model.parameters()),
                       self.config.max_norm)
        self.optimizer.step()
        print_loss_total += loss.data[0]
        plot_loss_total += loss.data[0]
        if batch_no % self.config.print_every == 0:
            print_loss_avg = print_loss_total / self.config.print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (helper.show_progress(start, batch_no / num_batches),
                                         batch_no, batch_no / num_batches * 100, print_loss_avg))
        if batch_no % self.config.plot_every == 0:
            plot_loss_avg = plot_loss_total / self.config.plot_every
            self.train_losses.append(plot_loss_avg)
            plot_loss_total = 0
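# helper.show_progress(start, fraction) produces the '%s' part of the periodic log
# lines in every train()/validate() variant. The actual helper is not included here;
# this sketch assumes it reports elapsed time plus a simple ETA derived from the
# fraction of batches processed so far.
import time

def show_progress_sketch(start, fraction):
    elapsed = time.time() - start
    remaining = elapsed / fraction - elapsed if fraction > 0 else 0.0

    def fmt(seconds):
        minutes, secs = divmod(int(seconds), 60)
        return '%dm %ds' % (minutes, secs)

    return '%s (- %s)' % (fmt(elapsed), fmt(remaining))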
def getScoreSet1Toward2(set1, set2, model, dictionary, goAnnotation):
    def1, def2 = getDefinitions(set1, set2, goAnnotation, isJoin=1)
    ''' reverse the sentence ordering '''
    arrString = prepareBatchReverse(def1, def2)  # reverse the sentence ordering
    test_corpus = data.Corpus2(dictionary)
    test_corpus.parse(arrString, args.tokenize)
    test_batches = helper.batchify(test_corpus.data, 2)  # args.batch_size=1
    # print(test_batches)
    # print(test_batches[0])
    score = evaluate(model, test_batches, dictionary)
    # @score is distance of set 1 toward set 2. distance is not symmetric
    return score
def train(self, train_corpus):
    # Turn on training mode which enables dropout.
    self.model.train()
    # splitting the data in batches
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
    print('number of train batches = ', len(train_batches))
    start = time.time()
    print_loss_total = 0
    plot_loss_total = 0
    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        videos, video_len, descriptions, des_len = helper.videos_to_tensor(
            train_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            videos = videos.cuda()  # batch_size x max_images_per_video x num_image_features
            descriptions = descriptions.cuda()  # batch_size x max_description_length
            des_len = des_len.cuda()  # batch_size
        loss = self.model(videos, video_len, descriptions, des_len)
        # Important if we are using nn.DataParallel()
        if loss.size(0) > 1:
            loss = loss.mean()
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        clip_grad_norm(filter(lambda p: p.requires_grad, self.model.parameters()),
                       self.config.max_norm)
        self.optimizer.step()
        print_loss_total += loss.data[0]
        plot_loss_total += loss.data[0]
        if batch_no % self.config.print_every == 0:
            print_loss_avg = print_loss_total / self.config.print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (helper.show_progress(start, batch_no / num_batches),
                                         batch_no, batch_no / num_batches * 100, print_loss_avg))
        if batch_no % self.config.plot_every == 0:
            plot_loss_avg = plot_loss_total / self.config.plot_every
            self.train_losses.append(plot_loss_avg)
            plot_loss_total = 0
def train(self, train_corpus):
    # Turn on training mode which enables dropout.
    self.model.train()
    # splitting the data in batches
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
    print('number of train batches = ', len(train_batches))
    start = time.time()
    print_loss_total = 0
    plot_loss_total = 0
    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        train_queries, query_len, train_clicks, doc_len, click_labels = helper.batch_to_tensor(
            train_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            # batch_size x max_query_length
            train_queries = train_queries.cuda()
            # batch_size x num_clicks_per_query x max_document_length
            train_clicks = train_clicks.cuda()
            # batch_size x num_clicks_per_query
            click_labels = click_labels.cuda()
        score = self.model(train_queries, query_len, train_clicks, doc_len)
        # loss = self.compute_loss(score, click_labels)
        loss = f.binary_cross_entropy_with_logits(score, click_labels)
        # Important if we are using nn.DataParallel()
        if loss.size(0) > 1:
            loss = loss.mean()
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        clip_grad_norm(filter(lambda p: p.requires_grad, self.model.parameters()),
                       self.config.max_norm)
        self.optimizer.step()
        print_loss_total += loss.data[0]
        plot_loss_total += loss.data[0]
        if batch_no % self.config.print_every == 0:
            print_loss_avg = print_loss_total / self.config.print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (helper.show_progress(start, batch_no / num_batches),
                                         batch_no, batch_no / num_batches * 100, print_loss_avg))
        if batch_no % self.config.plot_every == 0:
            plot_loss_avg = plot_loss_total / self.config.plot_every
            self.train_losses.append(plot_loss_avg)
            plot_loss_total = 0
def train(self, train_corpus):
    # Turn on training mode which enables dropout.
    self.model.train()
    # splitting the data in batches
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
    print('number of train batches = ', len(train_batches))
    start = time.time()
    print_loss_total = 0
    plot_loss_total = 0
    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        train_sessions, length, train_clicks, click_labels = helper.session_to_tensor(
            train_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            # batch_size x session_length x max_query_length
            train_sessions = train_sessions.cuda()
            # batch_size x session_length x num_clicks_per_query x max_document_length
            train_clicks = train_clicks.cuda()
            # batch_size x session_length
            length = length.cuda()
            # batch_size x session_length x num_clicks_per_query
            click_labels = click_labels.cuda()
        loss = self.model(train_sessions, length, train_clicks, click_labels)
        # Important if we are using nn.DataParallel()
        if loss.size(0) > 1:
            loss = loss.mean()
        loss.backward()
        self.optimizer.step()
        print_loss_total += loss.data[0]
        plot_loss_total += loss.data[0]
        if batch_no % self.config.print_every == 0:
            print_loss_avg = print_loss_total / self.config.print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (helper.show_progress(start, batch_no / num_batches),
                                         batch_no, batch_no / num_batches * 100, print_loss_avg))
        if batch_no % self.config.plot_every == 0:
            plot_loss_avg = plot_loss_total / self.config.plot_every
            self.train_losses.append(plot_loss_avg)
            plot_loss_total = 0
def validate(self):
    # Turn on evaluation mode which disables dropout.
    self.generator.eval()
    # Splitting the data in batches
    batches, batch_labels = [], []
    for task_name, task in self.dev_corpus.items():
        dev_batches = helper.batchify(task.data, self.config.batch_size)
        batches.extend(dev_batches)
        batch_labels.extend([task_name] * len(dev_batches))
    combined = list(zip(batches, batch_labels))
    numpy.random.shuffle(combined)
    batches[:], batch_labels[:] = zip(*combined)
    print('number of dev batches = ', len(batches))
    num_batches = len(batches)
    n_correct, n_total = 0, 0
    for batch_no in range(1, num_batches + 1):
        if self.config.use_elmo:
            dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = \
                helper.batch_to_elmo_input(batches[batch_no - 1], self.dictionary, iseval=True)
        else:
            dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = \
                helper.batch_to_tensors(batches[batch_no - 1], self.dictionary, iseval=True)
        if self.config.cuda:
            dev_sentences1 = dev_sentences1.cuda()
            dev_sentences2 = dev_sentences2.cuda()
            dev_labels = dev_labels.cuda()
        assert dev_sentences1.size(0) == dev_sentences2.size(0)
        if self.config.adversarial:
            scores, adv_loss, diff_loss = self.generator(dev_sentences1, sent_len1,
                                                         dev_sentences2, sent_len2,
                                                         batch_labels[batch_no - 1])
        else:
            scores = self.generator(dev_sentences1, sent_len1, dev_sentences2, sent_len2,
                                    batch_labels[batch_no - 1])
        n_correct += (torch.max(scores, 1)[1].view(dev_labels.size()).data == dev_labels.data).sum()
        n_total += len(batches[batch_no - 1])
    return 100. * n_correct / n_total
def train(self, train_corpus):
    # Turn on training mode which enables dropout.
    self.model.train()
    # splitting the data in batches
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
    print('number of train batches = ', len(train_batches))
    start = time.time()
    print_loss_total = 0
    plot_loss_total = 0
    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        train_queries, train_docs, click_labels = helper.batch_to_tensor(
            train_batches[batch_no - 1], self.dictionary,
            self.config.max_query_length, self.config.max_doc_length)
        if self.config.cuda:
            # batch_size x max_query_length x vocab_size
            train_queries = train_queries.cuda()
            # batch_size x num_rel_docs_per_query x max_doc_length x vocab_size
            train_docs = train_docs.cuda()
            # batch_size x num_rel_docs_per_query
            click_labels = click_labels.cuda()
        softmax_prob = self.model(train_queries, train_docs)
        loss = self.compute_loss(softmax_prob, click_labels)
        loss.backward()
        self.optimizer.step()
        print_loss_total += loss.data[0]
        plot_loss_total += loss.data[0]
        if batch_no % self.config.print_every == 0:
            print_loss_avg = print_loss_total / self.config.print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (helper.show_progress(start, batch_no / num_batches),
                                         batch_no, batch_no / num_batches * 100, print_loss_avg))
        if batch_no % self.config.plot_every == 0:
            plot_loss_avg = plot_loss_total / self.config.plot_every
            self.train_losses.append(plot_loss_avg)
            plot_loss_total = 0
def get_trained_model2(c, corpus, dictionary, non_zero_indices):
    model = LogisticRegression(penalty='l1', tol=0.0001, C=c, fit_intercept=True,
                               intercept_scaling=1, solver='liblinear', max_iter=args.epochs,
                               multi_class='ovr', verbose=0)
    train_batches = helper.batchify(corpus.data, args.batch_size)
    print('number of train batches = ', len(train_batches))
    num_batches = len(train_batches)
    n_correct, n_total = 0, 0
    for batch_no in range(1, num_batches + 1):
        if batch_no % 500 == 0:
            print(' training batch: ', batch_no, ' of ', num_batches,
                  ' percentage: ', batch_no / num_batches)
        train_sentences1, train_labels = helper.batch_to_one_hot_encoded(
            train_batches[batch_no - 1], dictionary, non_zero_indices=non_zero_indices)
        model.fit(train_sentences1, train_labels)
    return model
def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))
    dev_loss = 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        session_queries, session_query_length = helper.session_to_tensor(
            dev_batches[batch_no - 1], self.dictionary, True)
        if self.config.cuda:
            session_queries = session_queries.cuda()
            session_query_length = session_query_length.cuda()
        loss = self.model(session_queries, session_query_length)
        dev_loss += loss.data[0]
    return dev_loss / num_batches
def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))
    dev_loss = 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        dev_queries, dev_clicks, click_labels = helper.batch_to_tensor(
            dev_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            dev_queries = dev_queries.cuda()
            dev_clicks = dev_clicks.cuda()
            click_labels = click_labels.cuda()
        score = self.model(dev_queries, dev_clicks)
        loss = self.compute_loss(score, click_labels)
        dev_loss += loss.data[0]
    return dev_loss / num_batches
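# Several validate() variants above call self.compute_loss(score, click_labels),
# whose definition is not part of these snippets. A plausible stand-in, consistent
# with the sibling variants that apply binary_cross_entropy_with_logits directly to
# the click scores, is sketched here; treat it as an assumption, not the repo's code.
import torch.nn.functional as f

def compute_loss_sketch(score, click_labels):
    # score: batch_size x num_clicks_per_query, raw (unnormalized) relevance logits
    # click_labels: batch_size x num_clicks_per_query, 0/1 click targets
    return f.binary_cross_entropy_with_logits(score, click_labels)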
def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    print_every = self.config.print_every
    start = time.time()
    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))
    num_batches = len(dev_batches)
    n_correct, n_total = 0, 0
    for batch_no in range(1, num_batches + 1):
        dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = \
            helper.batch_to_tensors(dev_batches[batch_no - 1], self.dictionary, True)
        if self.config.cuda:
            dev_sentences1 = dev_sentences1.cuda()
            dev_sentences2 = dev_sentences2.cuda()
            dev_labels = dev_labels.cuda()
        assert dev_sentences1.size(0) == dev_sentences2.size(0)
        score = self.model(dev_sentences1, sent_len1, dev_sentences2, sent_len2)
        n_correct += (torch.max(score, 1)[1].view(dev_labels.size()).data == dev_labels.data).sum()
        n_total += len(dev_batches[batch_no - 1])
        print_acc = 100. * n_correct / n_total
        if batch_no % print_every == 0 or self.config.debug:
            p = 100.0
            print('%s (%d %d%%) (%.2f) %.2f' % (helper.show_progress(start, batch_no / num_batches),
                                                batch_no, batch_no / num_batches * 100, p, print_acc))
    return 100. * n_correct / n_total
def eval_routine(corpus, dictionary, model, non_zero_indices=None):
    nexamples = len(corpus.data)
    dev_batches = helper.batchify(corpus.data, args.batch_size)
    print('number of train batches = ', len(dev_batches))
    total_acc = 0.0
    correct = 0.0
    num_batches = len(dev_batches)
    n_correct, n_total = 0, 0
    for batch_no in range(1, num_batches + 1):
        if batch_no % 500 == 0:
            print(' validation batch: ', batch_no, ' of ', num_batches,
                  ' percentage: ', batch_no / num_batches)
        eval_sentences, eval_labels = helper.batch_to_one_hot_encoded(
            dev_batches[batch_no - 1], dictionary, non_zero_indices=non_zero_indices)
        acc = model.score(eval_sentences, eval_labels)
        correct += acc * len(eval_labels)
        total_acc += acc
        # if batch_no % 500 == 0: print(' for this minibatch score: ', acc, ' correct: ', acc * len(eval_labels), ' of ', len(eval_labels), 'total acc: ', total_acc, ' total correct: ', correct)
    print(' Correct: ', correct, ' acc: ', correct / nexamples,
          ' sanity check: ', total_acc / num_batches)
    return correct / nexamples
def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))
    dev_loss = 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        dev_queries, q_len, dev_clicks, d_len, click_labels = helper.batch_to_tensor(
            dev_batches[batch_no - 1], self.dictionary, True)
        if self.config.cuda:
            dev_queries = dev_queries.cuda()
            dev_clicks = dev_clicks.cuda()
            click_labels = click_labels.cuda()
        score = self.model(dev_queries, q_len, dev_clicks, d_len)
        loss = f.binary_cross_entropy_with_logits(score, click_labels)
        if loss.size(0) > 1:
            loss = loss.mean()
        dev_loss += loss.data[0]
    return dev_loss / num_batches
def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))
    dev_loss = 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        dev_sessions, length, dev_clicks, click_labels = helper.session_to_tensor(
            dev_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            dev_sessions = dev_sessions.cuda()
            dev_clicks = dev_clicks.cuda()
            length = length.cuda()
            click_labels = click_labels.cuda()
        loss = self.model(dev_sessions, length, dev_clicks, click_labels)
        if loss.size(0) > 1:
            loss = loss.mean()
        dev_loss += loss.data[0]
    return dev_loss / num_batches
def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))
    dev_loss = 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        q1_var, q1_len, q2_var, q2_len = helper.batch_to_tensor(
            dev_batches[batch_no - 1], self.dictionary,
            reverse=self.config.reverse, iseval=True)
        if self.config.cuda:
            q1_var = q1_var.cuda()  # batch_size x max_len
            q2_var = q2_var.cuda()  # batch_size x max_len
            q2_len = q2_len.cuda()  # batch_size
        loss = self.model(q1_var, q1_len, q2_var, q2_len)
        dev_loss += loss.data[0]
    return dev_loss / num_batches
def validate(self, dev_corpus):
    # Turn on evaluation mode which disables dropout.
    self.model.eval()
    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))
    dev_loss, dev_click_loss, dev_decoding_loss = 0, 0, 0
    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        session_queries, session_query_length, rel_docs, rel_docs_length, doc_labels = \
            helper.session_to_tensor(dev_batches[batch_no - 1], self.dictionary, True)
        if self.config.cuda:
            session_queries = session_queries.cuda()
            session_query_length = session_query_length.cuda()
            rel_docs = rel_docs.cuda()
            rel_docs_length = rel_docs_length.cuda()
            doc_labels = doc_labels.cuda()
        click_loss, decoding_loss = self.model(session_queries, session_query_length,
                                               rel_docs, rel_docs_length, doc_labels)
        if click_loss.size(0) > 1:
            click_loss = click_loss.mean()
        if decoding_loss.size(0) > 1:
            decoding_loss = decoding_loss.mean()
        dev_click_loss += click_loss.data[0]
        dev_decoding_loss += decoding_loss.data[0]
        dev_loss += click_loss.data[0] + decoding_loss.data[0]
    print('validation loss = %.4f %.4f' % ((dev_click_loss / num_batches),
                                           (dev_decoding_loss / num_batches)))
    return dev_loss / num_batches
print('Train set size = ', len(train_corpus.data))
print('Dev set size = ', len(dev_corpus.data))
print('Vocabulary size = ', len(dictionary))

# save the dictionary object to use during testing
helper.save_object(dictionary, args.save_path + 'dictionary.p')

# embeddings_index = helper.load_word_embeddings(args.word_vectors_directory, args.word_vectors_file)
# helper.save_word_embeddings('../data/glove/', 'glove.840B.300d.q2q.txt', embeddings_index, dictionary.idx2word)
embeddings_index = helper.load_word_embeddings(args.word_vectors_directory, 'glove.840B.300d.q2q.txt')
print('Number of OOV words = ', len(dictionary) - len(embeddings_index))

# Splitting the data in batches
train_batches = helper.batchify(train_corpus.data, args.batch_size)
print('Number of train batches = ', len(train_batches))
dev_batches = helper.batchify(dev_corpus.data, args.batch_size)
print('Number of dev batches = ', len(dev_batches))

# ###############################################################################
# # Build the model
# ###############################################################################

model = Sequence2Sequence(dictionary, embeddings_index, args)
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), args.lr)
best_loss = -1

# for training on multiple GPUs. set multiple GPUs by setting CUDA_VISIBLE_DEVICES, ex., CUDA_VISIBLE_DEVICES=0,1
if 'CUDA_VISIBLE_DEVICES' in os.environ:
if 'imdb' in args.task:
    ###############################################################################
    # Load Learning to Skim paper's Pickle file
    ###############################################################################
    train_d, dev_d, test_d = helper.get_splited_imdb_data(args.save_path + 'data/' + 'imdb.p')
    test_corpus.parse(test_d, task, args.max_example)
elif task == 'multinli' and args.test != 'train':
    for partition in ['_matched', '_mismatched']:
        test_corpus.parse(args.data + task + '/' + args.test + partition + '.txt',
                          task, args.max_example)
        print('[' + partition[1:] + '] dataset size = ', len(test_corpus.data))
        test_batches = helper.batchify(test_corpus.data, args.batch_size)
        if args.test == 'test':
            evaluate(model, test_batches, dictionary,
                     args.save_path + args.task + partition + '.csv')
        else:
            test_accuracy, test_f1 = evaluate(model, test_batches, dictionary)
            print('[' + partition[1:] + '] accuracy: %.2f%%' % test_accuracy)
            print('[' + partition[1:] + '] f1: %.2f%%' % test_f1)
else:
    test_corpus.parse(args.data + task + '/' + args.test + '.txt', task, args.max_example)
    print('dataset size = ', len(test_corpus.data))
    test_batches = helper.batchify(test_corpus.data, args.batch_size)
    test_accuracy, test_f1, clf_report = evaluate(model, test_batches,
def train(self):
    # Turn on training mode which enables dropout.
    self.generator.train()
    # Splitting the data in batches
    batches, batch_labels = [], []
    for task_name, task in self.train_corpus.items():
        train_batches = helper.batchify(task.data, self.config.batch_size)
        batches.extend(train_batches)
        batch_labels.extend([task_name] * len(train_batches))
    combined = list(zip(batches, batch_labels))
    numpy.random.shuffle(combined)
    batches[:], batch_labels[:] = zip(*combined)
    print('number of train batches = ', len(batches))
    start = time.time()
    num_back, print_acc_total, plot_acc_total = 0, 0, 0
    num_batches = len(batches)
    for batch_no in range(1, num_batches + 1):
        if self.config.use_elmo:
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = \
                helper.batch_to_elmo_input(batches[batch_no - 1], self.dictionary)
        else:
            train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = \
                helper.batch_to_tensors(batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            train_sentences1 = train_sentences1.cuda()
            train_sentences2 = train_sentences2.cuda()
            train_labels = train_labels.cuda()
        assert train_sentences1.size(0) == train_sentences2.size(0)
        if self.config.adversarial:
            self.optimizerD.zero_grad()
            scores, diff_loss, shared_rep = self.generator(train_sentences1, sent_len1,
                                                           train_sentences2, sent_len2,
                                                           batch_labels[batch_no - 1])
            n_correct = (torch.max(scores, 1)[1].view(train_labels.size()).data == train_labels.data).sum()
            shared_sent_rep1 = shared_rep[0]
            shared_sent_rep2 = shared_rep[1]
            # run the discriminator to distinguish tasks
            task_prob1 = self.discriminator(shared_sent_rep1.detach())  # B X num_tasks
            task_prob2 = self.discriminator(shared_sent_rep2.detach())  # B X num_tasks
            comb_prob = torch.cat((task_prob1, task_prob2), 0)  # 2B X num_tasks
            task_prob = torch.sum(comb_prob, 0).squeeze()  # size = |num_tasks|
            adv_loss = -1 * task_prob[self.task_ids[batch_labels[batch_no - 1]]]
            adv_loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(filter(lambda p: p.requires_grad, self.discriminator.parameters()),
                           self.config.max_norm)
            self.optimizerD.step()
            self.optimizerG.zero_grad()
            cross_entropy_loss = self.criterion(scores, train_labels)
            # run the discriminator to distinguish tasks
            task_prob1 = self.discriminator(shared_sent_rep1)  # B X num_tasks
            task_prob2 = self.discriminator(shared_sent_rep2)  # B X num_tasks
            comb_prob = torch.cat((task_prob1, task_prob2), 0)  # 2B X num_tasks
            task_prob = torch.sum(comb_prob, 0).squeeze()  # size = |num_tasks|
            adv_loss = -1 * task_prob[self.task_ids[batch_labels[batch_no - 1]]]
            total_loss = cross_entropy_loss + self.config.beta * adv_loss + self.config.gamma * diff_loss
            # Important if we are using nn.DataParallel()
            if total_loss.size(0) > 1:
                total_loss = total_loss.mean()
            total_loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(filter(lambda p: p.requires_grad, self.generator.parameters()),
                           self.config.max_norm)
            self.optimizerG.step()
        else:
            self.optimizerG.zero_grad()
            scores = self.generator(train_sentences1, sent_len1, train_sentences2, sent_len2,
                                    batch_labels[batch_no - 1])
            n_correct = (torch.max(scores, 1)[1].view(train_labels.size()).data == train_labels.data).sum()
            loss = self.criterion(scores, train_labels)
            # Important if we are using nn.DataParallel()
            if loss.size(0) > 1:
                loss = loss.mean()
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
            clip_grad_norm(filter(lambda p: p.requires_grad, self.generator.parameters()),
                           self.config.max_norm)
            self.optimizerG.step()
        print_acc_total += 100. * n_correct / len(batches[batch_no - 1])
        plot_acc_total += 100. * n_correct / len(batches[batch_no - 1])
        if batch_no % self.config.print_every == 0:
            sys.stdout.write("\b" * num_back)
            sys.stdout.write(" " * num_back)
            sys.stdout.write("\b" * num_back)
            log_info = '%s (%d %d%%) %.2f%%' % (helper.show_progress(start, batch_no / num_batches),
                                                batch_no, batch_no / num_batches * 100,
                                                print_acc_total / batch_no)
            sys.stdout.write(log_info)
            sys.stdout.flush()
            num_back = len(log_info)
        if batch_no % self.config.plot_every == 0:
            plot_acc_avg = plot_acc_total / self.config.plot_every
            self.train_accuracies.append(plot_acc_avg)
            plot_acc_total = 0
    # this releases all cache memory and becomes visible to other applications
    torch.cuda.empty_cache()
multinli_test_mismatched = data.Corpus(args.data + 'multinli/', dictionary)
multinli_test_mismatched.parse('test_mismatched.txt', 'multinli', args.tokenize, is_test_corpus=True)
print('multinli test[mismatched] set size = ', len(multinli_test_mismatched.data))
tasks.append(('multinli', 3))

if tasks:
    model = Generator(dictionary, embeddings_index, args, tasks)
    print(model)
    if args.cuda:
        model = model.cuda()
    helper.load_model_states_from_checkpoint(model, args.save_path + 'model_best.pth.tar',
                                             'state_dict_G', args.cuda)
    print('vocabulary size = ', len(dictionary))
    if 'quora' in args.task:
        test_batches = helper.batchify(quora_test.data, args.batch_size)
        test_accuracy = evaluate(model, test_batches, 'quora', dictionary)
        print('quora test accuracy: %f%%' % test_accuracy)
    if 'snli' in args.task:
        test_batches = helper.batchify(snli_test.data, args.batch_size)
        test_accuracy = evaluate(model, test_batches, 'snli', dictionary)
        print('snli test accuracy: %f%%' % test_accuracy)
    if 'multinli' in args.task:
        # test matched part
        test_batches = helper.batchify(multinli_test_matched.data, args.batch_size)
        evaluate(model, test_batches, 'multinli', dictionary, args.save_path + 'multinli_matched.csv')
        # test mismatched part
        test_batches = helper.batchify(multinli_test_mismatched.data, args.batch_size)
        evaluate(model, test_batches, 'multinli', dictionary, args.save_path + 'multinli_mismatched.csv')
def train(self, train_corpus):
    # Turn on training mode which enables dropout.
    self.model.train()
    # Splitting the data in batches
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size)
    print('number of train batches = ', len(train_batches))
    start = time.time()
    print_acc_total = 0
    plot_acc_total = 0
    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = \
            helper.batch_to_tensors(train_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            train_sentences1 = train_sentences1.cuda()
            train_sentences2 = train_sentences2.cuda()
            train_labels = train_labels.cuda()
        assert train_sentences1.size(0) == train_sentences2.size(0)
        score = self.model(train_sentences1, sent_len1, train_sentences2, sent_len2)
        n_correct = (torch.max(score, 1)[1].view(train_labels.size()).data == train_labels.data).sum()
        loss = self.criterion(score, train_labels)
        # Important if we are using nn.DataParallel()
        if loss.size(0) > 1:
            loss = loss.mean()
        loss.backward()
        # gradient clipping (off by default)
        shrink_factor = 1
        total_norm = 0
        for p in self.model.parameters():
            if p.requires_grad:
                p.grad.data.div_(train_sentences1.size(0))  # divide by the actual batch size
                total_norm += p.grad.data.norm() ** 2
        total_norm = numpy.sqrt(total_norm)
        if total_norm > self.config.clip:
            shrink_factor = self.config.clip / total_norm
        current_lr = self.optimizer.param_groups[0]['lr']  # current lr (no external "lr", for adam)
        self.optimizer.param_groups[0]['lr'] = current_lr * shrink_factor  # just for update
        self.optimizer.step()
        self.optimizer.param_groups[0]['lr'] = current_lr
        print_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])
        plot_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])
        if batch_no % self.config.print_every == 0:
            print_acc_avg = print_acc_total / self.config.print_every
            print_acc_total = 0
            print('%s (%d %d%%) %.2f' % (helper.show_progress(start, batch_no / num_batches),
                                         batch_no, batch_no / num_batches * 100, print_acc_avg))
        if batch_no % self.config.plot_every == 0:
            plot_acc_avg = plot_acc_total / self.config.plot_every
            self.train_accuracies.append(plot_acc_avg)
            plot_acc_total = 0
def train(self, train_corpus, epoch):
    # Turn on training mode which enables dropout.
    self.model.train()
    # Splitting the data in batches
    shuffle = True
    # if self.config.task == 'sst': shuffle = False
    print(shuffle)
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size, shuffle)
    print('number of train batches = ', len(train_batches))
    start = time.time()
    print_acc_total = 0
    plot_acc_total = 0
    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = \
            helper.batch_to_tensors(train_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            train_sentences1 = train_sentences1.cuda()
            train_sentences2 = train_sentences2.cuda()
            train_labels = train_labels.cuda()
        assert train_sentences1.size(0) == train_sentences2.size(0)
        score = self.model(train_sentences1, sent_len1, train_sentences2, sent_len2)
        n_correct = (torch.max(score, 1)[1].view(train_labels.size()).data == train_labels.data).sum()
        # print(' score size ', score.size(), train_labels.size())
        loss = self.criterion(score, train_labels)
        ############################ custom new_loss ############################
        # z2 = z_pred.dimshuffle((0, 1, "x"))
        # logpz = - T.nnet.binary_crossentropy(probs, z2) * masks
        # logpz = self.logpz = logpz.reshape(x.shape)
        # probs = self.probs = probs.reshape(x.shape)
        # # batch
        # z = z_pred
        # self.zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        # self.zdiff = T.sum(T.abs_(z[1:] - z[:-1]), axis=0, dtype=theano.config.floatX)
        # zsum = generator.zsum
        # zdiff = generator.zdiff
        # logpz = generator.logpz
        # coherent_factor = args.sparsity * args.coherent
        # loss = self.loss = T.mean(loss_vec)  # this is not needed as in cost_vec loss_vec is used
        # sparsity_cost = self.sparsity_cost = T.mean(zsum) * args.sparsity + \
        #     T.mean(zdiff) * coherent_factor
        # cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor
        # cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
        # self.obj = T.mean(cost_vec)
        ############################ custom new_loss ############################
        if loss.size(0) > 1:
            loss = loss.mean()
        # print('loss:', loss)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        grad_norm = clip_grad_norm(
            filter(lambda p: p.requires_grad, self.model.parameters()),
            self.config.max_norm)
        # if epoch == 11:
        #     print(batch_no, grad_norm)
        self.optimizer.step()
        print_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])
        plot_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])
        if batch_no % self.config.print_every == 0:
            print_acc_avg = print_acc_total / self.config.print_every
            print_acc_total = 0
            print('%s (%d %d%%) %.2f' % (helper.show_progress(start, batch_no / num_batches),
                                         batch_no, batch_no / num_batches * 100, print_acc_avg))
        if batch_no % self.config.plot_every == 0:
            plot_acc_avg = plot_acc_total / self.config.plot_every
            self.train_accuracies.append(plot_acc_avg)
            plot_acc_total = 0
                              is_test_corpus=True)
print('multinli dev[mismatched] set size = ', len(multinli_dev_mismatched.data))
tasks.append(('multinli', 3))

if tasks:
    model = Generator(dictionary, embeddings_index, args, tasks)
    if args.cuda:
        model = model.cuda()
    helper.load_model_states_from_checkpoint(model, args.save_path + 'model_best.pth.tar',
                                             'state_dict_G', args.cuda)
    print('vocabulary size = ', len(dictionary))
    if 'quora' in args.task:
        dev_batches = helper.batchify(quora_dev.data, args.batch_size)
        dev_accuracy = evaluate(model, dev_batches, 'quora', dictionary)
        print('quora dev accuracy: %f%%' % dev_accuracy)
    if 'snli' in args.task:
        dev_batches = helper.batchify(snli_dev.data, args.batch_size)
        dev_accuracy = evaluate(model, dev_batches, 'snli', dictionary)
        print('snli dev accuracy: %f%%' % dev_accuracy)
    if 'multinli' in args.task:
        # test matched part
        dev_batches = helper.batchify(multinli_dev_matched.data, args.batch_size)
        dev_accuracy = evaluate(model, dev_batches, 'multinli', dictionary)
        print('multinli [matched] dev accuracy: %f%%' % dev_accuracy)
        # test mismatched part
def main():
    ###############################################################################
    # Load data
    ###############################################################################
    dictionary = data.Dictionary()
    train_corpus = data.Corpus(dictionary)
    dev_corpus = data.Corpus(dictionary)
    test_corpus = data.Corpus(dictionary)

    task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task]
    for task in task_names:
        skip_first_line = True if task == 'sick' else False
        train_corpus.parse(task, args.data, 'train.txt', args.tokenize,
                           num_examples=args.max_example, skip_first_line=skip_first_line)
        if task == 'multinli':
            dev_corpus.parse(task, args.data, 'dev_matched.txt', args.tokenize)
            dev_corpus.parse(task, args.data, 'dev_mismatched.txt', args.tokenize)
            test_corpus.parse(task, args.data, 'test_matched.txt', args.tokenize,
                              is_test_corpus=False)
            test_corpus.parse(task, args.data, 'test_mismatched.txt', args.tokenize,
                              is_test_corpus=False)
        else:
            dev_corpus.parse(task, args.data, 'dev.txt', args.tokenize,
                             skip_first_line=skip_first_line)
            test_corpus.parse(task, args.data, 'test.txt', args.tokenize,
                              is_test_corpus=False, skip_first_line=skip_first_line)

    print('train set size = ', len(train_corpus.data))
    print('development set size = ', len(dev_corpus.data))
    print('test set size = ', len(test_corpus.data))
    print('vocabulary size = ', len(dictionary))

    # save the dictionary object to use during testing
    helper.save_object(dictionary, args.save_path + args.task + '_dictionary.pkl')

    embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                                   args.word_vectors_file, dictionary.word2idx)
    print('number of OOV words = ', len(dictionary) - len(embeddings_index))

    # ###############################################################################
    # # Build the model
    # ###############################################################################

    model = SentenceClassifier(dictionary, embeddings_index, args)
    optim_fn, optim_params = helper.get_optimizer(args.optimizer)
    optimizer = optim_fn(filter(lambda p: p.requires_grad, model.parameters()), **optim_params)
    best_acc = 0

    if args.cuda:
        model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # ###############################################################################
    # # Train the model
    # ###############################################################################

    train = Train(model, optimizer, dictionary, embeddings_index, args, best_acc)
    bestmodel = train.train_epochs(train_corpus, dev_corpus, args.start_epoch, args.epochs)
    test_batches = helper.batchify(test_corpus.data, args.batch_size)
    if 'multinli' in task_names:
        print('Skipping evaluating best model. Evaluate using the test script.')
    else:
        test_accuracy, test_f1 = evaluate(bestmodel, test_batches, dictionary)
        print('accuracy: %.2f%%' % test_accuracy)
        print('f1: %.2f%%' % test_f1)
def train(self, train_corpus, epoch):
    # Turn on training mode which enables dropout.
    self.model.train()
    # Splitting the data in batches
    shuffle = True
    # if self.config.task == 'sst': shuffle = False
    print(shuffle)
    train_batches = helper.batchify(train_corpus.data, self.config.batch_size, shuffle)
    print('number of train batches = ', len(train_batches))
    start = time.time()
    print_acc_total = 0
    plot_acc_total = 0
    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()
        train_sentences1, sent_len1, train_sentences2, sent_len2, train_labels = \
            helper.batch_to_tensors(train_batches[batch_no - 1], self.dictionary)
        if self.config.cuda:
            train_sentences1 = train_sentences1.cuda()
            train_sentences2 = train_sentences2.cuda()
            train_labels = train_labels.cuda()
        assert train_sentences1.size(0) == train_sentences2.size(0)
        # print(' train label size: ', train_labels.size(), ' train data size: ', train_sentences1.size())
        # print(' labels: ', train_labels)
        score = self.model(train_sentences1)
        n_correct = (torch.max(score, 1)[1].view(train_labels.size()).data == train_labels.data).sum()
        # print(' score size ', score.size(), train_labels.size())
        loss = self.criterion(score, train_labels)
        if loss.size(0) > 1:
            loss = loss.mean()
        # print('loss:', loss)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        grad_norm = clip_grad_norm(
            filter(lambda p: p.requires_grad, self.model.parameters()),
            self.config.max_norm)
        # if epoch == 11:
        #     print(batch_no, grad_norm)
        self.optimizer.step()
        print_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])
        plot_acc_total += 100. * n_correct / len(train_batches[batch_no - 1])
        if batch_no % self.config.print_every == 0:
            print_acc_avg = print_acc_total / self.config.print_every
            print_acc_total = 0
            print('%s (%d %d%%) %.2f' % (helper.show_progress(start, batch_no / num_batches),
                                         batch_no, batch_no / num_batches * 100, print_acc_avg))
        if batch_no % self.config.plot_every == 0:
            plot_acc_avg = plot_acc_total / self.config.plot_every
            self.train_accuracies.append(plot_acc_avg)
            plot_acc_total = 0