def evaluate_switch(model, linear_layer, data, my_vocab):
    all_paragraphs = [build_paragraph(this_sample, my_vocab)
                      for this_sample in data]
    all_paragraph_lengths = [len(this_sample) for this_sample in data]
    # Build a pool of candidate sentences from the first (up to) 2000 paragraphs.
    sentence_cands = []
    for i in range(min(2000, len(all_paragraphs))):
        sentence_cands += all_paragraphs[i][0]
    total_corrects = 0
    total_samples = 0
    for current_batch in range(int((len(data) - 1) / batch_size) + 1):
        paragraphs = all_paragraphs[current_batch * batch_size:
                                    (current_batch + 1) * batch_size]
        paragraph_lengths = all_paragraph_lengths[current_batch * batch_size:
                                                  (current_batch + 1) * batch_size]
        masked_paragraphs, masks = switch_sentence(paragraphs, sentence_cands)
        embeds = model(masked_paragraphs)
        this_batch_size, doc_size, embed_dim = embeds.size()
        embeds = embeds.view(-1, embed_dim)
        sigmoid = nn.Sigmoid()
        scores = sigmoid(linear_layer(embeds).view(this_batch_size, doc_size))
        labels = torch.cat(masks).long().to(device)
        # Keep only positions that correspond to real sentences before thresholding.
        scores = filter_output(scores.view(-1), paragraph_lengths)
        preds = scores.ge(0.5).long().to(device)
        corrects = torch.sum(labels == preds)
        total_corrects += corrects.item()
        total_samples += len(preds)
    return float(total_corrects) / total_samples
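# The helper `filter_output` used above is defined elsewhere in the repo. Based on how it is
# called (a flattened batch_size * doc_size tensor plus per-paragraph lengths), a minimal
# sketch of the assumed behavior could look like the following. This is an illustration under
# that assumption, not the repository's actual implementation.
def filter_output_sketch(flat_scores, paragraph_lengths):
    """Keep only positions that correspond to real sentences, dropping padded slots."""
    doc_size = len(flat_scores) // len(paragraph_lengths)
    kept = []
    for doc_i, length in enumerate(paragraph_lengths):
        start = doc_i * doc_size
        kept.append(flat_scores[start:start + length])
    return torch.cat(kept)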
def train(train_data, dev_data, my_vocab, train_target, dev_target):
    embed_model = MyModel(my_vocab)
    if classifier_embed_model_path is not None:
        embed_model = torch.load(classifier_embed_model_path)
    model = ClassificationModel(embed_model, hidden_dim * 2, num_classes)
    model = model.to(device)
    criteria = torch.nn.CrossEntropyLoss()
    model_optim = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=learning_rate)
    best_acc = -1
    writer = SummaryWriter(exp_name)
    all_paragraphs = [build_paragraph(this_sample, my_vocab)
                      for this_sample in train_data]
    all_paragraph_lengths = [len(this_sample) for this_sample in train_data]
    train_idx = list(range(len(train_data)))
    for epoch_i in range(num_epoch):
        # Shuffle the training order, then reorder paragraphs and targets together.
        random.shuffle(train_idx)
        total_loss = 0
        total_batch = 0
        all_paragraphs = [all_paragraphs[i] for i in train_idx]
        all_paragraph_lengths = [all_paragraph_lengths[i] for i in train_idx]
        train_target = [train_target[i] for i in train_idx]
        for current_batch in range(int((len(train_data) - 1) / batch_size) + 1):
            if current_batch % 100 == 0:
                print(current_batch)
            model_optim.zero_grad()
            paragraphs = all_paragraphs[current_batch * batch_size:
                                        (current_batch + 1) * batch_size]
            paragraph_lengths = all_paragraph_lengths[current_batch * batch_size:
                                                      (current_batch + 1) * batch_size]
            scores = model(paragraphs)
            targets = train_target[current_batch * batch_size:
                                   (current_batch + 1) * batch_size]
            labels = torch.tensor(targets).to(device)
            loss = criteria(scores, labels)
            total_loss += loss.item()
            total_batch += 1
            loss.backward()
            model_optim.step()
        acc = evaluate_classifier(model, dev_data, dev_target, my_vocab)
        # Keep the checkpoint with the best dev accuracy.
        if acc > best_acc:
            torch.save(model, classifier_model_path)
            best_acc = acc
        writer.add_scalar('accuracy', acc, epoch_i)
def evaluate(model, data, my_vocab):
    all_paragraphs = [build_paragraph(this_sample, my_vocab)
                      for this_sample in data]
    all_paragraph_lengths = [len(this_sample) for this_sample in data]
    total_corrects = 0
    total_samples = 0
    for current_batch in range(int((len(data) - 1) / batch_size) + 1):
        paragraphs = all_paragraphs[current_batch * batch_size:
                                    (current_batch + 1) * batch_size]
        paragraph_lengths = all_paragraph_lengths[current_batch * batch_size:
                                                  (current_batch + 1) * batch_size]
        masked_paragraphs, masks, cand_pool = mask_sentence(paragraphs)
        outs, pool_sent_embeds = model(masked_paragraphs, cand_pool)
        corrects, batch_samples = compute_score(outs, pool_sent_embeds, masks)
        total_corrects += corrects
        total_samples += batch_samples
    return float(total_corrects) / total_samples
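# `compute_score` is defined elsewhere in the repo. A plausible reading of its role in
# `evaluate` (given outs, pool_sent_embeds, masks) is to match each masked position's
# predicted embedding against the candidate-pool sentence embeddings and count how often
# the original sentence is ranked first. The sketch below is hypothetical and assumes
# `masks` holds the pool indices of the true sentences; the actual function may differ.
def compute_score_sketch(outs, pool_sent_embeds, masks):
    # outs: (num_masked, dim) predicted embeddings; pool_sent_embeds: (pool_size, dim)
    sims = outs @ pool_sent_embeds.t()           # similarity of each prediction to each candidate
    preds = sims.argmax(dim=-1)                  # best-matching candidate per masked position
    gold = torch.tensor(masks).to(preds.device)  # assumed: indices of the true sentences
    corrects = torch.sum(preds == gold).item()
    return corrects, len(gold)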
def evaluate_sorter(model, linear_layer, data, my_vocab, cand_permuts):
    num_to_sort = 3
    all_paragraphs = [build_paragraph(this_sample, my_vocab)
                      for this_sample in data]
    all_paragraph_lengths = [len(this_sample) for this_sample in data]
    sentence_cands = []
    for i in range(min(2000, len(all_paragraphs))):
        sentence_cands += all_paragraphs[i][0]
    total_corrects = 0
    total_samples = 0
    for current_batch in range(int((len(data) - 1) / batch_size) + 1):
        paragraphs = all_paragraphs[current_batch * batch_size:
                                    (current_batch + 1) * batch_size]
        paragraph_lengths = all_paragraph_lengths[current_batch * batch_size:
                                                  (current_batch + 1) * batch_size]
        # Locally permute a window of num_to_sort sentences; the model must recover the order.
        masked_paragraphs, start_idx, labels = local_sort_sentence(paragraphs, cand_permuts)
        if len(masked_paragraphs) < 1:
            continue
        embeds = model(masked_paragraphs)
        this_batch_size, doc_size, embed_dim = embeds.size()
        idx_1, idx_2 = get_fetch_idx(this_batch_size, start_idx)
        embeds_to_sort = embeds[idx_1, idx_2, :].view(this_batch_size, num_to_sort, -1)
        scores = linear_layer(embeds_to_sort)
        preds = scores.argmax(1)
        labels = torch.tensor(labels).to(device)
        corrects = torch.sum(labels == preds)
        total_corrects += corrects.item()
        total_samples += len(preds)
    return float(total_corrects) / total_samples
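# `get_fetch_idx` is defined elsewhere; from its use above it appears to build the two
# advanced-indexing lists that gather the num_to_sort consecutive sentence embeddings
# starting at each document's permuted window. A hypothetical sketch under that assumption:
def get_fetch_idx_sketch(this_batch_size, start_idx, num_to_sort=3):
    # idx_1 repeats each document index num_to_sort times: [0, 0, 0, 1, 1, 1, ...]
    idx_1 = [doc_i for doc_i in range(this_batch_size) for _ in range(num_to_sort)]
    # idx_2 enumerates the window positions start_idx[doc_i] .. start_idx[doc_i] + num_to_sort - 1
    idx_2 = [start_idx[doc_i] + offset
             for doc_i in range(this_batch_size) for offset in range(num_to_sort)]
    return idx_1, idx_2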
def evaluate_classifier(model, data, labels, my_vocab):
    all_paragraphs = [build_paragraph(this_sample, my_vocab)
                      for this_sample in data]
    all_paragraph_lengths = [len(this_sample) for this_sample in data]
    acc_total = 0
    correct_total = 0
    for current_batch in range(int((len(data) - 1) / batch_size) + 1):
        batch_data = data[current_batch * batch_size:
                          (current_batch + 1) * batch_size]
        paragraphs = all_paragraphs[current_batch * batch_size:
                                    (current_batch + 1) * batch_size]
        paragraph_lengths = all_paragraph_lengths[current_batch * batch_size:
                                                  (current_batch + 1) * batch_size]
        scores = model(paragraphs)
        targets = labels[current_batch * batch_size:
                         (current_batch + 1) * batch_size]
        targets = torch.tensor(targets).to(device)
        pred_idx = scores.argmax(-1)
        acc_total += len(targets)
        correct_total += torch.sum(targets == pred_idx).item()
    return float(correct_total) / acc_total
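# Example of how `evaluate_classifier` might be invoked on a saved checkpoint. The names
# (classifier_model_path, dev_data, dev_target, my_vocab, device) mirror the globals used in
# this file; this is only an illustrative sketch of the calling convention, kept commented out.
# model = torch.load(classifier_model_path).to(device)
# dev_acc = evaluate_classifier(model, dev_data, dev_target, my_vocab)
# print('dev accuracy: %.4f' % dev_acc)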
def train(train_data, dev_data, my_vocab, train_target, dev_target, dev_target_txt):
    embed_model = MyModel(my_vocab)
    if summarizer_embed_model_path is not None:
        embed_model = torch.load(summarizer_embed_model_path)
    model = SummarizeModel(embed_model, hidden_dim * 2)
    model = model.to(device)
    criteria = torch.nn.MSELoss()
    model_optim = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=learning_rate)
    best_acc = -1
    writer = SummaryWriter(exp_name)
    train_idx = list(range(len(train_data)))
    for epoch_i in range(num_epoch):
        total_loss = 0
        total_batch = 0
        # Shuffle the training order, then reorder data and targets together.
        random.shuffle(train_idx)
        train_data = [train_data[i] for i in train_idx]
        train_target = [train_target[i] for i in train_idx]
        for current_batch in range(int((len(train_data) - 1) / batch_size) + 1):
            if current_batch % 100 == 0:
                print(current_batch)
            model_optim.zero_grad()
            batch_data = train_data[current_batch * batch_size:
                                    (current_batch + 1) * batch_size]
            targets = train_target[current_batch * batch_size:
                                   (current_batch + 1) * batch_size]
            paragraphs = [build_paragraph(this_sample, my_vocab)
                          for this_sample in batch_data]
            paragraph_lengths = [len(this_sample) for this_sample in batch_data]
            scores = model(paragraphs)
            num_doc, doc_size = scores.size()
            # Binary relevance labels: 1 for sentences that belong to the reference summary.
            labels = torch.zeros(num_doc, doc_size).to(device)
            for i, this_target in enumerate(targets):
                if len(this_target) > 0:
                    labels[i][this_target] = 1
            labels = filter_output(labels.view(-1), paragraph_lengths)
            scores = filter_output(scores.view(-1), paragraph_lengths)
            loss = criteria(scores, labels)
            total_loss += loss.item()
            total_batch += 1
            loss.backward()
            model_optim.step()
        acc, recall, scores = evaluate_summarizer(model, dev_data, dev_target,
                                                  my_vocab, dev_target_txt)
        with open(score_path + '_' + str(epoch_i) + '_score', 'w') as f_out:
            json.dump(scores, f_out)
        scores = [scores['rouge_1_f_score'],
                  scores['rouge_2_f_score'],
                  scores['rouge_l_f_score']]
        torch.save(model, summarizer_model_path + '_' + str(epoch_i) + '.pt')
        writer.add_scalar('accuracy', acc, epoch_i)
        writer.add_scalar('recall', recall, epoch_i)
        writer.add_scalar('avg_loss', total_loss / total_batch, epoch_i)
        writer.add_scalar('rouge_1', scores[0], epoch_i)
        writer.add_scalar('rouge_2', scores[1], epoch_i)
        writer.add_scalar('rouge_l', scores[2], epoch_i)
def evaluate_summarizer(model, data, labels, my_vocab, target_src, is_eval=False):
    all_paragraphs = [build_paragraph(this_sample, my_vocab)
                      for this_sample in data]
    all_paragraph_lengths = [len(this_sample) for this_sample in data]
    sel_top_k = 3
    acc_total = 0
    recall_total = 0
    correct_total = 0
    predict_txt = []
    for current_batch in range(int((len(data) - 1) / batch_size) + 1):
        batch_data = data[current_batch * batch_size:
                          (current_batch + 1) * batch_size]
        paragraphs = all_paragraphs[current_batch * batch_size:
                                    (current_batch + 1) * batch_size]
        paragraph_lengths = all_paragraph_lengths[current_batch * batch_size:
                                                  (current_batch + 1) * batch_size]
        scores = model(paragraphs)
        if is_eval:
            # Zero out scores of padded positions beyond each paragraph's real length.
            this_batch_size, doc_size = scores.size()
            masks = gen_mask_based_length(this_batch_size, doc_size, paragraph_lengths)
            scores = scores * masks
        if labels is not None:
            targets = labels[current_batch * batch_size:
                             (current_batch + 1) * batch_size]
            _, pred_idx = scores.topk(sel_top_k, -1)
            for i, this_target in enumerate(targets):
                recall_total += len(this_target)
                acc_total += sel_top_k
                correct_total += len([pred for pred in pred_idx[i]
                                      if pred in this_target])
                pred_sentences = [batch_data[i][j] for j in pred_idx[i]
                                  if j < len(batch_data[i])]
                if len(pred_sentences) == 0:
                    pred_sentences = batch_data[i][:sel_top_k]
                joined_sentences = [' '.join(sentence) for sentence in pred_sentences]
                predict_txt.append('\n'.join(joined_sentences))
        else:
            _, pred_idx = scores.topk(sel_top_k, -1)
            for i in range(len(batch_data)):
                pred_sentences = [batch_data[i][j] for j in pred_idx[i]
                                  if j < len(batch_data[i])]
                if len(pred_sentences) == 0:
                    pred_sentences = batch_data[i][:sel_top_k]
                joined_sentences = [' '.join(sentence) for sentence in pred_sentences]
                predict_txt.append('\n'.join(joined_sentences))
    scores = rouge_eval(target_src, predict_txt)
    if labels is not None:
        return (float(correct_total) / acc_total,
                float(correct_total) / recall_total,
                scores)
    else:
        return -1, -1, scores
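# `gen_mask_based_length` is defined elsewhere in the repo; judging from how it is used above,
# it likely returns a (batch_size, doc_size) 0/1 tensor with ones at real sentence positions
# and zeros at padding, so padded slots cannot win the topk selection. A minimal sketch of
# that assumed behavior, not the repository's actual implementation:
def gen_mask_based_length_sketch(this_batch_size, doc_size, paragraph_lengths):
    masks = torch.zeros(this_batch_size, doc_size)
    for doc_i, length in enumerate(paragraph_lengths):
        masks[doc_i, :length] = 1  # keep scores only for the document's real sentences
    return masks.to(device)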
def train(train_data, dev_data, my_vocab, train_target, dev_target):
    num_to_sort = 3
    cand_permuts = list(itertools.permutations(list(range(num_to_sort))))
    model = MyModel(my_vocab)
    model = model.to(device)
    if model_to_load is not None:
        model = torch.load(model_to_load).to(device)
    model_optim = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=learning_rate)
    # Binary classification head for the sentence-switch objective.
    # (A LocalSorterModel(hidden_dim * 2, num_to_sort) head is used instead for the sorting objective.)
    classification_layer = LinearRegressionModel(hidden_dim * 2, 1)
    classification_layer = classification_layer.to(device)
    classifier_optim = optim.Adam(classification_layer.parameters(), lr=learning_rate)
    best_acc = -1
    writer = SummaryWriter(exp_name)
    all_paragraphs = [build_paragraph(this_sample, my_vocab)
                      for this_sample in train_data]
    all_paragraph_lengths = [len(this_sample) for this_sample in train_data]
    train_idx = list(range(len(train_data)))
    for epoch_i in range(num_epoch):
        switch_loss = 0
        total_batch = 0
        random.shuffle(train_idx)
        all_paragraphs = [all_paragraphs[i] for i in train_idx]
        all_paragraph_lengths = [all_paragraph_lengths[i] for i in train_idx]
        # Candidate sentences for switching, drawn from the first (up to) 10000 paragraphs.
        sentence_cands = []
        for i in range(min(10000, len(all_paragraphs))):
            sentence_cands += all_paragraphs[i][0]
        for current_batch in range(int((len(train_data) - 1) / batch_size) + 1):
            if current_batch % 100 == 0:
                print(current_batch)
            model_optim.zero_grad()
            classification_layer.zero_grad()
            paragraphs = all_paragraphs[current_batch * batch_size:
                                        (current_batch + 1) * batch_size]
            paragraph_lengths = all_paragraph_lengths[current_batch * batch_size:
                                                      (current_batch + 1) * batch_size]
            # Alternative pretraining objectives (train_mask, train_replace, train_sorter)
            # can be substituted here; this run trains the sentence-switch objective.
            loss = train_switch(model, classification_layer,
                                paragraphs, paragraph_lengths, sentence_cands)
            switch_loss += loss.item()
            total_batch += 1
            loss.backward()
            model_optim.step()
            classifier_optim.step()
        # The corresponding evaluations for the alternative objectives are
        # evaluate(), evaluate_replace(), and evaluate_sorter().
        switch_acc = evaluate_switch(model, classification_layer, dev_data, my_vocab)
        if switch_acc > best_acc:
            torch.save(model, model_path)
            best_acc = switch_acc
        writer.add_scalar('switch_accuracy', switch_acc, epoch_i)
        writer.add_scalar('avg_switch_loss', switch_loss / total_batch, epoch_i)
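# The sentence-switch corruption (`switch_sentence`) used by train_switch and evaluate_switch
# is implemented elsewhere in the repo. A minimal sketch of the assumed idea is below: with
# some probability, replace a sentence by a random candidate from `sentence_cands` and record
# a 1 in the mask so the classifier can learn to detect switched positions. Illustrative only;
# the repo's own function may differ in details (switch probability, padding handling).
def switch_sentence_sketch(paragraphs, sentence_cands, switch_prob=0.25):
    switched_paragraphs = []
    masks = []
    for sentences, *rest in paragraphs:
        new_sentences = []
        mask = []
        for sent in sentences:
            if random.random() < switch_prob:
                new_sentences.append(random.choice(sentence_cands))
                mask.append(1)   # this position was switched
            else:
                new_sentences.append(sent)
                mask.append(0)   # original sentence kept
        switched_paragraphs.append((new_sentences, *rest))
        masks.append(torch.tensor(mask))
    return switched_paragraphs, masks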