def feed_samples(model, samples, loss_function, device,
                 word_alignment_model=None, sent_alignment_model=None):
    """Run one forward/backward pass over a batch and return its loss.

    The batch is unpacked by ``process_samples`` into sentences, targets and
    words. Sentences and words are each reordered by ``ranking_sequence``,
    padded with ``pad_sequence`` and moved to ``device`` before being handed
    to ``model``. Gradients are cleared before the forward pass and
    ``loss.backward()`` is called before returning; no optimizer step is
    taken here.

    Args:
        model: Callable model exposing ``zero_grad()``.
        samples: Batch passed unchanged to ``process_samples``.
        loss_function: Applied as ``loss_function(output, targets)``.
        device: Device the padded tensors are moved to.
        word_alignment_model: Optional module forwarded to ``model``; its
            gradients are cleared when it is provided.
        sent_alignment_model: Optional module forwarded to ``model``; its
            gradients are cleared when it is provided.

    Returns:
        The loss returned by ``loss_function``, after ``backward()``.
    """
    sentences, targets, words = process_samples(samples, device)

    # ranking_sequence returns the reordered sequences plus an index list
    # that is forwarded to the model (presumably to undo the reordering --
    # confirm against ranking_sequence).
    ranked_sentences, alignment_sentence_indexs = ranking_sequence(sentences)
    ranked_words, alignment_words_indexs = ranking_sequence(words)
    sentence_lengths = [len(sentence) for sentence in ranked_sentences]
    words_lengths = [len(word) for word in ranked_words]

    # Padding
    pad_sentences = torch.nn.utils.rnn.pad_sequence(ranked_sentences)
    pad_sentences = pad_sentences.to(device)
    pad_words = torch.nn.utils.rnn.pad_sequence(ranked_words)
    pad_words = pad_words.to(device)

    model.zero_grad()
    # Guard each optional model on its own: passing only one of them must
    # not dereference the other, which may still be None.
    if word_alignment_model is not None:
        word_alignment_model.zero_grad()
    if sent_alignment_model is not None:
        sent_alignment_model.zero_grad()

    output = model(pad_sentences, pad_words,
                   alignment_sentence_indexs, alignment_words_indexs,
                   sentence_lengths, words_lengths,
                   word_alignment_model, sent_alignment_model)
    loss = loss_function(output, targets)
    loss.backward()
    return loss
def evaluate_model(model, testing_data, batch_size, device,
                   word_alignment_model=None, sent_alignment_model=None):
    """Return the fraction of ``testing_data`` the model classifies correctly.

    The data is processed in slices of ``batch_size``. Each slice is unpacked
    by ``process_testing_samples``, reordered by ``ranking_sequence``, padded
    and moved to ``device``. A sample counts as correct when the argmax of
    its output row equals the corresponding entry of ``targets``.

    Args:
        model: Callable model producing one output row per sample.
        testing_data: Sliceable collection of test samples.
        batch_size: Number of samples per forward pass.
        device: Device the padded tensors are moved to.
        word_alignment_model: Optional module forwarded to ``model``.
        sent_alignment_model: Optional module forwarded to ``model``.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` when ``testing_data`` is
        empty (avoids a division by zero).
    """
    if len(testing_data) == 0:
        return 0.0

    num_correct = 0
    num_batches = (len(testing_data) - 1) // batch_size + 1
    # Evaluation never calls backward(), so skip building the autograd graph.
    with torch.no_grad():
        for batch_idx in range(num_batches):
            samples = testing_data[batch_idx * batch_size:
                                   (batch_idx + 1) * batch_size]
            sentences, targets, words = process_testing_samples(
                samples, device)

            ranked_words, alignment_words_indexs = ranking_sequence(words)
            ranked_sentences, alignment_sentence_indexs = ranking_sequence(
                sentences)
            words_lengths = [len(word) for word in ranked_words]
            sentence_lengths = [len(sentence)
                                for sentence in ranked_sentences]

            pad_sentences = torch.nn.utils.rnn.pad_sequence(ranked_sentences)
            pad_sentences = pad_sentences.to(device)
            pad_words = torch.nn.utils.rnn.pad_sequence(ranked_words)
            pad_words = pad_words.to(device)

            output = model(pad_sentences, pad_words,
                           alignment_sentence_indexs, alignment_words_indexs,
                           sentence_lengths, words_lengths,
                           word_alignment_model, sent_alignment_model)
            for j, one_pred in enumerate(output):
                if torch.argmax(one_pred) == targets[j]:
                    num_correct += 1
    return float(num_correct) / len(testing_data)
def gen_fisher(model, train_data, all_relations):
    """Accumulate averaged squared gradients over ``train_data``.

    For every batch (size fixed at 1) the model scores the candidate
    relations, a log-softmax + NLL loss is computed per relation set with
    class 0 as the target, and the loss is back-propagated. The squared
    gradient of every parameter returned by ``get_grad_params`` is divided
    by the number of batches and summed across batches.

    ``device`` is not a parameter; it is read from the enclosing scope.

    Args:
        model: Scoring model exposing ``zero_grad()`` and ``init_hidden()``.
        train_data: Sliceable collection of training samples.
        all_relations: Passed through to ``process_samples``.

    Returns:
        A list with one accumulated squared-gradient tensor per parameter,
        or ``None`` when ``train_data`` is empty.
    """
    log_softmax = nn.LogSoftmax(0)
    nll_loss = nn.NLLLoss()
    fisher_batch_size = 1
    batch_epoch = (len(train_data) - 1) // fisher_batch_size + 1
    # The first candidate of every relation set is used as the target class.
    gold_target = torch.tensor([0])

    fisher = None
    for batch_idx in range(batch_epoch):
        model.zero_grad()
        lower = batch_idx * fisher_batch_size
        samples = train_data[lower:lower + fisher_batch_size]
        questions, relations, relation_set_lengths = process_samples(
            samples, all_relations, device)

        ranked_questions, reverse_question_indexs = ranking_sequence(
            questions)
        ranked_relations, reverse_relation_indexs = ranking_sequence(
            relations)
        question_lengths = [len(seq) for seq in ranked_questions]
        relation_lengths = [len(seq) for seq in ranked_relations]

        pad_questions = torch.nn.utils.rnn.pad_sequence(
            ranked_questions).to(device)
        pad_relations = torch.nn.utils.rnn.pad_sequence(
            ranked_relations).to(device)

        model.init_hidden(device, sum(relation_set_lengths))
        all_scores = model(pad_questions, pad_relations, device,
                           reverse_question_indexs, reverse_relation_indexs,
                           question_lengths, relation_lengths)
        all_scores = all_scores.to('cpu')

        # all_scores is a flat concatenation of the per-sample candidate
        # sets; walk it using the set lengths.
        losses = []
        offset = 0
        for set_size in relation_set_lengths:
            scores = all_scores[offset:offset + set_size]
            offset += set_size
            losses.append(
                nll_loss(log_softmax(scores).view(1, -1), gold_target))
        loss_batch = sum(losses)
        loss_batch.backward()

        grad_params = get_grad_params(model)
        if fisher is None:
            fisher = [param.grad ** 2 / batch_epoch for param in grad_params]
        else:
            fisher = [fisher[k] + param.grad ** 2 / batch_epoch
                      for k, param in enumerate(grad_params)]
    return fisher
def get_rel_embed(model, sample_list, all_relations, alignment_model,
                  before_alignment=False):
    """Compute relation embeddings for the first relation of each sample.

    ``batch_size`` and ``device`` are not parameters; they are read from the
    enclosing scope.

    Args:
        model: Model exposing ``init_hidden()`` and ``compute_rel_embed()``.
        sample_list: Sliceable collection of samples; ``item[0]`` of each
            sample is used as an index into ``all_relations``.
        all_relations: Indexable collection of relation token id sequences.
        alignment_model: Forwarded to ``model.compute_rel_embed``.
        before_alignment: Forwarded to ``model.compute_rel_embed``.

    Returns:
        A NumPy array concatenating the per-batch embeddings.
    """
    embed_chunks = []
    num_batches = (len(sample_list) - 1) // batch_size + 1
    for batch_idx in range(num_batches):
        batch = sample_list[batch_idx * batch_size:
                            (batch_idx + 1) * batch_size]
        relations = [
            torch.tensor(all_relations[item[0]], dtype=torch.long).to(device)
            for item in batch
        ]
        model.init_hidden(device, len(relations))

        ranked_relations, alignment_relation_indexs = ranking_sequence(
            relations)
        relation_lengths = [len(seq) for seq in ranked_relations]
        pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)

        rel_embeds = model.compute_rel_embed(
            pad_relations, relation_lengths, alignment_relation_indexs,
            alignment_model, before_alignment)
        # Detach so the result can be converted to NumPy.
        embed_chunks.append(rel_embeds.detach().cpu().numpy())
    return np.concatenate(embed_chunks)
def save_rel_embeds(model, all_seen_rels, all_relations, file_name):
    """Compute embeddings for the seen relations and write them to a file.

    The output file starts with the ``str()`` of the list of relation keys,
    followed by one line per relation holding the ``str()`` of its embedding
    values rounded to 6 decimals. When ``model`` is ``None`` or no relations
    were seen, the key list is empty and no embedding lines are written.

    ``batch_size`` and ``device`` are not parameters; they are read from the
    enclosing scope.

    Args:
        model: Model exposing ``init_hidden()`` and ``compute_rel_embed()``,
            or ``None``.
        all_seen_rels: Sliceable collection of relation ids.
        all_relations: Indexable by relation id; yields token id sequences.
        file_name: Path of the text file to (over)write.
    """
    rel_embeds = {}
    if model is not None and len(all_seen_rels) > 0:
        num_batches = (len(all_seen_rels) - 1) // batch_size + 1
        for batch_idx in range(num_batches):
            seen_rels_batch = all_seen_rels[batch_idx * batch_size:
                                            (batch_idx + 1) * batch_size]
            relations = [
                torch.tensor(all_relations[rel_id],
                             dtype=torch.long).to(device)
                for rel_id in seen_rels_batch
            ]
            model.init_hidden(device, len(relations))

            ranked_relations, reverse_relation_indexs = ranking_sequence(
                relations)
            relation_lengths = [len(seq) for seq in ranked_relations]
            pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)

            new_rel_embeds = model.compute_rel_embed(
                pad_relations, relation_lengths, reverse_relation_indexs)
            for pos, rel in enumerate(seen_rels_batch):
                rel_embeds[rel] = new_rel_embeds[pos].cpu().numpy()

    rels = list(rel_embeds.keys())
    with open(file_name, 'w') as writer:
        writer.write(str(rels) + '\n')
        for embed in rel_embeds.values():
            rounded = [round(x, 6) for x in embed]
            writer.write(str(rounded) + '\n')
def compute_diff_scores(model, samples, batch_size, all_relations, device):
    """Return, per sample, the gold score minus the best negative score.

    The first candidate of every relation set is treated as the gold
    relation; the remaining candidates are the negatives.

    The previous implementation overwrote ``samples`` with the current slice
    inside the batch loop, so every batch after the first was sliced from
    the already-truncated list, and the result list was reset per batch.
    Scores are now accumulated across all batches without touching the
    input.

    Args:
        model: Scoring model exposing ``init_hidden()``.
        samples: Sliceable collection of samples.
        batch_size: Number of samples per forward pass.
        all_relations: Passed through to ``process_samples``.
        device: Passed to ``process_samples``, ``init_hidden`` and ``model``.

    Returns:
        A list with one score-difference tensor per sample, in input order;
        an empty list when ``samples`` is empty.
    """
    diff_scores = []
    num_batches = (len(samples) - 1) // batch_size + 1
    for batch_idx in range(num_batches):
        batch = samples[batch_idx * batch_size:(batch_idx + 1) * batch_size]
        questions, relations, relation_set_lengths = process_samples(
            batch, all_relations, device)
        model.init_hidden(device, sum(relation_set_lengths))

        ranked_questions, reverse_question_indexs = ranking_sequence(
            questions)
        ranked_relations, reverse_relation_indexs = ranking_sequence(
            relations)
        question_lengths = [len(question) for question in ranked_questions]
        relation_lengths = [len(relation) for relation in ranked_relations]

        pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
        pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
        all_scores = model(pad_questions, pad_relations, device,
                           reverse_question_indexs, reverse_relation_indexs,
                           question_lengths, relation_lengths)

        # all_scores is a flat concatenation of the per-sample candidate
        # sets; walk it using the set lengths.
        start_index = 0
        for length in relation_set_lengths:
            this_scores = all_scores[start_index:start_index + length]
            gold_score = this_scores[0]
            neg_scores = this_scores[1:]
            diff_scores.append(gold_score - neg_scores.max())
            start_index += length
    return diff_scores
def feed_samples(model, samples, loss_function, all_relations, device,
                 alignment_model=None):
    """Run one forward/backward pass of the ranking model on a batch.

    Questions and candidate relations come from ``process_samples``. The
    model returns one score per candidate; within each relation set the
    first score is the positive one and the rest are negatives. The positive
    score is repeated once per negative so ``loss_function`` can compare
    them pairwise with a target of ones. Gradients are cleared before the
    forward pass and ``loss.backward()`` is called before returning; no
    optimizer step is taken here.

    Args:
        model: Scoring model exposing ``zero_grad()`` and ``init_hidden()``.
        samples: Batch passed unchanged to ``process_samples``.
        loss_function: Called as ``loss_function(pos, neg, target)``.
        all_relations: Passed through to ``process_samples``.
        device: Device the padded tensors are moved to.
        alignment_model: Optional module forwarded to ``model``; its
            gradients are cleared when it is provided.

    Returns:
        Tuple ``(all_scores, loss)`` with ``all_scores`` moved to the CPU.
    """
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)

    ranked_questions, alignment_question_indexs = ranking_sequence(questions)
    ranked_relations, alignment_relation_indexs = ranking_sequence(relations)
    question_lengths = [len(question) for question in ranked_questions]
    relation_lengths = [len(relation) for relation in ranked_relations]

    pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
    pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
    pad_questions = pad_questions.to(device)
    pad_relations = pad_relations.to(device)

    model.zero_grad()
    if alignment_model is not None:
        alignment_model.zero_grad()
    model.init_hidden(device, sum(relation_set_lengths))
    all_scores = model(pad_questions, pad_relations, device,
                       alignment_question_indexs, alignment_relation_indexs,
                       question_lengths, relation_lengths, alignment_model)
    all_scores = all_scores.to('cpu')

    # Split the flat score vector into (positive, negative) pairs: the first
    # score of each set is broadcast against that set's remaining scores.
    pos_scores = []
    neg_scores = []
    start_index = 0
    for length in relation_set_lengths:
        pos_scores.append(all_scores[start_index].expand(length - 1))
        neg_scores.append(all_scores[start_index + 1:start_index + length])
        start_index += length
    pos_scores = torch.cat(pos_scores)
    neg_scores = torch.cat(neg_scores)

    # One target per (positive, negative) pair: each set contributes
    # length - 1 pairs.
    num_pairs = sum(relation_set_lengths) - len(relation_set_lengths)
    loss = loss_function(pos_scores, neg_scores, torch.ones(num_pairs))
    loss.backward()
    return all_scores, loss
def evaluate_model(model, testing_data, batch_size, all_relations, device,
                   reverse_model=None):
    """Return the fraction of test samples whose top candidate is the gold one.

    For each sample the model scores its candidate relations; the candidate
    id at the argmax position (looked up in ``sample[1]``) is compared with
    the gold relation id returned by ``process_testing_samples``.

    Args:
        model: Scoring model exposing ``init_hidden()``.
        testing_data: Sliceable collection of samples; ``sample[1]`` holds
            the candidate relation ids.
        batch_size: Number of samples per forward pass.
        all_relations: Passed through to ``process_testing_samples``.
        device: Passed to the helpers, ``init_hidden`` and ``model``.
        reverse_model: Optional module forwarded to ``model``.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` when ``testing_data`` is
        empty (avoids a division by zero).
    """
    if len(testing_data) == 0:
        return 0.0

    num_correct = 0
    num_batches = (len(testing_data) - 1) // batch_size + 1
    # Evaluation never calls backward(), so skip building the autograd graph.
    with torch.no_grad():
        for batch_idx in range(num_batches):
            samples = testing_data[batch_idx * batch_size:
                                   (batch_idx + 1) * batch_size]
            (gold_relation_indexs, questions, relations,
             relation_set_lengths) = process_testing_samples(
                samples, all_relations, device)
            model.init_hidden(device, sum(relation_set_lengths))

            ranked_questions, reverse_question_indexs = ranking_sequence(
                questions)
            ranked_relations, reverse_relation_indexs = ranking_sequence(
                relations)
            question_lengths = [len(question)
                                for question in ranked_questions]
            relation_lengths = [len(relation)
                                for relation in ranked_relations]

            pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
            pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
            all_scores = model(pad_questions, pad_relations, device,
                               reverse_question_indexs,
                               reverse_relation_indexs,
                               question_lengths, relation_lengths,
                               reverse_model)

            # all_scores is a flat concatenation of the per-sample candidate
            # sets; walk it using the set lengths.
            start_index = 0
            for j, length in enumerate(relation_set_lengths):
                cand_indexs = samples[j][1]
                best_pos = all_scores[start_index:start_index + length].argmax()
                pred_index = cand_indexs[best_pos]
                if pred_index == gold_relation_indexs[j]:
                    num_correct += 1
                start_index += length
    return float(num_correct) / len(testing_data)
def feed_samples(model, samples, loss_function, all_relations, device):
    """Run one forward/backward pass of the ranking model on a batch.

    Within each relation set the first score is the positive one and the
    remaining scores are negatives; ``loss_function`` receives the paired
    positive/negative scores with a target of ones. Gradients are cleared
    before the forward pass and ``loss.backward()`` is called at the end.
    Nothing is returned and no optimizer step is taken here.

    Args:
        model: Scoring model exposing ``zero_grad()`` and ``init_hidden()``.
        samples: Batch passed unchanged to ``process_samples``.
        loss_function: Called as ``loss_function(pos, neg, target)``.
        all_relations: Passed through to ``process_samples``.
        device: Device the padded tensors are moved to.
    """
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)

    ranked_questions, reverse_question_indexs = ranking_sequence(questions)
    ranked_relations, reverse_relation_indexs = ranking_sequence(relations)
    question_lengths = [len(seq) for seq in ranked_questions]
    relation_lengths = [len(seq) for seq in ranked_relations]

    pad_questions = torch.nn.utils.rnn.pad_sequence(
        ranked_questions).to(device)
    pad_relations = torch.nn.utils.rnn.pad_sequence(
        ranked_relations).to(device)

    model.zero_grad()
    model.init_hidden(device, sum(relation_set_lengths))
    all_scores = model(pad_questions, pad_relations, device,
                       reverse_question_indexs, reverse_relation_indexs,
                       question_lengths, relation_lengths)
    all_scores = all_scores.to('cpu')

    # Pair every negative score with a copy of its set's positive score.
    pos_chunks = []
    neg_chunks = []
    offset = 0
    for set_size in relation_set_lengths:
        gold_score = all_scores[offset]
        pos_chunks.append(gold_score.expand(set_size - 1))
        neg_chunks.append(all_scores[offset + 1:offset + set_size])
        offset += set_size
    pos_scores = torch.cat(pos_chunks)
    neg_scores = torch.cat(neg_chunks)

    # Each set contributes set_size - 1 (positive, negative) pairs.
    num_pairs = sum(relation_set_lengths) - len(relation_set_lengths)
    loss = loss_function(pos_scores, neg_scores, torch.ones(num_pairs))
    loss.backward()
def update_rel_embed(model, all_seen_rels, all_relations, rel_embeds):
    """Refresh ``rel_embeds`` in place with embeddings of the seen relations.

    Does nothing when ``model`` is ``None`` or ``all_seen_rels`` is empty.
    ``batch_size`` and ``device`` are not parameters; they are read from the
    enclosing scope.

    Args:
        model: Model exposing ``init_hidden()`` and ``compute_rel_embed()``,
            or ``None``.
        all_seen_rels: Sliceable collection of relation ids.
        all_relations: Indexable by relation id; yields token id sequences.
        rel_embeds: Mapping updated in place: relation id -> NumPy embedding.
    """
    if model is None or len(all_seen_rels) == 0:
        return

    num_batches = (len(all_seen_rels) - 1) // batch_size + 1
    for batch_idx in range(num_batches):
        seen_rels_batch = all_seen_rels[batch_idx * batch_size:
                                        (batch_idx + 1) * batch_size]
        relations = [
            torch.tensor(all_relations[rel_id], dtype=torch.long).to(device)
            for rel_id in seen_rels_batch
        ]
        model.init_hidden(device, len(relations))

        ranked_relations, reverse_relation_indexs = ranking_sequence(
            relations)
        relation_lengths = [len(seq) for seq in ranked_relations]
        pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)

        new_rel_embeds = model.compute_rel_embed(
            pad_relations, relation_lengths, reverse_relation_indexs)
        for pos, rel in enumerate(seen_rels_batch):
            rel_embeds[rel] = new_rel_embeds[pos].cpu().numpy()
def feed_samples(model, samples, loss_function, all_relations, device,
                 reverse_model=None, memory_que_embed=[],
                 memory_rel_embed=[]):
    """Run one forward/backward pass, optionally anchoring to stored embeddings.

    The ranking loss is computed as in the plain variant: the first score of
    each relation set is the positive, the rest are negatives, and the
    target is a vector of ones. When ``memory_que_embed`` is non-empty, the
    question and relation embeddings at the positive positions are compared
    with the stored embeddings using MSE. ``alpha`` is fixed at 0.0, so in
    that case the ranking loss is multiplied by zero and only the two MSE
    terms contribute to the returned loss.

    Args:
        model: Scoring model exposing ``zero_grad()`` and ``init_hidden()``;
            called with ``ret_embeds=True`` and expected to return
            ``(scores, question_embeds, relation_embeds)``.
        samples: Batch passed unchanged to ``process_samples``.
        loss_function: Called as ``loss_function(pos, neg, target)``.
        all_relations: Passed through to ``process_samples``.
        device: Device the padded tensors are moved to.
        reverse_model: Optional module forwarded to ``model``; its gradients
            are cleared when it is provided.
        memory_que_embed: Stored question embeddings, passed to
            ``torch.from_numpy``; empty disables the MSE terms. The default
            list is never mutated here.
        memory_rel_embed: Stored relation embeddings, passed to
            ``torch.from_numpy``. The default list is never mutated here.

    Returns:
        Tuple ``(all_scores, loss)`` with ``all_scores`` moved to the CPU.
    """
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)

    ranked_questions, reverse_question_indexs = ranking_sequence(questions)
    ranked_relations, reverse_relation_indexs = ranking_sequence(relations)
    question_lengths = [len(question) for question in ranked_questions]
    relation_lengths = [len(relation) for relation in ranked_relations]

    pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
    pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
    pad_questions = pad_questions.to(device)
    pad_relations = pad_relations.to(device)

    model.zero_grad()
    if reverse_model is not None:
        reverse_model.zero_grad()
    model.init_hidden(device, sum(relation_set_lengths))
    all_scores, cur_que_embed, cur_rel_embed = model(
        pad_questions, pad_relations, device,
        reverse_question_indexs, reverse_relation_indexs,
        question_lengths, relation_lengths,
        reverse_model, ret_embeds=True)
    all_scores = all_scores.to('cpu')

    # Split the flat score vector into (positive, negative) pairs and record
    # where each set's positive entry sits for the embedding lookup below.
    pos_scores = []
    neg_scores = []
    pos_index = []
    start_index = 0
    for length in relation_set_lengths:
        pos_index.append(start_index)
        pos_scores.append(all_scores[start_index].expand(length - 1))
        neg_scores.append(all_scores[start_index + 1:start_index + length])
        start_index += length
    pos_scores = torch.cat(pos_scores)
    neg_scores = torch.cat(neg_scores)

    loss = loss_function(
        pos_scores, neg_scores,
        torch.ones(sum(relation_set_lengths) - len(relation_set_lengths)))

    alpha = 0.0
    if len(memory_que_embed) > 0:
        # reverse_model defaults to None; only move it when it exists so that
        # supplying memory embeddings without one does not raise.
        if reverse_model is not None:
            reverse_model = reverse_model.to(device)
        reverse_model_criterion = nn.MSELoss()
        que_y = torch.from_numpy(memory_que_embed)
        rel_y = torch.from_numpy(memory_rel_embed)
        que_out = cur_que_embed[pos_index].to('cpu')
        rel_out = cur_rel_embed[pos_index].to('cpu')
        loss = loss * alpha + (1 - alpha) * (
            reverse_model_criterion(que_out, que_y)
            + reverse_model_criterion(rel_out, rel_y))
    loss.backward()
    return all_scores, loss
def train(training_data, valid_data, vocabulary, embedding_dim, hidden_dim,
          device, batch_size, lr, model_path, embedding, all_relations,
          model=None, epoch=100, grad_means=[], grad_fishers=[],
          loss_margin=2.0):
    """Train the similarity model with a margin ranking loss and return it.

    When ``model`` is ``None`` the seed is fixed at 100 and a fresh
    ``SimilarityModel`` is built. Each batch's ranking loss is extended with
    one ``param_loss`` penalty per ``(grad_means[k], grad_fishers[k])`` pair
    before the Adam step. ``p_lambda`` is not a parameter; it is read from
    the enclosing scope.

    ``valid_data`` and ``model_path`` are accepted but currently unused:
    validation-based checkpointing is disabled in this function.

    Args:
        training_data: Sliceable collection of training samples.
        valid_data: Unused.
        vocabulary: Its length sets the model's vocabulary size.
        embedding_dim: Forwarded to ``SimilarityModel``.
        hidden_dim: Forwarded to ``SimilarityModel``.
        device: Device for the model and padded tensors.
        batch_size: Number of samples per optimizer step.
        lr: Learning rate for Adam.
        model_path: Unused.
        embedding: Converted with ``np.array`` and given to the new model.
        all_relations: Passed through to ``process_samples``.
        model: Existing model to continue training, or ``None``.
        epoch: Number of passes over ``training_data``.
        grad_means: First argument set for ``param_loss``; never mutated.
        grad_fishers: Second argument set for ``param_loss``, indexed in
            step with ``grad_means``; never mutated.
        loss_margin: Margin for ``nn.MarginRankingLoss``.

    Returns:
        The trained model (on ``device``).
    """
    if model is None:
        torch.manual_seed(100)
        model = SimilarityModel(embedding_dim, hidden_dim, len(vocabulary),
                                np.array(embedding), 1, device)
    loss_function = nn.MarginRankingLoss(loss_margin)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    num_batches = (len(training_data) - 1) // batch_size + 1
    for _ in range(epoch):
        for batch_idx in range(num_batches):
            samples = training_data[batch_idx * batch_size:
                                    (batch_idx + 1) * batch_size]
            questions, relations, relation_set_lengths = process_samples(
                samples, all_relations, device)

            ranked_questions, reverse_question_indexs = ranking_sequence(
                questions)
            ranked_relations, reverse_relation_indexs = ranking_sequence(
                relations)
            question_lengths = [len(seq) for seq in ranked_questions]
            relation_lengths = [len(seq) for seq in ranked_relations]

            pad_questions = torch.nn.utils.rnn.pad_sequence(
                ranked_questions).to(device)
            pad_relations = torch.nn.utils.rnn.pad_sequence(
                ranked_relations).to(device)

            model.zero_grad()
            model.init_hidden(device, sum(relation_set_lengths))
            all_scores = model(pad_questions, pad_relations, device,
                               reverse_question_indexs,
                               reverse_relation_indexs,
                               question_lengths, relation_lengths)
            all_scores = all_scores.to('cpu')

            # Pair every negative score with a copy of its set's positive
            # (first) score.
            pos_chunks = []
            neg_chunks = []
            offset = 0
            for set_size in relation_set_lengths:
                pos_chunks.append(all_scores[offset].expand(set_size - 1))
                neg_chunks.append(all_scores[offset + 1:offset + set_size])
                offset += set_size
            pos_scores = torch.cat(pos_chunks)
            neg_scores = torch.cat(neg_chunks)

            num_pairs = sum(relation_set_lengths) - len(relation_set_lengths)
            loss = loss_function(pos_scores, neg_scores,
                                 torch.ones(num_pairs))
            loss = loss.sum()

            # Add one parameter penalty per stored (mean, fisher) pair.
            for k, grad_mean in enumerate(grad_means):
                grad_fisher = grad_fishers[k]
                loss += param_loss(model, grad_mean, grad_fisher,
                                   p_lambda).to('cpu')
            loss.backward()
            optimizer.step()
        # Per-epoch validation and best-model checkpointing are disabled;
        # the model from the final step is returned as-is.
    return model