# Imports needed by the functions in this file. Project-level helpers
# (process_samples, ranking_sequence, get_grad_params, SimilarityModel,
# evaluate_model, utils) and the module-level names `device` and `p_lambda`
# are defined elsewhere in the original code.
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


def gen_fisher(model, train_data, all_relations):
    """Estimate the diagonal Fisher information of the model on train_data,
    averaged over per-sample batches."""
    softmax_func = nn.LogSoftmax(0)
    loss_func = nn.NLLLoss()
    fisher_batch_size = 1
    batch_epoch = (len(train_data) - 1) // fisher_batch_size + 1
    fisher = None
    for i in range(batch_epoch):
        model.zero_grad()
        losses = []
        samples = train_data[i * fisher_batch_size:(i + 1) * fisher_batch_size]
        questions, relations, relation_set_lengths = process_samples(
            samples, all_relations, device)
        ranked_questions, reverse_question_indexs = \
            ranking_sequence(questions)
        ranked_relations, reverse_relation_indexs = \
            ranking_sequence(relations)
        question_lengths = [len(question) for question in ranked_questions]
        relation_lengths = [len(relation) for relation in ranked_relations]
        pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
        pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
        pad_questions = pad_questions.to(device)
        pad_relations = pad_relations.to(device)
        model.init_hidden(device, sum(relation_set_lengths))
        all_scores = model(pad_questions, pad_relations, device,
                           reverse_question_indexs, reverse_relation_indexs,
                           question_lengths, relation_lengths)
        all_scores = all_scores.to('cpu')
        start_index = 0
        for length in relation_set_lengths:
            scores = all_scores[start_index:start_index + length]
            start_index += length
            # The gold relation sits at position 0 of each candidate set.
            losses.append(loss_func(softmax_func(scores).view(1, -1),
                                    torch.tensor([0])))
        loss_batch = sum(losses)
        loss_batch.backward()
        grad_params = get_grad_params(model)
        # Accumulate the running mean of squared gradients (diagonal Fisher).
        if fisher is None:
            fisher = [param.grad ** 2 / batch_epoch for param in grad_params]
        else:
            fisher = [fisher[j] + param.grad ** 2 / batch_epoch
                      for j, param in enumerate(grad_params)]
    return fisher
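
# `param_loss` is called by `train` below but is not defined in this file.
# A minimal sketch of an EWC-style quadratic penalty consistent with the
# Fisher diagonal produced by `gen_fisher`; the original helper may differ
# (this version is an assumption).
def param_loss(model, grad_mean, grad_fisher, p_lambda):
    loss = 0
    for param, mean, fisher in zip(get_grad_params(model),
                                   grad_mean, grad_fisher):
        # Penalize movement away from the previous task's parameters,
        # weighted by each parameter's estimated Fisher information.
        loss += (fisher * (param - mean) ** 2).sum()
    return p_lambda * loss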
def compute_diff_scores(model, samples, batch_size, all_relations, device):
    """Return, for every sample, the margin between the gold relation score
    and the best negative score."""
    # Initialized once, outside the batch loop; the original reset this list
    # every iteration and so returned only the last batch's scores.
    diff_scores = []
    for i in range((len(samples) - 1) // batch_size + 1):
        # Use a separate name for the batch; the original reassigned
        # `samples` here, which corrupted every later iteration.
        batch = samples[i * batch_size:(i + 1) * batch_size]
        questions, relations, relation_set_lengths = \
            process_samples(batch, all_relations, device)
        model.init_hidden(device, sum(relation_set_lengths))
        ranked_questions, reverse_question_indexs = \
            ranking_sequence(questions)
        ranked_relations, reverse_relation_indexs = \
            ranking_sequence(relations)
        question_lengths = [len(question) for question in ranked_questions]
        relation_lengths = [len(relation) for relation in ranked_relations]
        pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
        pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
        pad_questions = pad_questions.to(device)
        pad_relations = pad_relations.to(device)
        all_scores = model(pad_questions, pad_relations, device,
                           reverse_question_indexs, reverse_relation_indexs,
                           question_lengths, relation_lengths)
        start_index = 0
        for length in relation_set_lengths:
            this_scores = all_scores[start_index:start_index + length]
            # Position 0 holds the gold relation; the rest are negatives.
            gold_score = this_scores[0]
            neg_scores = this_scores[1:]
            diff_scores.append(gold_score - neg_scores.max())
            start_index += length
    return diff_scores
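
# Hypothetical usage sketch (select_hard_samples and k are assumptions, not
# part of the original code): rank candidate memory samples by their margin
# and keep the hardest ones, i.e. those where the gold score barely beats
# the best negative.
def select_hard_samples(model, candidates, all_relations, device, k=50):
    diff_scores = compute_diff_scores(model, list(candidates), 64,
                                      all_relations, device)
    ranked = sorted(zip(diff_scores, candidates),
                    key=lambda pair: float(pair[0]))
    return [sample for _, sample in ranked[:k]]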
def feed_samples(model, samples, loss_function, all_relations, device,
                 alignment_model=None):
    # Variant of feed_samples that supports an optional alignment model.
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)
    ranked_questions, alignment_question_indexs = \
        ranking_sequence(questions)
    ranked_relations, alignment_relation_indexs = \
        ranking_sequence(relations)
    question_lengths = [len(question) for question in ranked_questions]
    relation_lengths = [len(relation) for relation in ranked_relations]
    pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
    pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
    pad_questions = pad_questions.to(device)
    pad_relations = pad_relations.to(device)

    model.zero_grad()
    if alignment_model is not None:
        alignment_model.zero_grad()
    model.init_hidden(device, sum(relation_set_lengths))
    all_scores = model(pad_questions, pad_relations, device,
                       alignment_question_indexs, alignment_relation_indexs,
                       question_lengths, relation_lengths, alignment_model)
    all_scores = all_scores.to('cpu')

    pos_scores = []
    neg_scores = []
    pos_index = []
    start_index = 0
    for length in relation_set_lengths:
        pos_index.append(start_index)
        # The gold score is broadcast against its (length - 1) negatives.
        pos_scores.append(all_scores[start_index].expand(length - 1))
        neg_scores.append(all_scores[start_index + 1:start_index + length])
        start_index += length
    pos_scores = torch.cat(pos_scores)
    neg_scores = torch.cat(neg_scores)

    loss = loss_function(
        pos_scores, neg_scores,
        torch.ones(sum(relation_set_lengths) - len(relation_set_lengths)))
    loss.backward()
    return all_scores, loss
def feed_samples(model, samples, loss_function, all_relations, device):
    # Basic variant of feed_samples, without an alignment or reverse model.
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)
    ranked_questions, reverse_question_indexs = \
        ranking_sequence(questions)
    ranked_relations, reverse_relation_indexs = \
        ranking_sequence(relations)
    question_lengths = [len(question) for question in ranked_questions]
    relation_lengths = [len(relation) for relation in ranked_relations]
    pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
    pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
    pad_questions = pad_questions.to(device)
    pad_relations = pad_relations.to(device)

    model.zero_grad()
    model.init_hidden(device, sum(relation_set_lengths))
    all_scores = model(pad_questions, pad_relations, device,
                       reverse_question_indexs, reverse_relation_indexs,
                       question_lengths, relation_lengths)
    all_scores = all_scores.to('cpu')

    pos_scores = []
    neg_scores = []
    start_index = 0
    for length in relation_set_lengths:
        pos_scores.append(all_scores[start_index].expand(length - 1))
        neg_scores.append(all_scores[start_index + 1:start_index + length])
        start_index += length
    pos_scores = torch.cat(pos_scores)
    neg_scores = torch.cat(neg_scores)

    loss = loss_function(
        pos_scores, neg_scores,
        torch.ones(sum(relation_set_lengths) - len(relation_set_lengths)))
    loss.backward()
    # Return scores and loss for consistency with the other variants
    # (the original returned nothing here).
    return all_scores, loss
def feed_samples(model, samples, loss_function, all_relations, device,
                 reverse_model=None, memory_que_embed=[], memory_rel_embed=[]):
    # Variant of feed_samples supporting a reverse model and an
    # embedding-alignment loss against stored memory embeddings.
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)
    ranked_questions, reverse_question_indexs = \
        ranking_sequence(questions)
    ranked_relations, reverse_relation_indexs = \
        ranking_sequence(relations)
    question_lengths = [len(question) for question in ranked_questions]
    relation_lengths = [len(relation) for relation in ranked_relations]
    pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
    pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
    pad_questions = pad_questions.to(device)
    pad_relations = pad_relations.to(device)

    model.zero_grad()
    if reverse_model is not None:
        reverse_model.zero_grad()
    model.init_hidden(device, sum(relation_set_lengths))
    all_scores, cur_que_embed, cur_rel_embed = model(
        pad_questions, pad_relations, device,
        reverse_question_indexs, reverse_relation_indexs,
        question_lengths, relation_lengths, reverse_model, ret_embeds=True)
    all_scores = all_scores.to('cpu')

    pos_scores = []
    neg_scores = []
    pos_index = []
    start_index = 0
    for length in relation_set_lengths:
        pos_index.append(start_index)
        pos_scores.append(all_scores[start_index].expand(length - 1))
        neg_scores.append(all_scores[start_index + 1:start_index + length])
        start_index += length
    pos_scores = torch.cat(pos_scores)
    neg_scores = torch.cat(neg_scores)

    reverse_model_criterion = nn.MSELoss()
    loss = loss_function(
        pos_scores, neg_scores,
        torch.ones(sum(relation_set_lengths) - len(relation_set_lengths)))
    # With alpha = 0.0, the ranking loss is replaced entirely by the
    # embedding-alignment (MSE) loss whenever memory embeddings are given.
    alpha = 0.0
    if len(memory_que_embed) > 0:
        reverse_model = reverse_model.to(device)
        que_y = torch.from_numpy(memory_que_embed)
        rel_y = torch.from_numpy(memory_rel_embed)
        que_out = cur_que_embed[pos_index].to('cpu')
        rel_out = cur_rel_embed[pos_index].to('cpu')
        loss = loss * alpha + (1 - alpha) * (
            reverse_model_criterion(que_out, que_y) +
            reverse_model_criterion(rel_out, rel_y))
    loss.backward()
    return all_scores, loss
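
# Hypothetical usage sketch (train_step is an assumption, not part of the
# original code): one training step over a mini-batch using the basic
# feed_samples variant with a margin ranking loss.
def train_step(model, optimizer, batch, all_relations, device):
    loss_function = nn.MarginRankingLoss(margin=2.0)
    # feed_samples zeroes the gradients and calls backward(); the caller
    # only needs to apply the optimizer step.
    all_scores, loss = feed_samples(model, batch, loss_function,
                                    all_relations, device)
    optimizer.step()
    return loss.item()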
def process_samples(self, itr, paths):
    return utils.process_samples(itr, paths,
                                 calc_inf_gain=self.calc_inf_gain,
                                 inf_factor=self.inf_factor)
def train(training_data, valid_data, vocabulary, embedding_dim, hidden_dim,
          device, batch_size, lr, model_path, embedding, all_relations,
          model=None, epoch=100, grad_means=[], grad_fishers=[],
          loss_margin=2.0):
    if model is None:
        torch.manual_seed(100)
        model = SimilarityModel(embedding_dim, hidden_dim, len(vocabulary),
                                np.array(embedding), 1, device)
    loss_function = nn.MarginRankingLoss(loss_margin)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch_i in range(epoch):
        for i in range((len(training_data) - 1) // batch_size + 1):
            samples = training_data[i * batch_size:(i + 1) * batch_size]
            questions, relations, relation_set_lengths = process_samples(
                samples, all_relations, device)
            ranked_questions, reverse_question_indexs = \
                ranking_sequence(questions)
            ranked_relations, reverse_relation_indexs = \
                ranking_sequence(relations)
            question_lengths = [len(question) for question in ranked_questions]
            relation_lengths = [len(relation) for relation in ranked_relations]
            pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
            pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
            pad_questions = pad_questions.to(device)
            pad_relations = pad_relations.to(device)

            model.zero_grad()
            model.init_hidden(device, sum(relation_set_lengths))
            all_scores = model(pad_questions, pad_relations, device,
                               reverse_question_indexs,
                               reverse_relation_indexs,
                               question_lengths, relation_lengths)
            all_scores = all_scores.to('cpu')
            pos_scores = []
            neg_scores = []
            start_index = 0
            for length in relation_set_lengths:
                pos_scores.append(all_scores[start_index].expand(length - 1))
                neg_scores.append(
                    all_scores[start_index + 1:start_index + length])
                start_index += length
            pos_scores = torch.cat(pos_scores)
            neg_scores = torch.cat(neg_scores)

            loss = loss_function(
                pos_scores, neg_scores,
                torch.ones(sum(relation_set_lengths) -
                           len(relation_set_lengths)))
            loss = loss.sum()
            # EWC-style penalty for each previous task; `p_lambda` is a
            # module-level constant in the original code. The loop variable
            # is renamed so it no longer shadows the batch index `i`.
            for k in range(len(grad_means)):
                grad_mean = grad_means[k]
                grad_fisher = grad_fishers[k]
                loss += param_loss(model, grad_mean,
                                   grad_fisher, p_lambda).to('cpu')
            loss.backward()
            optimizer.step()
    # Validation and checkpointing (commented out in the original):
    # acc = evaluate_model(model, valid_data, batch_size, all_relations, device)
    # if acc > best_acc:
    #     torch.save(model, model_path)
    # best_model = torch.load(model_path)
    # return best_model
    return model
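
# Hypothetical usage sketch (task_sequence and the hyperparameter names are
# assumptions): sequential training with the EWC penalty above, accumulating
# a parameter snapshot and Fisher estimate after each task so later tasks
# are regularized toward earlier solutions.
grad_means, grad_fishers = [], []
model = None
for task_data in task_sequence:
    model = train(task_data, valid_data, vocabulary, embedding_dim,
                  hidden_dim, device, batch_size, lr, model_path, embedding,
                  all_relations, model=model,
                  grad_means=grad_means, grad_fishers=grad_fishers)
    # Snapshot the current parameters and their Fisher diagonal.
    grad_params = get_grad_params(model)
    grad_means.append([param.detach().clone() for param in grad_params])
    grad_fishers.append(gen_fisher(model, task_data, all_relations))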