コード例 #1
0
def gen_fisher(model, train_data, all_relations):
    """Accumulate the mean squared loss gradient for each model parameter.

    Every sample of ``train_data`` is pushed through ``model`` on its own
    (the batch size is fixed to 1). The candidate scores of each relation set
    are converted to log-probabilities and scored with a negative
    log-likelihood loss whose target is index 0. The squared gradients of
    that loss are divided by the number of batches and summed, which yields
    their mean over the data.

    NOTE(review): ``device`` is not a parameter of this function; it is read
    from the enclosing (module) scope -- confirm it is defined by the caller's
    module before this runs.

    Args:
        model: network exposing ``zero_grad``, ``init_hidden`` and a forward
            call with the argument layout used below.
        train_data: sliceable sequence of samples accepted by
            ``process_samples``.
        all_relations: forwarded unchanged to ``process_samples``.

    Returns:
        A list with one tensor per entry of ``get_grad_params(model)`` holding
        the averaged squared gradient, or ``None`` when ``train_data`` is
        empty (the loop body never runs in that case).
    """
    log_softmax = nn.LogSoftmax(0)
    nll_loss = nn.NLLLoss()
    fisher_batch_size = 1
    num_batches = (len(train_data) - 1) // fisher_batch_size + 1
    fisher = None
    for batch_idx in range(num_batches):
        model.zero_grad()
        batch_start = batch_idx * fisher_batch_size
        samples = train_data[batch_start:batch_start + fisher_batch_size]
        questions, relations, relation_set_lengths = process_samples(
            samples, all_relations, device)
        ranked_questions, reverse_question_indexs = \
            ranking_sequence(questions)
        ranked_relations, reverse_relation_indexs = \
            ranking_sequence(relations)
        question_lengths = [len(question) for question in ranked_questions]
        relation_lengths = [len(relation) for relation in ranked_relations]
        pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
        pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
        pad_questions = pad_questions.to(device)
        pad_relations = pad_relations.to(device)

        model.init_hidden(device, sum(relation_set_lengths))
        all_scores = model(pad_questions, pad_relations, device,
                           reverse_question_indexs, reverse_relation_indexs,
                           question_lengths, relation_lengths)
        all_scores = all_scores.to('cpu')

        # One loss term per relation set; the target class is always index 0
        # of the set's slice of scores.
        losses = []
        start_index = 0
        for length in relation_set_lengths:
            scores = all_scores[start_index:start_index + length]
            start_index += length
            losses.append(nll_loss(log_softmax(scores).view(1, -1),
                                   torch.tensor([0])))
        loss_batch = sum(losses)
        loss_batch.backward()

        grad_params = get_grad_params(model)
        # ``** 2`` allocates new tensors, so the running totals are not
        # affected when the gradients are reset on the next iteration.
        squared_grads = [param.grad ** 2 / num_batches
                         for param in grad_params]
        if fisher is None:
            fisher = squared_grads
        else:
            fisher = [total + current
                      for total, current in zip(fisher, squared_grads)]

    return fisher
コード例 #2
0
def compute_diff_scores(model, samples, batch_size, all_relations, device):
    """Score every sample by the margin between its first and best other candidate.

    The samples are processed in batches of ``batch_size``. For each relation
    set, the score at position 0 is compared with the maximum of the
    remaining scores of that set, and the difference is collected.

    All batches contribute to the result. Earlier revisions overwrote
    ``samples`` with its first slice and returned from inside the batch loop,
    so only the first batch was ever scored.

    Args:
        model: network exposing ``init_hidden`` and a forward call with the
            argument layout used below.
        samples: sliceable sequence of samples accepted by
            ``process_samples``.
        batch_size: number of samples per forward pass.
        all_relations: forwarded unchanged to ``process_samples``.
        device: device passed to ``process_samples``, ``init_hidden`` and the
            model, and onto which the padded batches are moved.

    Returns:
        A list with one score difference per relation set, in input order.
        The list is empty when ``samples`` is empty.
    """
    diff_scores = []
    for batch_start in range(0, len(samples), batch_size):
        # Keep ``samples`` intact; slicing into a separate name is what lets
        # later batches be taken from the full input.
        batch = samples[batch_start:batch_start + batch_size]
        questions, relations, relation_set_lengths = \
            process_samples(batch, all_relations, device)
        model.init_hidden(device, sum(relation_set_lengths))
        ranked_questions, reverse_question_indexs = \
            ranking_sequence(questions)
        ranked_relations, reverse_relation_indexs = \
            ranking_sequence(relations)
        question_lengths = [len(question) for question in ranked_questions]
        relation_lengths = [len(relation) for relation in ranked_relations]
        # Same device placement as the sibling training helpers; this is a
        # no-op when the tensors already live on ``device``.
        pad_questions = torch.nn.utils.rnn.pad_sequence(
            ranked_questions).to(device)
        pad_relations = torch.nn.utils.rnn.pad_sequence(
            ranked_relations).to(device)
        all_scores = model(pad_questions, pad_relations, device,
                           reverse_question_indexs, reverse_relation_indexs,
                           question_lengths, relation_lengths)
        start_index = 0
        for length in relation_set_lengths:
            this_scores = all_scores[start_index:start_index + length]
            gold_score = this_scores[0]
            neg_scores = this_scores[1:]
            # NOTE(review): ``max()`` raises on an empty tensor, so every
            # relation set is expected to hold at least two candidates.
            diff_scores.append(gold_score - neg_scores.max())
            start_index += length
    return diff_scores
コード例 #3
0
ファイル: train.py プロジェクト: thunlp/ContinualRE
def feed_samples(model,
                 samples,
                 loss_function,
                 all_relations,
                 device,
                 alignment_model=None):
    """Run one forward/backward pass over ``samples`` and return scores and loss.

    Within each relation set the score at position 0 is paired against every
    other score of the set, and ``loss_function`` is called with those pairs
    and an all-ones target. Gradients of ``model`` (and of
    ``alignment_model`` when given) are cleared before the forward pass and
    populated by ``loss.backward()``; no optimizer step is taken here.

    Args:
        model: network exposing ``zero_grad``, ``init_hidden`` and a forward
            call with the argument layout used below.
        samples: batch accepted by ``process_samples``.
        loss_function: callable taking ``(pos_scores, neg_scores, target)``.
        all_relations: forwarded unchanged to ``process_samples``.
        device: device for the padded inputs and the model call.
        alignment_model: optional module; zeroed when not ``None`` and passed
            to the model as its last positional argument.

    Returns:
        Tuple ``(all_scores, loss)`` where ``all_scores`` has been moved to
        the CPU.
    """
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)
    ranked_questions, alignment_question_indexs = \
        ranking_sequence(questions)
    ranked_relations, alignment_relation_indexs = \
        ranking_sequence(relations)
    question_lengths = [len(question) for question in ranked_questions]
    relation_lengths = [len(relation) for relation in ranked_relations]
    pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
    pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
    pad_questions = pad_questions.to(device)
    pad_relations = pad_relations.to(device)

    model.zero_grad()
    if alignment_model is not None:
        alignment_model.zero_grad()
    model.init_hidden(device, sum(relation_set_lengths))
    all_scores = model(pad_questions, pad_relations, device,
                       alignment_question_indexs, alignment_relation_indexs,
                       question_lengths, relation_lengths, alignment_model)
    all_scores = all_scores.to('cpu')

    # Pair the first score of every relation set with each remaining score.
    pos_scores = []
    neg_scores = []
    start_index = 0
    for length in relation_set_lengths:
        pos_scores.append(all_scores[start_index].expand(length - 1))
        neg_scores.append(all_scores[start_index + 1:start_index + length])
        start_index += length
    pos_scores = torch.cat(pos_scores)
    neg_scores = torch.cat(neg_scores)

    # One target entry per (first, other) pair: total candidates minus one
    # per relation set.
    loss = loss_function(
        pos_scores, neg_scores,
        torch.ones(sum(relation_set_lengths) - len(relation_set_lengths)))
    loss.backward()
    return all_scores, loss
コード例 #4
0
ファイル: train.py プロジェクト: thunlp/ContinualRE
def feed_samples(model, samples, loss_function, all_relations, device):
    """Run one forward/backward pass over ``samples``.

    The first score of each relation set is paired with every other score of
    that set; ``loss_function`` receives those pairs together with an
    all-ones target and the resulting loss is back-propagated. Nothing is
    returned and no optimizer step is taken.
    """
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)

    sorted_questions, question_restore_order = ranking_sequence(questions)
    sorted_relations, relation_restore_order = ranking_sequence(relations)
    question_lengths = [len(seq) for seq in sorted_questions]
    relation_lengths = [len(seq) for seq in sorted_relations]

    pad = torch.nn.utils.rnn.pad_sequence
    question_batch = pad(sorted_questions).to(device)
    relation_batch = pad(sorted_relations).to(device)

    model.zero_grad()
    total_candidates = sum(relation_set_lengths)
    model.init_hidden(device, total_candidates)
    scores = model(question_batch, relation_batch, device,
                   question_restore_order, relation_restore_order,
                   question_lengths, relation_lengths).to('cpu')

    # Walk the flat score vector one relation set at a time.
    positives, negatives = [], []
    offset = 0
    for set_size in relation_set_lengths:
        set_end = offset + set_size
        positives.append(scores[offset].expand(set_size - 1))
        negatives.append(scores[offset + 1:set_end])
        offset = set_end

    pair_count = sum(relation_set_lengths) - len(relation_set_lengths)
    loss = loss_function(torch.cat(positives), torch.cat(negatives),
                         torch.ones(pair_count))
    loss.backward()
コード例 #5
0
def feed_samples(model,
                 samples,
                 loss_function,
                 all_relations,
                 device,
                 reverse_model=None,
                 memory_que_embed=None,
                 memory_rel_embed=None):
    """Run one forward/backward pass, optionally matching stored embeddings.

    The ranking loss pairs the first score of each relation set with every
    other score of that set. When ``memory_que_embed`` is non-empty, the
    question and relation embeddings found at the first position of each
    relation set are additionally compared (mean squared error) against
    ``memory_que_embed`` / ``memory_rel_embed``. With ``alpha`` fixed to 0.0
    the ranking term is multiplied by zero in that case, so only the two MSE
    terms contribute to the back-propagated loss.

    Args:
        model: network exposing ``zero_grad``, ``init_hidden`` and a forward
            call that accepts ``ret_embeds=True`` and returns
            ``(scores, question_embeddings, relation_embeddings)``.
        samples: batch accepted by ``process_samples``.
        loss_function: callable taking ``(pos_scores, neg_scores, target)``.
        all_relations: forwarded unchanged to ``process_samples``.
        device: device for the padded inputs and the model call.
        reverse_model: optional module; zeroed when not ``None`` and passed
            on to the model.
        memory_que_embed: optional NumPy array (it is wrapped with
            ``torch.from_numpy``). ``None`` or an empty value disables the
            embedding term.
        memory_rel_embed: NumPy array paired with ``memory_que_embed``; only
            read when the embedding term is active.

    Returns:
        Tuple ``(all_scores, loss)`` where ``all_scores`` has been moved to
        the CPU.
    """
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)
    ranked_questions, reverse_question_indexs = \
        ranking_sequence(questions)
    ranked_relations, reverse_relation_indexs = \
        ranking_sequence(relations)
    question_lengths = [len(question) for question in ranked_questions]
    relation_lengths = [len(relation) for relation in ranked_relations]
    pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
    pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
    pad_questions = pad_questions.to(device)
    pad_relations = pad_relations.to(device)

    model.zero_grad()
    if reverse_model is not None:
        reverse_model.zero_grad()
    model.init_hidden(device, sum(relation_set_lengths))
    all_scores, cur_que_embed, cur_rel_embed = model(pad_questions,
                                                     pad_relations,
                                                     device,
                                                     reverse_question_indexs,
                                                     reverse_relation_indexs,
                                                     question_lengths,
                                                     relation_lengths,
                                                     reverse_model,
                                                     ret_embeds=True)
    all_scores = all_scores.to('cpu')

    pos_scores = []
    neg_scores = []
    # Flat index of the first candidate of every relation set; used below to
    # pick the matching rows out of the returned embeddings.
    pos_index = []
    start_index = 0
    for length in relation_set_lengths:
        pos_index.append(start_index)
        pos_scores.append(all_scores[start_index].expand(length - 1))
        neg_scores.append(all_scores[start_index + 1:start_index + length])
        start_index += length
    pos_scores = torch.cat(pos_scores)
    neg_scores = torch.cat(neg_scores)

    loss = loss_function(
        pos_scores, neg_scores,
        torch.ones(sum(relation_set_lengths) - len(relation_set_lengths)))

    # Weight of the ranking term once the embedding term is active.
    alpha = 0.0
    # ``len(...) > 0`` rather than truthiness: the value is a NumPy array,
    # whose truth value is ambiguous.
    if memory_que_embed is not None and len(memory_que_embed) > 0:
        if reverse_model is not None:
            # Guarded so that a missing reverse model no longer raises an
            # AttributeError here.
            reverse_model = reverse_model.to(device)
        embed_criterion = nn.MSELoss()
        que_y = torch.from_numpy(memory_que_embed)
        rel_y = torch.from_numpy(memory_rel_embed)
        que_out = cur_que_embed[pos_index].to('cpu')
        rel_out = cur_rel_embed[pos_index].to('cpu')
        loss = loss * alpha + (1 - alpha) * (
            embed_criterion(que_out, que_y) +
            embed_criterion(rel_out, rel_y))

    loss.backward()
    return all_scores, loss
コード例 #6
0
 def process_samples(self, itr, paths):
     """Forward to ``utils.process_samples`` using this object's settings.

     ``itr`` and ``paths`` are passed through untouched; ``calc_inf_gain``
     and ``inf_factor`` are taken from the instance.
     """
     settings = {
         'calc_inf_gain': self.calc_inf_gain,
         'inf_factor': self.inf_factor,
     }
     return utils.process_samples(itr, paths, **settings)
コード例 #7
0
def train(training_data,
          valid_data,
          vocabulary,
          embedding_dim,
          hidden_dim,
          device,
          batch_size,
          lr,
          model_path,
          embedding,
          all_relations,
          model=None,
          epoch=100,
          grad_means=None,
          grad_fishers=None,
          loss_margin=2.0):
    """Train a similarity model with a margin ranking loss plus parameter penalties.

    For every batch, the first score of each relation set is ranked against
    the other scores of that set with ``nn.MarginRankingLoss(loss_margin)``.
    One ``param_loss`` term per ``(grad_means[k], grad_fishers[k])`` pair is
    added to the loss before the Adam step.

    NOTE(review): ``p_lambda`` is not a parameter of this function; it is
    read from the enclosing (module) scope and is only needed when
    ``grad_means`` is non-empty.

    Args:
        training_data: sliceable sequence of samples accepted by
            ``process_samples``.
        valid_data: currently unused (validation is disabled).
        vocabulary: its length is the vocabulary size of a newly built model.
        embedding_dim: embedding size of a newly built model.
        hidden_dim: hidden size of a newly built model.
        device: device for the model and the padded inputs.
        batch_size: number of samples per optimisation step.
        lr: learning rate for Adam.
        model_path: currently unused (checkpointing is disabled).
        embedding: converted with ``np.array`` and handed to a newly built
            model.
        all_relations: forwarded unchanged to ``process_samples``.
        model: existing model to continue training. When ``None`` a
            ``SimilarityModel`` is created after seeding torch with 100.
        epoch: number of passes over ``training_data``.
        grad_means: per-task first arguments for ``param_loss``. ``None`` is
            treated as an empty list.
        grad_fishers: per-task second arguments for ``param_loss``, indexed
            in parallel with ``grad_means``. ``None`` is treated as an empty
            list.
        loss_margin: margin of the ranking loss.

    Returns:
        The model after the final epoch.
    """
    # Resolve the ``None`` defaults here instead of sharing mutable list
    # defaults between calls.
    grad_means = [] if grad_means is None else grad_means
    grad_fishers = [] if grad_fishers is None else grad_fishers

    if model is None:
        torch.manual_seed(100)
        model = SimilarityModel(embedding_dim, hidden_dim, len(vocabulary),
                                np.array(embedding), 1, device)
    loss_function = nn.MarginRankingLoss(loss_margin)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    num_batches = (len(training_data) - 1) // batch_size + 1
    for _ in range(epoch):
        for batch_idx in range(num_batches):
            samples = training_data[batch_idx * batch_size:
                                    (batch_idx + 1) * batch_size]
            questions, relations, relation_set_lengths = process_samples(
                samples, all_relations, device)
            ranked_questions, reverse_question_indexs = \
                ranking_sequence(questions)
            ranked_relations, reverse_relation_indexs = \
                ranking_sequence(relations)
            question_lengths = [len(question) for question in ranked_questions]
            relation_lengths = [len(relation) for relation in ranked_relations]
            pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
            pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
            pad_questions = pad_questions.to(device)
            pad_relations = pad_relations.to(device)

            model.zero_grad()
            model.init_hidden(device, sum(relation_set_lengths))
            all_scores = model(pad_questions, pad_relations, device,
                               reverse_question_indexs,
                               reverse_relation_indexs, question_lengths,
                               relation_lengths)
            all_scores = all_scores.to('cpu')

            # Pair the first score of every relation set with each
            # remaining score of that set.
            pos_scores = []
            neg_scores = []
            start_index = 0
            for length in relation_set_lengths:
                pos_scores.append(all_scores[start_index].expand(length - 1))
                neg_scores.append(all_scores[start_index + 1:start_index +
                                             length])
                start_index += length
            pos_scores = torch.cat(pos_scores)
            neg_scores = torch.cat(neg_scores)

            loss = loss_function(
                pos_scores, neg_scores,
                torch.ones(
                    sum(relation_set_lengths) - len(relation_set_lengths)))
            loss = loss.sum()
            # Distinct index name so the batch counter is never reused.
            for task_idx, grad_mean in enumerate(grad_means):
                grad_fisher = grad_fishers[task_idx]
                loss += param_loss(model, grad_mean, grad_fisher,
                                   p_lambda).to('cpu')
            loss.backward()
            optimizer.step()
    return model