def feed_samples(model, samples, loss_function, device,
                 word_alignment_model=None, sent_alignment_model=None):
    sentences, targets, words = process_samples(samples, device)

    # Sort the sequences by length and keep the index mappings needed to
    # restore the original order after the RNN pass.
    ranked_sentences, alignment_sentence_indexs = ranking_sequence(sentences)
    ranked_words, alignment_words_indexs = ranking_sequence(words)
    sentence_lengths = [len(sentence) for sentence in ranked_sentences]
    words_lengths = [len(word) for word in ranked_words]

    # Padding
    pad_sentences = torch.nn.utils.rnn.pad_sequence(ranked_sentences)
    pad_sentences = pad_sentences.to(device)
    pad_words = torch.nn.utils.rnn.pad_sequence(ranked_words)
    pad_words = pad_words.to(device)

    model.zero_grad()
    if word_alignment_model is not None:
        word_alignment_model.zero_grad()
    if sent_alignment_model is not None:
        sent_alignment_model.zero_grad()
    output = model(pad_sentences, pad_words,
                   alignment_sentence_indexs, alignment_words_indexs,
                   sentence_lengths, words_lengths,
                   word_alignment_model, sent_alignment_model)

    loss = loss_function(output, targets)
    loss.backward()
    return loss
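feed_samples only accumulates gradients via loss.backward(); the caller is responsible for the optimizer step. A minimal driver-loop sketch, assuming an optimizer created elsewhere and the same helpers (process_samples, ranking_sequence) in scope:

def train_one_epoch(model, training_data, batch_size, loss_function, device,
                    optimizer):
    # Hypothetical driver: feed_samples fills the gradients, the caller steps.
    total_loss = 0.0
    for i in range((len(training_data) - 1) // batch_size + 1):
        batch = training_data[i * batch_size:(i + 1) * batch_size]
        loss = feed_samples(model, batch, loss_function, device)
        optimizer.step()
        total_loss += loss.item()
    return total_loss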
def evaluate_model(model,
                   testing_data,
                   batch_size,
                   device,
                   word_alignment_model=None,
                   sent_alignment_model=None):
    num_correct = 0
    for i in range((len(testing_data) - 1) // batch_size + 1):
        samples = testing_data[i * batch_size:(i + 1) * batch_size]
        sentences, targets, words = \
            process_testing_samples(samples, device)

        ranked_words, alignment_words_indexs = \
            ranking_sequence(words)
        ranked_sentences, alignment_sentence_indexs = \
            ranking_sequence(sentences)

        words_lengths = [len(word) for word in ranked_words]
        sentence_lengths = [len(sentence) for sentence in ranked_sentences]

        pad_sentences = torch.nn.utils.rnn.pad_sequence(ranked_sentences)
        pad_sentences = pad_sentences.to(device)
        pad_words = torch.nn.utils.rnn.pad_sequence(ranked_words)
        pad_words = pad_words.to(device)

        output = model(pad_sentences, pad_words, alignment_sentence_indexs,
                       alignment_words_indexs, sentence_lengths, words_lengths,
                       word_alignment_model, sent_alignment_model)

        for j, one_pred in enumerate(output):
            if torch.argmax(one_pred) == targets[j]:
                num_correct += 1

    return float(num_correct) / len(testing_data)
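Because evaluate_model only runs forward passes, it is usually wrapped in torch.no_grad() to skip autograd bookkeeping; a usage sketch (the eval/train toggling is an assumption, not shown in these examples):

import torch

def run_evaluation(model, testing_data, batch_size, device):
    # Disable dropout/batch-norm updates and gradient tracking for speed.
    model.eval()
    with torch.no_grad():
        accuracy = evaluate_model(model, testing_data, batch_size, device)
    model.train()
    return accuracy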
Example 3
def gen_fisher(model, train_data, all_relations):
    softmax_func = nn.LogSoftmax(0)
    loss_func = nn.NLLLoss()
    # Single-sample batches: the Fisher estimate uses per-example gradients.
    fisher_batch_size = 1
    batch_epoch = (len(train_data)-1)//fisher_batch_size+1
    fisher = None
    for i in range(batch_epoch):
        model.zero_grad()
        losses = []
        samples = train_data[i*fisher_batch_size:(i+1)*fisher_batch_size]
        questions, relations, relation_set_lengths = process_samples(
            samples, all_relations, device)
        ranked_questions, reverse_question_indexs = \
            ranking_sequence(questions)
        ranked_relations, reverse_relation_indexs = \
            ranking_sequence(relations)
        question_lengths = [len(question) for question in ranked_questions]
        relation_lengths = [len(relation) for relation in ranked_relations]
        pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
        pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
        pad_questions = pad_questions.to(device)
        pad_relations = pad_relations.to(device)

        model.init_hidden(device, sum(relation_set_lengths))
        all_scores = model(pad_questions, pad_relations, device,
                           reverse_question_indexs, reverse_relation_indexs,
                           question_lengths, relation_lengths)
        all_scores = all_scores.to('cpu')
        start_index = 0
        for length in relation_set_lengths:
            scores = all_scores[start_index:start_index+length]
            start_index += length
            # The gold relation is scored first in each candidate set, so
            # the NLL target index is always 0.
            losses.append(loss_func(softmax_func(scores).view(1, -1),
                                    torch.tensor([0])))
        loss_batch = sum(losses)
        loss_batch.backward()
        grad_params = get_grad_params(model)
        # Accumulate the diagonal Fisher estimate: the average over all
        # batches of the squared per-sample gradients.
        if fisher is None:
            fisher = [param.grad**2/batch_epoch
                      for param in grad_params]
        else:
            fisher = [fisher[j]+param.grad**2/batch_epoch
                      for j, param in enumerate(grad_params)]

    return fisher
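gen_fisher approximates the diagonal of the Fisher information matrix, F_i ≈ E[(∂L/∂θ_i)²], averaged over per-sample gradients. The train function below consumes it through param_loss, which is not defined in these snippets; a minimal EWC-style sketch of what it plausibly computes (the name and signature come from the call site, the body is an assumption):

def param_loss(model, grad_mean, grad_fisher, p_lambda):
    # Hypothetical EWC penalty: (lambda / 2) * sum_i F_i * (theta_i - theta*_i)^2,
    # assuming grad_mean holds the parameters snapshotted after the previous
    # task and grad_fisher the matching Fisher terms from gen_fisher.
    loss = 0
    for param, mean, fisher in zip(get_grad_params(model),
                                   grad_mean, grad_fisher):
        loss = loss + (p_lambda / 2) * (fisher * (param - mean) ** 2).sum()
    return loss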
Example 4
def get_rel_embed(model,
                  sample_list,
                  all_relations,
                  alignment_model,
                  before_alignment=False):
    ret_rel_embeds = []
    for i in range((len(sample_list) - 1) // batch_size + 1):
        samples = sample_list[i * batch_size:(i + 1) * batch_size]
        relations = []
        for item in samples:
            this_relation = torch.tensor(all_relations[item[0]],
                                         dtype=torch.long).to(device)
            relations.append(this_relation)
        model.init_hidden(device, len(relations))
        ranked_relations, alignment_relation_indexs = \
            ranking_sequence(relations)
        relation_lengths = [len(relation) for relation in ranked_relations]
        pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
        rel_embeds = model.compute_rel_embed(pad_relations, relation_lengths,
                                             alignment_relation_indexs,
                                             alignment_model, before_alignment)
        ret_rel_embeds.append(rel_embeds.detach().cpu().numpy())
    return np.concatenate(ret_rel_embeds)
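A usage sketch for get_rel_embed (batch_size and device are globals here, as in the function body): comparing embeddings computed before and after the alignment model gives a quick measure of how far the alignment transform moves the relation representations:

before = get_rel_embed(model, sample_list, all_relations, alignment_model,
                       before_alignment=True)
after = get_rel_embed(model, sample_list, all_relations, alignment_model)
drift = np.linalg.norm(after - before, axis=1).mean()  # mean L2 displacement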
Example 5
def save_rel_embeds(model, all_seen_rels, all_relations, file_name):
    rel_embeds = {}
    if model is not None and len(all_seen_rels) > 0:
        for i in range((len(all_seen_rels)-1)//batch_size+1):
            seen_rels_batch = all_seen_rels[i*batch_size:(i+1)*batch_size]
            relations = [torch.tensor(all_relations[rel_index],
                                      dtype=torch.long).to(device)
                         for rel_index in seen_rels_batch]
            model.init_hidden(device, len(relations))
            ranked_relations, reverse_relation_indexs = \
                ranking_sequence(relations)
            relation_lengths = [len(relation) for relation in ranked_relations]
            pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
            new_rel_embeds = model.compute_rel_embed(pad_relations,
                                                     relation_lengths,
                                                     reverse_relation_indexs)
            # Use a distinct inner index so the outer batch index i is not
            # shadowed; detach before converting to numpy.
            for j, rel in enumerate(seen_rels_batch):
                rel_embeds[rel] = new_rel_embeds[j].detach().cpu().numpy()
        rels = list(rel_embeds.keys())
        values = rel_embeds.values()
        with open(file_name, 'w') as writer:
            writer.write(str(rels)+'\n')
            for embed in values:
                to_write = [round(x, 6) for x in embed]
                writer.write(str(to_write)+'\n')
Example 6
def compute_diff_scores(model, samples, batch_size, all_relations, device):
    diff_scores = []
    for i in range((len(samples)-1)//batch_size+1):
        # Slice into a separate variable so the full sample list is not
        # overwritten on the first iteration.
        batch_samples = samples[i*batch_size:(i+1)*batch_size]
        questions, relations, relation_set_lengths = \
            process_samples(batch_samples, all_relations, device)
        model.init_hidden(device, sum(relation_set_lengths))
        ranked_questions, reverse_question_indexs = \
            ranking_sequence(questions)
        ranked_relations, reverse_relation_indexs = \
            ranking_sequence(relations)
        question_lengths = [len(question) for question in ranked_questions]
        relation_lengths = [len(relation) for relation in ranked_relations]
        pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
        pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
        all_scores = model(pad_questions, pad_relations, device,
                           reverse_question_indexs, reverse_relation_indexs,
                           question_lengths, relation_lengths)
        start_index = 0
        # Within each candidate set the gold relation is scored first; the
        # diff score is the gold score minus the best negative score.
        for length in relation_set_lengths:
            this_scores = all_scores[start_index:start_index + length]
            gold_score = this_scores[0]
            neg_scores = this_scores[1:]
            diff_scores.append(gold_score - neg_scores.max())
            start_index += length
    # Return after all batches, not inside the loop.
    return diff_scores
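These margins are natural inputs to sample selection: a small (or negative) gold-minus-best-negative gap marks an example the model finds hard. A hedged sketch of such a selector (the helper name and the replay-memory use are assumptions, not part of the original):

def select_hard_samples(model, samples, batch_size, all_relations, device,
                        num_to_keep):
    # Keep the samples whose gold score beats the best negative by the least.
    diff_scores = compute_diff_scores(model, samples, batch_size,
                                      all_relations, device)
    ranked = sorted(zip(diff_scores, samples), key=lambda pair: float(pair[0]))
    return [sample for _, sample in ranked[:num_to_keep]]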
Example 7
def feed_samples(model,
                 samples,
                 loss_function,
                 all_relations,
                 device,
                 alignment_model=None):
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)
    ranked_questions, alignment_question_indexs = \
        ranking_sequence(questions)
    ranked_relations, alignment_relation_indexs = \
        ranking_sequence(relations)
    question_lengths = [len(question) for question in ranked_questions]
    relation_lengths = [len(relation) for relation in ranked_relations]
    pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
    pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
    pad_questions = pad_questions.to(device)
    pad_relations = pad_relations.to(device)

    model.zero_grad()
    if alignment_model is not None:
        alignment_model.zero_grad()
    model.init_hidden(device, sum(relation_set_lengths))
    all_scores = model(pad_questions, pad_relations, device,
                       alignment_question_indexs, alignment_relation_indexs,
                       question_lengths, relation_lengths, alignment_model)
    all_scores = all_scores.to('cpu')
    pos_scores = []
    neg_scores = []
    pos_index = []
    start_index = 0
    # The first score in each candidate set is the gold relation; expand it
    # against the remaining negatives for the margin ranking loss.
    for length in relation_set_lengths:
        pos_index.append(start_index)
        pos_scores.append(all_scores[start_index].expand(length - 1))
        neg_scores.append(all_scores[start_index + 1:start_index + length])
        start_index += length
    pos_scores = torch.cat(pos_scores)
    neg_scores = torch.cat(neg_scores)

    loss = loss_function(
        pos_scores, neg_scores,
        torch.ones(sum(relation_set_lengths) - len(relation_set_lengths)))
    loss.backward()
    return all_scores, loss
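For reference, the loss_function used with these feed_samples variants is nn.MarginRankingLoss in the accompanying train function; with target y = 1 it computes mean(max(0, -(pos - neg) + margin)), so the loss is zero only when every gold score beats its paired negative by at least the margin. A tiny self-contained check:

import torch
import torch.nn as nn

pos = torch.tensor([2.0, 2.0])
neg = torch.tensor([0.5, 1.8])
loss_fn = nn.MarginRankingLoss(margin=1.0)
# First pair clears the margin (2.0 - 0.5 >= 1.0) and contributes 0;
# the second violates it and contributes 1.0 - (2.0 - 1.8) = 0.8.
print(loss_fn(pos, neg, torch.ones(2)))  # tensor(0.4000), mean of [0, 0.8]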
Example 8
def evaluate_model(model, testing_data, batch_size, all_relations, device,
                   reverse_model=None):
    num_correct = 0
    for i in range((len(testing_data)-1)//batch_size+1):
        samples = testing_data[i*batch_size:(i+1)*batch_size]
        gold_relation_indexs, questions, relations, relation_set_lengths = \
            process_testing_samples(samples, all_relations, device)
        model.init_hidden(device, sum(relation_set_lengths))
        ranked_questions, reverse_question_indexs = \
            ranking_sequence(questions)
        ranked_relations, reverse_relation_indexs = \
            ranking_sequence(relations)
        question_lengths = [len(question) for question in ranked_questions]
        relation_lengths = [len(relation) for relation in ranked_relations]
        pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
        pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
        all_scores = model(pad_questions, pad_relations, device,
                           reverse_question_indexs, reverse_relation_indexs,
                           question_lengths, relation_lengths, reverse_model)
        start_index = 0
        pred_indexs = []
        #print('len of relation_set:', len(relation_set_lengths))
        for j in range(len(relation_set_lengths)):
            length = relation_set_lengths[j]
            cand_indexs = samples[j][1]
            pred_index = (cand_indexs[
                all_scores[start_index:start_index+length].argmax()])
            if pred_index == gold_relation_indexs[j]:
                num_correct += 1
            start_index += length
    return float(num_correct)/len(testing_data)
Example 9
def feed_samples(model, samples, loss_function, all_relations, device):
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)
    ranked_questions, reverse_question_indexs = \
        ranking_sequence(questions)
    ranked_relations, reverse_relation_indexs = \
        ranking_sequence(relations)
    question_lengths = [len(question) for question in ranked_questions]
    relation_lengths = [len(relation) for relation in ranked_relations]
    pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
    pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
    pad_questions = pad_questions.to(device)
    pad_relations = pad_relations.to(device)

    model.zero_grad()
    model.init_hidden(device, sum(relation_set_lengths))
    all_scores = model(pad_questions, pad_relations, device,
                       reverse_question_indexs, reverse_relation_indexs,
                       question_lengths, relation_lengths)
    all_scores = all_scores.to('cpu')
    pos_scores = []
    neg_scores = []
    start_index = 0
    for length in relation_set_lengths:
        pos_scores.append(all_scores[start_index].expand(length-1))
        neg_scores.append(all_scores[start_index+1:start_index+length])
        start_index += length
    pos_scores = torch.cat(pos_scores)
    neg_scores = torch.cat(neg_scores)

    loss = loss_function(pos_scores, neg_scores,
                         torch.ones(sum(relation_set_lengths)-
                                    len(relation_set_lengths)))
    loss.backward()
Example 10
def update_rel_embed(model, all_seen_rels, all_relations, rel_embeds):
    if model is not None and len(all_seen_rels) > 0:
        for i in range((len(all_seen_rels)-1)//batch_size+1):
            seen_rels_batch = all_seen_rels[i*batch_size:(i+1)*batch_size]
            relations = [torch.tensor(all_relations[rel_index],
                                      dtype=torch.long).to(device)
                         for rel_index in seen_rels_batch]
            model.init_hidden(device, len(relations))
            ranked_relations, reverse_relation_indexs = \
                ranking_sequence(relations)
            relation_lengths = [len(relation) for relation in ranked_relations]
            pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
            new_rel_embeds = model.compute_rel_embed(pad_relations,
                                                     relation_lengths,
                                                     reverse_relation_indexs)
            # Use a distinct inner index so the outer batch index i is not
            # shadowed; detach before converting to numpy.
            for j, rel in enumerate(seen_rels_batch):
                rel_embeds[rel] = new_rel_embeds[j].detach().cpu().numpy()
Example 11
def feed_samples(model,
                 samples,
                 loss_function,
                 all_relations,
                 device,
                 reverse_model=None,
                 memory_que_embed=[],
                 memory_rel_embed=[]):
    questions, relations, relation_set_lengths = process_samples(
        samples, all_relations, device)
    ranked_questions, reverse_question_indexs = \
        ranking_sequence(questions)
    ranked_relations, reverse_relation_indexs = \
        ranking_sequence(relations)
    question_lengths = [len(question) for question in ranked_questions]
    relation_lengths = [len(relation) for relation in ranked_relations]
    pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
    pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
    pad_questions = pad_questions.to(device)
    pad_relations = pad_relations.to(device)

    model.zero_grad()
    if reverse_model is not None:
        reverse_model.zero_grad()
    model.init_hidden(device, sum(relation_set_lengths))
    all_scores, cur_que_embed, cur_rel_embed = model(pad_questions,
                                                     pad_relations,
                                                     device,
                                                     reverse_question_indexs,
                                                     reverse_relation_indexs,
                                                     question_lengths,
                                                     relation_lengths,
                                                     reverse_model,
                                                     ret_embeds=True)
    all_scores = all_scores.to('cpu')
    pos_scores = []
    neg_scores = []
    pos_index = []
    start_index = 0
    for length in relation_set_lengths:
        pos_index.append(start_index)
        pos_scores.append(all_scores[start_index].expand(length - 1))
        neg_scores.append(all_scores[start_index + 1:start_index + length])
        start_index += length
    pos_scores = torch.cat(pos_scores)
    neg_scores = torch.cat(neg_scores)
    reverse_model_criterion = nn.MSELoss()

    loss = loss_function(
        pos_scores, neg_scores,
        torch.ones(sum(relation_set_lengths) - len(relation_set_lengths)))
    # When memory embeddings are supplied, mix the ranking loss with an MSE
    # term that pulls the current question/relation embeddings toward the
    # stored ones; alpha weights the two terms (alpha = 0.0 keeps only the
    # embedding-alignment term).
    alpha = 0.0
    if len(memory_que_embed) > 0:
        reverse_model = reverse_model.to(device)
        que_y = torch.from_numpy(memory_que_embed)
        rel_y = torch.from_numpy(memory_rel_embed)
        que_out = cur_que_embed[pos_index].to('cpu')
        rel_out = cur_rel_embed[pos_index].to('cpu')
        loss = alpha*loss + (1-alpha)*(reverse_model_criterion(que_out, que_y)
                                       + reverse_model_criterion(rel_out, rel_y))

    loss.backward()
    return all_scores, loss
Example 12
def train(training_data,
          valid_data,
          vocabulary,
          embedding_dim,
          hidden_dim,
          device,
          batch_size,
          lr,
          model_path,
          embedding,
          all_relations,
          model=None,
          epoch=100,
          grad_means=[],
          grad_fishers=[],
          loss_margin=2.0):
    if model is None:
        torch.manual_seed(100)
        model = SimilarityModel(embedding_dim, hidden_dim, len(vocabulary),
                                np.array(embedding), 1, device)
    loss_function = nn.MarginRankingLoss(loss_margin)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    best_acc = 0
    for epoch_i in range(epoch):
        for i in range((len(training_data) - 1) // batch_size + 1):
            samples = training_data[i * batch_size:(i + 1) * batch_size]
            questions, relations, relation_set_lengths = process_samples(
                samples, all_relations, device)
            ranked_questions, reverse_question_indexs = \
                ranking_sequence(questions)
            ranked_relations, reverse_relation_indexs = \
                ranking_sequence(relations)
            question_lengths = [len(question) for question in ranked_questions]
            relation_lengths = [len(relation) for relation in ranked_relations]
            pad_questions = torch.nn.utils.rnn.pad_sequence(ranked_questions)
            pad_relations = torch.nn.utils.rnn.pad_sequence(ranked_relations)
            pad_questions = pad_questions.to(device)
            pad_relations = pad_relations.to(device)

            model.zero_grad()
            model.init_hidden(device, sum(relation_set_lengths))
            all_scores = model(pad_questions, pad_relations, device,
                               reverse_question_indexs,
                               reverse_relation_indexs, question_lengths,
                               relation_lengths)
            all_scores = all_scores.to('cpu')
            pos_scores = []
            neg_scores = []
            start_index = 0
            for length in relation_set_lengths:
                pos_scores.append(all_scores[start_index].expand(length - 1))
                neg_scores.append(all_scores[start_index + 1:start_index +
                                             length])
                start_index += length
            pos_scores = torch.cat(pos_scores)
            neg_scores = torch.cat(neg_scores)

            loss = loss_function(
                pos_scores, neg_scores,
                torch.ones(
                    sum(relation_set_lengths) - len(relation_set_lengths)))
            loss = loss.sum()
            # EWC-style regularization: penalize drift from each previous
            # task's parameters, weighted by their Fisher information
            # (p_lambda is a regularization strength defined elsewhere).
            # A fresh loop variable avoids shadowing the batch index i.
            for k in range(len(grad_means)):
                loss += param_loss(model, grad_means[k], grad_fishers[k],
                                   p_lambda).to('cpu')
            loss.backward()
            optimizer.step()
        # Validation and model selection, disabled in the original:
        # acc = evaluate_model(model, valid_data, batch_size, all_relations,
        #                      device)
        # if acc > best_acc:
        #     torch.save(model, model_path)
    # best_model = torch.load(model_path)
    # return best_model
    return model
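A sketch of how train and gen_fisher might be chained across sequential tasks for EWC-style continual learning (the task iterable and the parameter snapshotting are assumptions; only the function signatures come from the examples above):

grad_means, grad_fishers = [], []
model = None
for task_data, valid_data in tasks:  # hypothetical iterable of task splits
    model = train(task_data, valid_data, vocabulary, embedding_dim,
                  hidden_dim, device, batch_size, lr, model_path,
                  embedding, all_relations, model=model,
                  grad_means=grad_means, grad_fishers=grad_fishers)
    # Snapshot parameters and Fisher terms so later tasks are penalized
    # for drifting away from what earlier tasks learned.
    grad_means.append([param.detach().clone()
                       for param in get_grad_params(model)])
    grad_fishers.append(gen_fisher(model, task_data, all_relations))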