Example 1
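All of the excerpts below come from the same project and rely on a shared preamble that is not part of this listing. The sketch that follows is an assumption reconstructed from how the names are used; the helper functions (build_paragraph, switch_sentence, mask_sentence, filter_output, ...), the model classes (MyModel, ClassificationModel, SummarizeModel, ...), and globals such as hidden_dim, learning_rate, num_epoch and the various *_path settings are defined elsewhere in the repository.

# Assumed preamble -- reconstructed, not part of the original excerpts.
import itertools
import json
import random

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter  # the project may use tensorboardX instead

# Hypothetical values; the real configuration lives in the project's globals.
batch_size = 32
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')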
def evaluate_switch(model, linear_layer, data, my_vocab):
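    # Evaluate the sentence-switching pretext task: switch_sentence swaps sentences
    # in each paragraph with candidates pooled from the corpus, and the model plus
    # the sigmoid linear layer predict, per sentence, whether it was switched.
    # Returns overall binary accuracy.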
    all_paragraphs = [
        build_paragraph(this_sample, my_vocab) for this_sample in data
    ]
    all_paragraph_lengths = [len(this_sample) for this_sample in data]
    sentence_cands = []
    for i in range(min(2000, len(all_paragraphs))):
        sentence_cands += all_paragraphs[i][0]
    total_corrects = 0
    total_samples = 0
    for current_batch in range(int((len(data) - 1) / batch_size) + 1):
        paragraphs = all_paragraphs[current_batch *
                                    batch_size:(current_batch + 1) *
                                    batch_size]
        paragraph_lengths = all_paragraph_lengths[current_batch *
                                                  batch_size:(current_batch +
                                                              1) * batch_size]
        masked_paragraphs, masks = switch_sentence(paragraphs, sentence_cands)
        embeds = model(masked_paragraphs)
        #print(len(pos_score), len(neg_score))
        this_batch_size, doc_size, embed_dim = embeds.size()
        embeds = embeds.view(-1, embed_dim)
        sigmoid = nn.Sigmoid()
        scores = sigmoid(linear_layer(embeds).view(this_batch_size, doc_size))
        labels = torch.cat(masks).long().to(device)
        scores = filter_output(scores.view(-1), paragraph_lengths)
        preds = scores.ge(0.5).long().to(device)
        corrects = torch.sum(labels == preds)
        total_corrects += corrects
        total_samples += len(preds)
    return float(total_corrects) / total_samples
def train(train_data, dev_data, my_vocab, train_target, dev_target):
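    # Train the document classifier: optionally load a pretrained embedding model
    # from classifier_embed_model_path, wrap it in ClassificationModel, and optimize
    # cross-entropy with Adam. The checkpoint is saved whenever dev accuracy improves.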
    #model = None
    embed_model = MyModel(my_vocab)
    #model = nn.DataParallel(model)
    if classifier_embed_model_path is not None:
        embed_model = torch.load(classifier_embed_model_path)
    #criteria = torch.nn.CrossEntropyLoss()
    model = ClassificationModel(embed_model, hidden_dim * 2, num_classes)
    model = model.to(device)
    #criteria = torch.nn.MSELoss()
    criteria = torch.nn.CrossEntropyLoss()
    model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=learning_rate)
    best_acc = -1
    writer = SummaryWriter(exp_name)
    #print(len(train_data))
    all_paragraphs = [
        build_paragraph(this_sample, my_vocab) for this_sample in train_data
    ]
    all_paragraph_lengths = [len(this_sample) for this_sample in train_data]
    train_idx = list(range(len(train_data)))
    for epoch_i in range(num_epoch):
        random.shuffle(train_idx)
        total_loss = 0
        total_batch = 0
        all_paragraphs = [all_paragraphs[i] for i in train_idx]
        all_paragraph_lengths = [all_paragraph_lengths[i] for i in train_idx]
        train_target = [train_target[i] for i in train_idx]
        for current_batch in range(
                int((len(train_data) - 1) / batch_size) + 1):
            if current_batch % 100 == 0:
                print(current_batch)
            model_optim.zero_grad()
            paragraphs = all_paragraphs[current_batch *
                                        batch_size:(current_batch + 1) *
                                        batch_size]
            paragraph_lengths = all_paragraph_lengths[current_batch *
                                                      batch_size:
                                                      (current_batch + 1) *
                                                      batch_size]
            scores = model(paragraphs)
            targets = train_target[current_batch *
                                   batch_size:(current_batch + 1) * batch_size]
            labels = torch.tensor(targets).to(device)
            loss = criteria(scores, labels)
            #print(loss)
            total_loss += loss.item()
            total_batch += 1
            loss.backward()
            model_optim.step()
        acc = evaluate_classifier(model, dev_data, dev_target, my_vocab)
        if acc > best_acc:
            torch.save(model, classifier_model_path)
            best_acc = acc
        writer.add_scalar('accuracy', acc, epoch_i)
Example 3
def evaluate(model, data, my_vocab):
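    # Evaluate the masked-sentence task: mask_sentence hides sentences and builds a
    # candidate pool, and compute_score counts correct predictions for the masked
    # positions against that pool. Returns overall accuracy.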
    all_paragraphs = [
        build_paragraph(this_sample, my_vocab) for this_sample in data
    ]
    all_paragraph_lengths = [len(this_sample) for this_sample in data]
    total_corrects = 0
    total_samples = 0
    for current_batch in range(int((len(data) - 1) / batch_size) + 1):
        paragraphs = all_paragraphs[current_batch *
                                    batch_size:(current_batch + 1) *
                                    batch_size]
        paragraph_lengths = all_paragraph_lengths[current_batch *
                                                  batch_size:(current_batch +
                                                              1) * batch_size]
        masked_paragraphs, masks, cand_pool = mask_sentence(paragraphs)
        outs, pool_sent_embeds = model(masked_paragraphs, cand_pool)
        corrects, batch_samples = compute_score(outs, pool_sent_embeds, masks)
        total_corrects += corrects
        total_samples += batch_samples
    return float(total_corrects) / total_samples
Example 4
def evaluate_sorter(model, linear_layer, data, my_vocab, cand_permuts):
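    # Evaluate the local sentence-ordering task: local_sort_sentence permutes a
    # window of num_to_sort sentences, and the linear layer scores the candidate
    # permutations in cand_permuts; a prediction counts as correct when the argmax
    # matches the permutation that was applied. Returns overall accuracy.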
    num_to_sort = 3
    all_paragraphs = [
        build_paragraph(this_sample, my_vocab) for this_sample in data
    ]
    all_paragraph_lengths = [len(this_sample) for this_sample in data]
    sentence_cands = []
    for i in range(min(2000, len(all_paragraphs))):
        sentence_cands += all_paragraphs[i][0]
    total_corrects = 0
    total_samples = 0
    for current_batch in range(int((len(data) - 1) / batch_size) + 1):
        paragraphs = all_paragraphs[current_batch *
                                    batch_size:(current_batch + 1) *
                                    batch_size]
        paragraph_lengths = all_paragraph_lengths[current_batch *
                                                  batch_size:(current_batch +
                                                              1) * batch_size]
        masked_paragraphs, start_idx, labels = local_sort_sentence(
            paragraphs, cand_permuts)
        if len(masked_paragraphs) < 1:
            continue
        embeds = model(masked_paragraphs)
        #print(len(pos_score), len(neg_score))
        this_batch_size, doc_size, embed_dim = embeds.size()
        idx_1, idx_2 = get_fetch_idx(this_batch_size, start_idx)
        #print(idx_1, idx_2)
        #print(embeds.size())
        embeds_to_sort = embeds[idx_1, idx_2, :].view(this_batch_size,
                                                      num_to_sort, -1)
        #print(embeds_to_sort.size())
        scores = linear_layer(embeds_to_sort)
        preds = scores.argmax(1)
        labels = torch.tensor(labels).to(device)
        corrects = torch.sum(labels == preds)
        total_corrects += corrects
        total_samples += len(preds)
    return float(total_corrects) / total_samples
Example 5
def evaluate_classifier(model, data, labels, my_vocab):
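    # Compute classification accuracy: score each batch of paragraphs, take the
    # argmax over classes, and compare against the provided labels.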
    all_paragraphs = [
        build_paragraph(this_sample, my_vocab) for this_sample in data
    ]
    all_paragraph_lengths = [len(this_sample) for this_sample in data]
    acc_total = 0
    correct_total = 0
    for current_batch in range(int((len(data) - 1) / batch_size) + 1):
        batch_data = data[current_batch * batch_size:(current_batch + 1) *
                          batch_size]
        paragraphs = all_paragraphs[current_batch *
                                    batch_size:(current_batch + 1) *
                                    batch_size]
        paragraph_lengths = all_paragraph_lengths[current_batch *
                                                  batch_size:(current_batch +
                                                              1) * batch_size]
        scores = model(paragraphs)
        targets = labels[current_batch * batch_size:(current_batch + 1) *
                         batch_size]
        targets = torch.tensor(targets).to(device)
        pred_idx = scores.argmax(-1)
        acc_total += len(targets)
        correct_total += torch.sum(targets == pred_idx)
    return float(correct_total) / acc_total
Example 6
def train(train_data, dev_data, my_vocab, train_target, dev_target,
          dev_target_txt):
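    # Train the extractive summarizer: each sentence receives a score, binary labels
    # are built from the target sentence indices, and MSE is minimized on the
    # length-filtered scores. After every epoch the model is saved and accuracy,
    # recall and ROUGE from evaluate_summarizer are logged to TensorBoard.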
    #model = None
    embed_model = MyModel(my_vocab)
    #model = nn.DataParallel(model)
    if summarizer_embed_model_path is not None:
        embed_model = torch.load(summarizer_embed_model_path)
        #load_embed_model = torch.load(summarizer_embed_model_path)
        #sent_enc_stat = load_embed_model.sentence_encoder.state_dict()
        #embed_model.sentence_encoder.load_state_dict(sent_enc_stat)
    #criteria = torch.nn.CrossEntropyLoss()
    model = SummarizeModel(embed_model, hidden_dim * 2)
    model = model.to(device)
    criteria = torch.nn.MSELoss()
    model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=learning_rate)
    best_acc = -1
    writer = SummaryWriter(exp_name)
    #print(len(train_data))
    #all_paragraphs = [build_paragraph(this_sample, my_vocab)
    #                  for this_sample in train_data]
    #all_paragraph_lengths = [len(this_sample) for this_sample in train_data]
    train_idx = list(range(len(train_data)))
    for epoch_i in range(num_epoch):
        total_loss = 0
        total_batch = 0
        train_data = [train_data[i] for i in train_idx]
        #all_paragraphs = [all_paragraphs[i] for i in train_idx]
        #all_paragraph_lengths = [all_paragraph_lengths[i] for i in train_idx]
        train_target = [train_target[i] for i in train_idx]
        random.shuffle(train_idx)
        for current_batch in range(
                int((len(train_data) - 1) / batch_size) + 1):
            if current_batch % 100 == 0:
                print(current_batch)
            model_optim.zero_grad()
            batch_data = train_data[current_batch *
                                    batch_size:(current_batch + 1) *
                                    batch_size]
            targets = train_target[current_batch *
                                   batch_size:(current_batch + 1) * batch_size]
            paragraphs = [
                build_paragraph(this_sample, my_vocab)
                for this_sample in batch_data
            ]
            paragraph_lengths = [
                len(this_sample) for this_sample in batch_data
            ]
            #paragraphs = all_paragraphs[current_batch*batch_size:
            #                        (current_batch+1)*batch_size]
            #paragraph_lengths = all_paragraph_lengths[current_batch*batch_size:
            #                        (current_batch+1)*batch_size]

            scores = model(paragraphs)
            num_doc, doc_size = scores.size()
            labels = torch.zeros(num_doc, doc_size).to(device)
            for i, this_target in enumerate(targets):
                #print(labels[i], this_target)
                #print(this_target)
                #print(i)
                #print(labels.size())
                #print(labels)
                #print(labels[i])
                if len(this_target) > 0:
                    labels[i][this_target] = 1
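            # filter_output appears to drop padded positions so that the MSE loss is
            # computed only over real sentences (inferred from its name and arguments)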
            labels = filter_output(labels.view(-1), paragraph_lengths)
            scores = filter_output(scores.view(-1), paragraph_lengths)
            loss = criteria(scores, labels)
            #print(loss)
            total_loss += loss.item()
            total_batch += 1
            loss.backward()
            model_optim.step()
        acc, recall, scores = evaluate_summarizer(model, dev_data, dev_target,
                                                  my_vocab, dev_target_txt)
        with open(score_path + '_' + str(epoch_i) + '_score', 'w') as f_out:
            json.dump(scores, f_out)
        scores = [
            scores['rouge_1_f_score'], scores['rouge_2_f_score'],
            scores['rouge_l_f_score']
        ]
        torch.save(model, summarizer_model_path + '_' + str(epoch_i) + '.pt')
        writer.add_scalar('accuracy', acc, epoch_i)
        writer.add_scalar('recall', recall, epoch_i)
        writer.add_scalar('avg_loss', total_loss / total_batch, epoch_i)
        writer.add_scalar('rouge_1', scores[0], epoch_i)
        writer.add_scalar('rouge_2', scores[1], epoch_i)
        writer.add_scalar('rouge_l', scores[2], epoch_i)
Example 7
def evaluate_summarizer(model,
                        data,
                        labels,
                        my_vocab,
                        target_src,
                        is_eval=False):
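    # Build a summary from the sel_top_k highest-scoring sentences of each document.
    # With labels, also report precision (correct / selected) and recall
    # (correct / reference indices); ROUGE is computed against target_src via
    # rouge_eval. Without labels, only the ROUGE scores are meaningful.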
    all_paragraphs = [
        build_paragraph(this_sample, my_vocab) for this_sample in data
    ]
    all_paragraph_lengths = [len(this_sample) for this_sample in data]
    sel_top_k = 3
    acc_total = 0
    recall_total = 0
    correct_total = 0
    predict_txt = []
    for current_batch in range(int((len(data) - 1) / batch_size) + 1):
        batch_data = data[current_batch * batch_size:(current_batch + 1) *
                          batch_size]
        paragraphs = all_paragraphs[current_batch *
                                    batch_size:(current_batch + 1) *
                                    batch_size]
        paragraph_lengths = all_paragraph_lengths[current_batch *
                                                  batch_size:(current_batch +
                                                              1) * batch_size]
        scores = model(paragraphs)
        if is_eval:
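            # mask out scores beyond each paragraph's real length so padded
            # positions cannot be selected by topk (inferred from gen_mask_based_length)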
            this_batch_size, doc_size = scores.size()
            masks = gen_mask_based_length(this_batch_size, doc_size,
                                          paragraph_lengths)
            scores = scores * masks

        if labels is not None:
            targets = labels[current_batch * batch_size:(current_batch + 1) *
                             batch_size]
            _, pred_idx = scores.topk(sel_top_k, -1)
            for i, this_target in enumerate(targets):
                recall_total += len(this_target)
                acc_total += sel_top_k
                correct_total += len(
                    [pred for pred in pred_idx[i] if pred in this_target])
                pred_sentences = [
                    batch_data[i][j] for j in pred_idx[i]
                    if j < len(batch_data[i])
                ]
                if len(pred_sentences) == 0:
                    pred_sentences = batch_data[i][:sel_top_k]
                #print(pred_sentences)
                joined_sentences = [
                    ' '.join(sentence) for sentence in pred_sentences
                ]
                predict_txt.append('\n'.join(joined_sentences))
        else:
            _, pred_idx = scores.topk(sel_top_k, -1)
            for i in range(len(batch_data)):
                pred_sentences = [
                    batch_data[i][j] for j in pred_idx[i]
                    if j < len(batch_data[i])
                ]
                #pred_sentences = batch_data[i][:sel_top_k]
                if len(pred_sentences) == 0:
                    pred_sentences = batch_data[i][:sel_top_k]
                #print(pred_sentences)
                joined_sentences = [
                    ' '.join(sentence) for sentence in pred_sentences
                ]
                predict_txt.append('\n'.join(joined_sentences))

    #scores = compute_rouge_score(predict_txt, target_src)
    scores = rouge_eval(target_src, predict_txt)
    #print(scores)
    if labels is not None:
        return (float(correct_total) / acc_total,
                float(correct_total) / recall_total, scores)
    else:
        return -1, -1, scores
Example 8
def train(train_data, dev_data, my_vocab, train_target, dev_target):
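    # Self-supervised pretraining on the sentence-switch task: candidate sentences
    # are pooled from the training paragraphs, train_switch builds switched inputs
    # and returns the loss, and the checkpoint with the best dev switch accuracy is
    # kept. The commented-out branches are the alternative mask/replace/sort tasks.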
    #model = None
    num_to_sort = 3
    cand_permuts = list(itertools.permutations(list(range(num_to_sort))))
    model = MyModel(my_vocab)
    #model = nn.DataParallel(model)
    model = model.to(device)
    if model_to_load is not None:
        model = torch.load(model_to_load).to(device)
    #criteria = torch.nn.CrossEntropyLoss()
    model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=learning_rate)
    classification_layer = LinearRegressionModel(hidden_dim * 2, 1)
    #classification_layer = LocalSorterModel(hidden_dim*2, num_to_sort)
    classification_layer = classification_layer.to(device)
    classifier_optim = optim.Adam(classification_layer.parameters(),
                                  lr=learning_rate)
    best_acc = -1
    writer = SummaryWriter(exp_name)
    #print(len(train_data))
    all_paragraphs = [
        build_paragraph(this_sample, my_vocab) for this_sample in train_data
    ]
    all_paragraph_lengths = [len(this_sample) for this_sample in train_data]
    train_idx = list(range(len(train_data)))
    for epoch_i in range(num_epoch):
        #mask_loss = 0
        #replace_loss = 0
        switch_loss = 0
        #sorter_loss = 0
        total_batch = 0
        all_paragraphs = [all_paragraphs[i] for i in train_idx]
        all_paragraph_lengths = [all_paragraph_lengths[i] for i in train_idx]
        sentence_cands = []
        for i in range(min(10000, len(all_paragraphs))):
            sentence_cands += all_paragraphs[i][0]
        random.shuffle(train_idx)
        for current_batch in range(
                int((len(train_data) - 1) / batch_size) + 1):
            if current_batch % 100 == 0:
                print(current_batch)
            model_optim.zero_grad()
            classification_layer.zero_grad()
            paragraphs = all_paragraphs[current_batch *
                                        batch_size:(current_batch + 1) *
                                        batch_size]
            paragraph_lengths = all_paragraph_lengths[current_batch *
                                                      batch_size:
                                                      (current_batch + 1) *
                                                      batch_size]
            #loss = train_replace(model, classification_layer,
            #                     paragraphs, paragraph_lengths,
            #                     sentence_cands)
            #loss = train_mask(model, paragraphs, paragraph_lengths)
            loss = train_switch(model, classification_layer, paragraphs,
                                paragraph_lengths, sentence_cands)
            #loss = train_sorter(model, classification_layer,
            #                     paragraphs, paragraph_lengths,
            #                     cand_permuts)
            #print(loss)
            #mask_loss += loss.item()
            #replace_loss += loss.item()
            switch_loss += loss.item()
            #sorter_loss += loss.item()
            total_batch += 1
            loss.backward()
            model_optim.step()
            classifier_optim.step()
            '''
            cls_model_optim.zero_grad()
            targets = train_target[current_batch*batch_size:
                                   (current_batch+1)*batch_size]
            loss = train_cls_task(cls_model, paragraphs, paragraph_lengths, targets)
            cls_loss += loss.item()
            loss.backward()
            cls_model_optim.step()
            '''
        #mask_acc = evaluate(model, dev_data, my_vocab)
        #sorter_acc = evaluate_sorter(model, classification_layer, dev_data, my_vocab, cand_permuts)
        #replace_acc = evaluate_replace(model, classification_layer,
        #                                dev_data, my_vocab)
        switch_acc = evaluate_switch(model, classification_layer, dev_data,
                                     my_vocab)
        if switch_acc > best_acc:
            torch.save(model, model_path)
            best_acc = switch_acc
        #writer.add_scalar('mask_accuracy', mask_acc, epoch_i)
        #writer.add_scalar('avg_mask_loss', mask_loss/total_batch, epoch_i)
        #writer.add_scalar('replace_accuracy', replace_acc, epoch_i)
        #writer.add_scalar('avg_replace_loss', replace_loss/total_batch, epoch_i)
        writer.add_scalar('switch_accuracy', switch_acc, epoch_i)
        writer.add_scalar('avg_switch_loss', switch_loss / total_batch,
                          epoch_i)