
    if train_percent + dev_percent >= 1.:
            'ERROR! Train data percentage plus dev data percentage is {}! Make sure the sum is below 1.0!'
            .format(train_percent + dev_percent))

    BERT_VEC_LENGTH = 1024  # change this to 768 if you use bert-base
    deep_model, optimiser = build_model(model_type, BERT_VEC_LENGTH * 2,
    if 'gpu' in device:'cuda')

    # read human scores and vectors for summaries/docs, and split the train/dev/test set
    sorted_scores = read_sorted_scores()
    train, dev, test, all = parse_split_data(sorted_scores, train_percent,

    train_pairs = build_pairs(train)
    dev_pairs = build_pairs(dev)
    test_pairs = build_pairs(test)
    print(len(train_pairs), len(dev_pairs), len(test_pairs))

    # read bert vectors
    with open('data/doc_summ_bert_vectors.pkl', 'rb') as ff:
        all_vec_dic = pickle.load(ff)

    pcc_list = []
    weights_list = []
    for ii in range(epoch_num):
Exemple #2
def encode_doc_summ(stem=False, remove_stop=False):
    """Encode all articles, reference summaries and system summaries with BERT.

    For every article id returned by ``read_sorted_scores()`` the article
    text, the reference summary and each system summary are optionally
    stemmed and/or stripped of English stop words, cleaned with
    ``text_normalization`` and passed to ``raw_bert_encoder``.  The result is
    pickled under ``data/`` as a nested dict::

        {article_id: {'article': ..., 'ref': ..., 'sys_summ<summ_id>': ...}}

    The output file is ``doc_summ_bert_vectors.pkl``, with ``_stem`` and/or
    ``_removeStop`` inserted before the extension when the corresponding
    option is enabled, so that differently preprocessed runs do not
    overwrite each other.

    Args:
        stem: apply the Porter stemmer to every token before encoding.
        remove_stop: drop English stop words before encoding.
    """
    bert_tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
    bert_model = BertModel.from_pretrained('bert-large-uncased')

    sorted_scores = read_sorted_scores()
    input_articles, _ = read_articles()

    stopwords_list = set(stopwords.words("english"))
    stemmer = PorterStemmer()
    vec_dic = {}

    def preprocess(text):
        """Apply the requested stemming / stop-word removal to one text."""
        if stem and remove_stop:
            tokens = sent2stokens_wostop(text, stemmer, stopwords_list,
                                         'english', True)
        elif remove_stop:
            tokens = sent2tokens_wostop(text, stopwords_list, 'english', True)
        elif stem:
            tokens = sent2stokens(text, stemmer, 'english', True)
        else:
            # Neither option requested: the raw text is encoded as-is.
            return text
        return " ".join(tokens)

    for article_id, scores_list in tqdm(sorted_scores.items()):
        # First article whose id matches; raises IndexError if there is none.
        article = [
            entry['article'] for entry in input_articles
            if entry['id'] == article_id
        ][0]
        # Every entry of scores_list is read as sharing the same reference,
        # so it is taken from the first one.
        ref_summ = scores_list[0]['ref']

        sys_summs = [preprocess(s['sys_summ']) for s in scores_list]
        ref_summ = preprocess(ref_summ)
        article = preprocess(article)

        summ_ids = [s['summ_id'] for s in scores_list]

        # clean text
        sys_summs = [text_normalization(s) for s in sys_summs]
        ref_summ = text_normalization(ref_summ)
        article = text_normalization(article)

        vectors = {}
        vectors['article'] = raw_bert_encoder(bert_model, bert_tokenizer,
                                              [article])
        vectors['ref'] = raw_bert_encoder(bert_model, bert_tokenizer,
                                          [ref_summ])
        for sid, summ in zip(summ_ids, sys_summs):
            vectors['sys_summ{}'.format(sid)] = raw_bert_encoder(
                bert_model, bert_tokenizer, [summ])
        vec_dic[article_id] = vectors

    save_file_name = 'doc_summ_bert_vectors'
    # '+=' is required here: a bare 'name + suffix' expression is discarded
    # and would leave every variant writing to the same file.
    if stem:
        save_file_name += '_stem'
    if remove_stop:
        save_file_name += '_removeStop'
    save_file_name += '.pkl'
    with open('data/' + save_file_name, 'wb') as ff:
        pickle.dump(vec_dic, ff)
Exemple #3
def evaluate_metric(metric, stem, remove_stop, with_ref, prompt='overall'):
    """Score system summaries with an automatic metric and correlate with humans.

    For every article returned by ``read_sorted_scores()`` that has at least
    two scored summaries, each system summary is scored with ``metric``
    against either the reference summary (``with_ref=True``) or the source
    article (``with_ref=False``).  One CSV row per summary is written to a
    file under ``outputs/``, and the Spearman, Pearson and Kendall
    correlations between the human scores and the metric scores are computed
    per article.  The mean of those correlations over all evaluated articles
    is printed.

    Args:
        metric: name of the metric; must be one of the names whitelisted in
            the assertion at the top of the function body.
        stem: apply the Porter stemmer to all texts before scoring.
        remove_stop: drop English stop words from all texts before scoring.
        with_ref: compare against the reference summary if true, otherwise
            against the source article.
        prompt: key into each summary's ``scores`` dict selecting which human
            score to correlate with.

    Returns:
        Path of the CSV file that was written.

    Raises:
        AssertionError: if ``metric`` is not a supported metric name.
    """
    assert metric in [
        'ROUGE-1-F', 'ROUGE-1-R', 'ROUGE-2-F', 'ROUGE-2-R', 'ROUGE-L-F',
        'ROUGE-L-R', 'ROUGE-SU*-F', 'ROUGE-SU*-R', 'bleu-1', 'bleu-2',
        'bleu-3', 'bleu-4', 'bleu-5', 'meteor', 'infersent', 'bert-raw',
        'bert-sts', 'bert-nli', 'bert-human', 'mover-1', 'mover-2', 'mover-smd'
    ]
    stemmed_str = "_stem" if stem else ""
    stop_str = "_removestop" if remove_stop else ""
    ref_prefix = 'wref' if with_ref else 'woref'
    ranks_file_path = os.path.join(
        'outputs', '{}_{}{}{}_{}_rank_correlation.csv'.format(
            ref_prefix, metric, stemmed_str, stop_str, prompt))

    sorted_scores = read_sorted_scores()
    input_articles, _ = read_articles()
    # Rows start as NaN so that articles skipped in the loop below are
    # ignored by nanmean instead of being averaged in as zero correlation.
    corr_data = np.full((len(sorted_scores), 3), np.nan)

    stopwords_list = set(stopwords.words("english"))
    stemmer = PorterStemmer()

    # Load only the scorer that the requested metric needs.
    if metric.startswith('infersent'):
        from scorer.auto_metrics.infersent_metric import InferSentScorer
        infers = InferSentScorer()
    elif metric.startswith('sent2vec'):
        # NOTE(review): 'sent2vec' is not in the whitelist above, so this
        # branch cannot currently be reached.
        from scorer.auto_metrics.sent2vec_metric import Sent2Vec
        s2v = Sent2Vec()
    elif metric.startswith('bert'):
        if 'human' in metric:
            rewarder = Rewarder(os.path.join(MODEL_WEIGHT_DIR, 'sample.model'))
        elif 'sts' in metric:
            bert_model = SentenceTransformer('bert-large-nli-stsb-mean-tokens')
        elif 'nli' in metric:
            bert_model = SentenceTransformer('bert-large-nli-mean-tokens')
        else:
            # raw BERT
            bert_tokenizer = BertTokenizer.from_pretrained(
                'bert-large-uncased')
            bert_model = BertModel.from_pretrained('bert-large-uncased')
    elif metric.startswith('mover'):
        print(
            'Make sure that your have started the mover server. Find details at'
        )
        from summ_eval.client import EvalClient
        mover_scorer = EvalClient()

    def preprocess(text):
        """Apply the requested stemming / stop-word removal to one text."""
        if stem and remove_stop:
            tokens = sent2stokens_wostop(text, stemmer, stopwords_list,
                                         'english', True)
        elif remove_stop:
            tokens = sent2tokens_wostop(text, stopwords_list, 'english', True)
        elif stem:
            tokens = sent2stokens(text, stemmer, 'english', True)
        else:
            # Neither option requested: the raw text is scored as-is.
            return text
        return " ".join(tokens)

    # The context manager guarantees the CSV is flushed and closed even if a
    # scorer raises part-way through.
    with open(ranks_file_path, 'w') as ranks_file:
        for i, (article_id, scores_list) in enumerate(
                tqdm(sorted_scores.items())):
            human_ranks = [s['scores'][prompt] for s in scores_list]
            # Correlation is undefined for fewer than two data points.
            if len(human_ranks) < 2:
                continue
            ref_summ = scores_list[0]['ref']
            # First article whose id matches; IndexError if there is none.
            article = [
                entry['article'] for entry in input_articles
                if entry['id'] == article_id
            ][0]

            sys_summs = [preprocess(s['sys_summ']) for s in scores_list]
            ref_summ = preprocess(ref_summ)
            article = preprocess(article)

            summ_ids = [s['summ_id'] for s in scores_list]
            sys_summs = [text_normalization(s) for s in sys_summs]
            ref_summ = text_normalization(ref_summ)
            article = text_normalization(article)

            # Text every system summary is compared against.
            target = ref_summ if with_ref else article

            if 'rouge' in metric.lower():
                auto_metric_ranks = []
                for ss in sys_summs:
                    # A fresh scorer per summary, as in the original code.
                    rouge_scorer = RougeScorer(ROUGE_DIR, BASE_DIR)
                    auto_metric_ranks.append(rouge_scorer(ss, target)[metric])
            elif metric.startswith('bleu'):
                n = int(metric.split('-')[1])
                auto_metric_ranks = [
                    bleu(ss, [target], n, smooth=False) for ss in sys_summs
                ]
            elif metric.startswith('meteor'):
                auto_metric_ranks = [meteor(ss, [target]) for ss in sys_summs]
            elif metric.startswith('infersent'):
                auto_metric_ranks = [infers(ss, target) for ss in sys_summs]
            elif metric.startswith('sent2vec'):
                auto_metric_ranks = [s2v.score(ss, target) for ss in sys_summs]
            elif metric.startswith('bert'):
                if 'human' in metric:
                    # The learned rewarder takes the target text first.
                    auto_metric_ranks = [
                        rewarder(target, ss) for ss in sys_summs
                    ]
                elif 'sts' in metric or 'nli' in metric:
                    auto_metric_ranks = [
                        sts_bert_rewarder(bert_model, ss, target)
                        for ss in sys_summs
                    ]
                else:  # raw BERT encoder
                    auto_metric_ranks = [
                        raw_bert_rewarder(bert_model, bert_tokenizer, ss,
                                          target) for ss in sys_summs
                    ]
            elif metric.startswith('mover'):
                if '1' in metric:
                    mm = 'wmd_1'
                elif '2' in metric:
                    mm = 'wmd_2'
                else:
                    mm = 'smd'
                # Without a reference the article is split into sentences.
                if with_ref:
                    target_sents = [ref_summ]
                else:
                    target_sents = sent_tokenize(article)
                cases = [[[ss], target_sents, mm] for ss in sys_summs]
                auto_metric_ranks = mover_scorer.eval(cases)['0']

            # NOTE(review): row layout is article id, summary id, human
            # score, metric score; confirm the exact number formatting
            # expected by whatever reads this CSV.
            for sid, amr, hr in zip(summ_ids, auto_metric_ranks, human_ranks):
                ranks_file.write('{},{},{},{}\n'.format(
                    article_id, sid, hr, amr))

            spearmanr_result = spearmanr(human_ranks, auto_metric_ranks)
            pearsonr_result = pearsonr(human_ranks, auto_metric_ranks)
            kendalltau_result = kendalltau(human_ranks, auto_metric_ranks)
            corr_data[i, :] = [
                spearmanr_result[0], pearsonr_result[0], kendalltau_result[0]
            ]

    corr_mean_all = np.nanmean(corr_data, axis=0)
    print("Correlation mean on all data spearman/pearsonr/kendall: {}".format(
        corr_mean_all))

    return ranks_file_path