Example #1
0
def main(args):
    """Score decoded summaries in <decode_dir>/output with ROUGE or METEOR.

    The data split is read from <decode_dir>/log.json and the matching
    reference directory is taken from args.data_dir.  The report is printed
    and saved to <decode_dir>/<metric>.txt.
    """
    dec_dir = join(args.decode_dir, 'output')
    with open(join(args.decode_dir, 'log.json')) as log_file:
        split = json.loads(log_file.read())['split']
    ref_dir = join(args.data_dir, 'refs', split)
    assert exists(ref_dir)

    if args.rouge:
        metric = 'rouge'
        output = eval_rouge(r'(\d+).dec',
                            dec_dir,
                            '#ID#.ref',
                            ref_dir,
                            rouge_path=args.rouge_path)
    else:
        metric = 'meteor'
        output = eval_meteor('[0-9]+.dec',
                             dec_dir,
                             '[0-9]+.ref',
                             ref_dir,
                             meteor_path=args.meteor_path)
    print(output)
    with open(join(args.decode_dir, '{}.txt'.format(metric)), 'w') as report:
        report.write(output)
Example #2
0
def main(dec_dir, ref_dir):
    """Run ROUGE on 'test-<id>.txt' pairs and save the report in dec_dir."""
    metric = 'rouge'
    output = eval_rouge(r'test-(\d+).txt', dec_dir, 'test-#ID#.txt', ref_dir)
    print(output)
    with open(join(dec_dir, '{}.txt'.format(metric)), 'w') as report:
        report.write(output)
Example #3
0
def upper_bound():
    """Build oracle ('upper bound') extractive summaries for val/test and score them.

    For every paper in each split, writes two decoded summaries:
      * output_rerank — the extracted sentences in their ranked order
      * output_order  — the same sentences re-sorted into document order
    then evaluates both directories with ROUGE against the reference
    summaries and stores each report next to the outputs.
    """
    for split in ['val', 'test']:
        print(split)
        dec_rerank = '/home/yhj/emnlp/baseline/upperbound_%s/output_rerank' % split
        dec_order = '/home/yhj/emnlp/baseline/upperbound_%s/output_order' % split
        path = os.path.join('/home/yhj/dataset/emnlp/', split)
        # exist_ok avoids the check-then-create race of the original
        # exists()/makedirs() pair
        os.makedirs(dec_rerank, exist_ok=True)
        os.makedirs(dec_order, exist_ok=True)

        for file in iter_files(path):
            # context manager closes the JSON file — the original
            # json.load(open(file)) leaked the handle
            with open(file) as json_file:
                paper = json.load(json_file)
            name, _ = os.path.splitext(os.path.basename(file))
            sents = [paper['article'][i] for i in paper['extracted']]
            with open(os.path.join(dec_rerank, name + '.dec'), 'w') as f:
                f.write('\n'.join(sents))
            sents = [paper['article'][i] for i in sorted(paper['extracted'])]
            with open(os.path.join(dec_order, name + '.dec'), 'w') as f:
                f.write('\n'.join(sents))

        ref_dir = os.path.join('/home/yhj/dataset/emnlp/refs/', split)

        # \. matches a literal dot; the original '.' matched any character
        dec_pattern = r'(\d+)\.dec'
        ref_pattern = '#ID#.ref'

        output = eval_rouge(dec_pattern, dec_rerank, ref_pattern, ref_dir)
        print('%s rerank:' % split)
        print(output)
        with open(
                '/home/yhj/emnlp/baseline/upperbound_%s/rouge_rerank.txt' %
                split, 'w') as f:
            f.write(output)

        output = eval_rouge(dec_pattern, dec_order, ref_pattern, ref_dir)
        print('%s order:' % split)
        print(output)
        with open(
                '/home/yhj/emnlp/baseline/upperbound_%s/rouge_order.txt' %
                split, 'w') as f:
            f.write(output)
Example #4
0
def main(args):
    """Print ROUGE (or METEOR) for args.decode_dir against the fixed references."""
    dec_dir = args.decode_dir
    ref_dir = join(_REF_DIR, 'reference')
    if args.rouge:
        output = eval_rouge(r'(\d+).dec', dec_dir, '#ID#.ref', ref_dir)
    else:
        output = eval_meteor('[0-9]+.dec', dec_dir, '[0-9]+.ref', ref_dir)
    print(output)
Example #5
0
def main(args):
    """Evaluate decode_dir with ROUGE when args.rouge is set, METEOR otherwise."""
    ref_dir = join(_REF_DIR, 'reference')
    dec_dir = args.decode_dir
    if args.rouge:
        dec_pattern, ref_pattern = r'(\d+).dec', '#ID#.ref'
        scorer = eval_rouge
    else:
        dec_pattern, ref_pattern = '[0-9]+.dec', '[0-9]+.ref'
        scorer = eval_meteor
    print(scorer(dec_pattern, dec_dir, ref_pattern, ref_dir))
def main(args):
    """Run ROUGE on <decode_dir>/output and save the report as rouge.txt."""
    metric = 'rouge'
    dec_dir = join(args.decode_dir, 'output')
    ref_dir = args.reference_dir
    assert exists(ref_dir)

    output = eval_rouge(r'(\d+).dec', dec_dir, '#ID#.ref', ref_dir)
    print(output)
    with open(join(args.decode_dir, '{}.txt'.format(metric)), 'w') as report:
        report.write(output)
def main(args):
    """Score a single decoded file against a single reference file.

    Writes the report to ./<metric>.txt in the current directory.
    """
    dec_dir = args.decode_file
    ref_dir = args.ref_file
    assert exists(ref_dir)

    metric = 'rouge' if args.rouge else 'meteor'
    scorer = eval_rouge if args.rouge else eval_meteor
    output = scorer(dec_dir, ref_dir)
    print(output)
    with open('{}.txt'.format(metric), 'w') as report:
        report.write(output)
Example #8
0
def main(args):
    """Evaluate <decode_dir>/output with the metric selected on the command line.

    Exactly one of args.rouge / args.meteor / args.novel_ngrams must be set;
    otherwise NotImplementedError is raised.  The report is printed and also
    written to <decode_dir>/<metric>.txt.
    """
    dec_dir = join(args.decode_dir, 'output')
    with open(join(args.decode_dir, 'log.json')) as log_file:
        split = json.loads(log_file.read())['split']
    ref_dir = join(_DATA_DIR, 'refs', split)
    assert exists(ref_dir)

    if args.rouge:
        metric = 'rouge'
        output = eval_rouge(r'(\d+).dec', dec_dir, '#ID#.ref', ref_dir,
                            force=args.force)
    elif args.meteor:
        metric = 'meteor'
        output = eval_meteor('[0-9]+.dec', dec_dir, '[0-9]+.ref', ref_dir,
                             force=args.force)
    elif args.novel_ngrams:
        metric = 'novel-ngrams'
        stats = eval_novel_ngrams(
            data_dir=join(_DATA_DIR, split),
            data_pattern='[0-9]+.json',
            dec_dir=dec_dir,
            dec_pattern='[0-9]+.dec',
            ref_dir=ref_dir,
            ref_pattern='[0-9]+.ref',
        )
        output = json.dumps(stats, indent=4, ensure_ascii=False)
    else:
        raise NotImplementedError()

    print(output)
    with open(join(args.decode_dir, '{}.txt'.format(metric)), 'w') as report:
        report.write(output)
def main(args):
    """ROUGE/METEOR evaluation driven by the split recorded in log.json."""
    dec_dir = join(args.decode_dir, 'output')
    with open(join(args.decode_dir, 'log.json')) as log_file:
        split = json.loads(log_file.read())['split']
    ref_dir = join(_DATA_DIR, 'refs', split)
    assert exists(ref_dir)

    if args.rouge:
        metric = 'rouge'
        output = eval_rouge(r'(\d+).dec', dec_dir, '#ID#.ref', ref_dir)
    else:
        metric = 'meteor'
        output = eval_meteor('[0-9]+.dec', dec_dir, '[0-9]+.ref', ref_dir)
    print(output)
    with open(join(args.decode_dir, '{}.txt'.format(metric)), 'w') as report:
        report.write(output)
Example #10
0
def calc_official_rouge(dec_dir, name):
    """Run ROUGE against the official references and return average F-scores.

    Args:
        dec_dir: directory of decoded summaries named '<id>.dec'.
        name: dataset name; 'val' selects cm.REF04 as the reference
            directory, anything else selects cm.REF11.

    Returns:
        dict with keys 'R-1', 'R-2', 'R-L', 'R-SU4' mapping to average
        F-scores parsed from the ROUGE report.  A metric missing from the
        report maps to None (the original raised NameError in that case).
    """
    ref_dir = cm.REF04 if name == 'val' else cm.REF11
    print(f'{name}: ref_dir={ref_dir}')
    dec_pattern = r'(\d+).dec'
    ref_pattern = '#ID#.[A-Z].ref'
    output = eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir)

    # Map each report-line prefix to its result key; only Average_F lines
    # are of interest.
    prefixes = {
        '1 ROUGE-1 Average_F': 'R-1',
        '1 ROUGE-2 Average_F': 'R-2',
        '1 ROUGE-L Average_F': 'R-L',
        '1 ROUGE-SU4 Average_F': 'R-SU4',
    }
    R = {'R-1': None, 'R-2': None, 'R-L': None, 'R-SU4': None}
    for line in output.split('\n'):
        for prefix, key in prefixes.items():
            if line.startswith(prefix):
                # whitespace field 3 holds the score,
                # e.g. '1 ROUGE-1 Average_F: 0.41123 (95%-conf...)'
                R[key] = float(line.split()[3])
    print(R, '\n')
    return R
def main(args):
    """Evaluate the top-n_ext extractive summaries, generating them if absent.

    The report is written to <decode_dir>/top<n_ext>_<metric>.txt.
    """
    dec_dir = join(args.decode_dir, 'output_top{}'.format(args.n_ext))
    if not exists(dec_dir):
        make_summaries(args.decode_dir, args.n_ext)
    with open(join(args.decode_dir, 'log.json')) as log_file:
        split = json.loads(log_file.read())['split']
    ref_dir = join(_DATA_DIR, 'refs', split)
    assert exists(ref_dir)

    if args.rouge:
        metric = 'rouge'
        output = eval_rouge(r'(\d+).dec', dec_dir, '#ID#.ref', ref_dir)
    else:
        metric = 'meteor'
        output = eval_meteor('[0-9]+.dec', dec_dir, '[0-9]+.ref', ref_dir)
    print(output)
    report_path = join(args.decode_dir,
                       'top{}_{}.txt'.format(args.n_ext, metric))
    with open(report_path, 'w') as report:
        report.write(output)
Example #12
0
            0: textrank_summarizer(text, stemmer, language, sentences_count),
            1: lexrank_summarizer(text, stemmer, language, sentences_count),
            2: luhn_summarizer(text, stemmer, language, sentences_count),
            3: reduction_summarizer(text, stemmer, language, sentences_count),
            4: sumbasic_summarizer(text, stemmer, language, sentences_count),
            5: kl_summarizer(text, stemmer, language, sentences_count),
            6: edmundson_summarizer(text, stemmer, language, sentences_count)
        }
        return switcher.get(sum_index)
    return switch(sum_index)

if __name__ == "__main__":
    # Summarize every article in the Vietnews test split with the reduction
    # summarizer (sum_index=3), write the decoded summaries to
    # decode/reduction/decoded/, then score them with ROUGE.
    DATA_DIR = '../ProcessData/vietnews2/test'
    REF_DIR = '../ProcessData/vietnews2/refs'
    DECODE_DIR = 'decode/reduction'
    decoded_dir = os.path.join(DECODE_DIR, 'decoded')
    if not os.path.exists(DECODE_DIR):
        os.makedirs(DECODE_DIR)
    if not os.path.exists(decoded_dir):
        os.makedirs(decoded_dir)
    for filename in os.listdir(DATA_DIR):
        # each input file is JSON with an 'article' list of sentences
        with open(os.path.join(DATA_DIR, filename), 'r') as f:
            data = json.load(f)
            text = '\n'.join(data['article'])
        # sum_index=3 selects the reduction summarizer in the switcher above
        summ = summarize(text, sum_index=3)
        # '<id>.json' -> '<id>.dec' (keeps only the part before the first dot)
        with open(os.path.join(decoded_dir, filename.split('.')[0] + '.dec'), 'w') as f:
            f.write(summ)
    rouge = eval_rouge(decoded_dir, REF_DIR)
    print(rouge)
    with open(os.path.join(DECODE_DIR, 'rouge.txt'), 'w') as f:
        f.write(rouge)
Example #13
0
def test(args, split):
    """Decode `split` with the top checkpoints under args.path and ROUGE each.

    For up to five checkpoints (ordered by sort_ckpt), extracts sentence
    indices for every article, writes the selected sentences as '<i>.dec'
    files under <path>/decode/<ckpt>/, and saves the ROUGE report to
    <path>/ROUGE/<ckpt>.txt.
    """
    ext_dir = args.path
    ckpts = sort_ckpt(ext_dir)
    
    # setup loader
    def coll(batch):
        # drop falsy entries (e.g. empty articles) from the batch
        articles = list(filter(bool, batch))
        return articles
    dataset = DecodeDataset(split)

    n_data = len(dataset)
    loader = DataLoader(
             dataset, batch_size=args.batch, shuffle=False, num_workers=4,
             collate_fn=coll
    )

    # decode and evaluate top 5 models
    # NOTE(review): os.mkdir raises FileExistsError on a rerun — confirm
    # that args.path is always a fresh directory.
    os.mkdir(join(args.path, 'decode'))
    os.mkdir(join(args.path, 'ROUGE'))
    for i in range(min(5, len(ckpts))):
        print('Start loading checkpoint {} !'.format(ckpts[i]))
        cur_ckpt = torch.load(
                   join(ext_dir, 'ckpt/{}'.format(ckpts[i]))
        )['state_dict']
        extractor = Extractor(ext_dir, cur_ckpt, args.emb_type, cuda=args.cuda)
        save_path = join(args.path, 'decode/{}'.format(ckpts[i]))
        os.mkdir(save_path)

        # decoding
        ext_list = []  # one list of extracted sentence indices per article
        cur_idx = 0
        start = time()
        with torch.no_grad():
            for raw_article_batch in loader:
                tokenized_article_batch = map(tokenize(None, args.emb_type), raw_article_batch)
                for raw_art_sents in tokenized_article_batch:
                    ext_idx = extractor(raw_art_sents)
                    ext_list.append(ext_idx)
                    cur_idx += 1
                    # '\r' + end='' keeps the progress line in place
                    print('{}/{} ({:.2f}%) decoded in {} seconds\r'.format(
                          cur_idx, n_data, cur_idx/n_data*100, timedelta(seconds=int(time()-start))
                    ), end='')
        print()

        # write files
        for file_idx, ext_ids in enumerate(ext_list):
            dec = []
            data_path = join(DATA_DIR, '{}/{}.json'.format(split, file_idx))
            with open(data_path) as f:
                data = json.loads(f.read())
            # CNN articles get 2 extracted sentences, others 3,
            # capped by the article length
            n_ext = 2 if data['source'] == 'CNN' else 3
            n_ext = min(n_ext, len(data['article']))
            for j in range(n_ext):
                sent_idx = ext_ids[j]
                dec.append(data['article'][sent_idx])
            with open(join(save_path, '{}.dec'.format(file_idx)), 'w') as f:
                for sent in dec:
                    print(sent, file=f)
        
        # evaluate current model
        print('Starting evaluating ROUGE !')
        dec_path = save_path
        ref_path = join(DATA_DIR, 'refs/{}'.format(split))
        ROUGE = eval_rouge(dec_path, ref_path)
        print(ROUGE)
        with open(join(args.path, 'ROUGE/{}.txt'.format(ckpts[i])), 'w') as f:
            print(ROUGE, file=f)
Example #14
0
from evaluate import eval_rouge
import os

if __name__ == "__main__":
    # Score the decoded outputs of both runs against their reference folders
    # and save each report next to the decoded directory.
    for each in ['decode_baseline', 'decode_introduction']:
        print(each)
        dec_dir = '/home/yhj/long-summarization/logroot/%s/decoded' % each
        ref_dir = '/home/yhj/long-summarization/logroot/%s/reference' % each
        output = eval_rouge(r'(\d+).decoded', dec_dir, '#ID#.reference',
                            ref_dir)
        print(output)
        report_path = os.path.join(os.path.dirname(dec_dir), 'rouge.txt')
        with open(report_path, 'w') as f:
            f.write(output)