# コード例 #1 (Code example #1)
def run(tls_model, dataset, outpath):
    """Predict a timeline for every collection in *dataset* and dump them.

    Unlike the ground-truth-driven evaluation, the time span is derived
    from the articles themselves and the timeline size is fixed.

    Args:
        tls_model: model exposing ``predict(collection, max_dates=...,
            max_summary_sents=...)``.
        dataset: object with a ``collections`` iterable; each collection
            has ``name``, ``articles()`` and writable ``start``/``end``.
        outpath: JSON output path; falsy value skips writing.
    """
    outputs = []

    for collection in dataset.collections:
        # Set start/end from the articles instead of from ground truth.
        times = [a.time for a in collection.articles()]
        collection.start = min(times)
        collection.end = max(times)

        max_dates = 8  # timeline length (number of dates)
        max_sents = 1  # sentences per date summary

        timeline = tls_model.predict(
            collection,
            max_dates=max_dates,
            max_summary_sents=max_sents,
        )

        print('*** TIMELINE ***')
        utils.print_tl(timeline)

        outputs.append(timeline.to_dict())

    if outpath:
        utils.write_json(outputs, outpath)
# コード例 #2 (Code example #2)
def evaluate(tls_model,
             dataset,
             result_path,
             trunc_timelines=False,
             time_span_extension=0,
             word_mover_stop_words='nltk'):
    """Evaluate *tls_model* against every reference timeline in *dataset*.

    For each (topic, reference timeline) pair a timeline is predicted and
    scored; per-pair results plus the overall average are written as JSON
    to *result_path*.

    Args:
        tls_model: model exposing ``load(ignored_topics=...)`` and
            ``predict(...)``.
        dataset: object with a ``collections`` iterable; each collection
            has ``name``, ``timelines`` and writable ``start``/``end``.
        result_path: destination path for the JSON results.
        trunc_timelines: if True, truncate references via
            ``data.truncate_timelines`` before evaluating.
        time_span_extension: days to extend the input span beyond the
            reference's first/last date (passed to
            ``data.get_input_time_span``).
        word_mover_stop_words: stop-word setting forwarded to
            ``get_wordmover_score``.
    """
    results = []
    metric = 'align_date_content_costs_many_to_one'
    evaluator = tilse_rouge.TimelineRougeEvaluator(
        measures=["rouge_1", "rouge_2"])
    n_topics = len(dataset.collections)

    for i, collection in enumerate(dataset.collections):

        ref_timelines = [
            TilseTimeline(tl.date_to_summaries) for tl in collection.timelines
        ]
        topic = collection.name
        n_ref = len(ref_timelines)

        if trunc_timelines:
            ref_timelines = data.truncate_timelines(ref_timelines, collection)

        for j, ref_timeline in enumerate(ref_timelines):
            print(
                f'topic {i + 1}/{n_topics}: {topic}, ref timeline {j + 1}/{n_ref}'
            )

            results.append(
                _evaluate_one(tls_model, collection, ref_timeline, evaluator,
                              metric, time_span_extension,
                              word_mover_stop_words))

            print("Running average:")
            print(get_average_results(results))
            print()

    avg_results = get_average_results(results)
    print('Average results:')
    pprint(avg_results)
    output = {
        'average': avg_results,
        'results': results,
    }
    utils.write_json(output, result_path)


def _evaluate_one(tls_model, collection, ref_timeline, evaluator, metric,
                  time_span_extension, word_mover_stop_words):
    """Predict a timeline matched to one reference and score it.

    Returns a tuple ``(rouge_scores, date_scores, wm_scores, dd_scores,
    predicted_timeline_dict)``.
    """
    # Oracle-style models may see other topics' data; hide the current one.
    tls_model.load(ignored_topics=[collection.name])

    ref_dates = sorted(ref_timeline.dates_to_summaries)

    # Restrict the input articles to the reference's (extended) time span.
    start, end = data.get_input_time_span(ref_dates, time_span_extension)
    collection.start = start
    collection.end = end

    # utils.plot_date_stats(collection, ref_dates)

    # Match the reference timeline's shape: same number of dates and the
    # reference's average summary length in sentences.
    n_dates = len(ref_dates)
    n_sents = data.get_average_summary_length(ref_timeline)

    pred_timeline_ = tls_model.predict(
        collection,
        max_dates=n_dates,
        max_summary_sents=n_sents,
        ref_tl=ref_timeline  # only oracles need this
    )

    print('*** PREDICTED ***')
    utils.print_tl(pred_timeline_)

    print('timeline done')
    pred_timeline = TilseTimeline(pred_timeline_.date_to_summaries)
    sys_len = len(pred_timeline.get_dates())
    ground_truth = TilseGroundTruth([ref_timeline])

    rouge_scores = get_scores(metric, pred_timeline, ground_truth, evaluator)
    date_scores = evaluate_dates(pred_timeline, ground_truth)
    wm_scores = get_wordmover_score(pred_timeline,
                                    ground_truth,
                                    word_mover_stop_words,
                                    device='cpu')
    dd_scores = date_dist_scores(pred_timeline, ground_truth)

    print('sys-len:', sys_len, 'gold-len:', n_dates, 'gold-k:', n_sents)

    print('Alignment-based ROUGE:')
    pprint(rouge_scores)
    print('Date selection:')
    pprint(date_scores)
    pprint(dd_scores)
    print('WordMover scores:')
    pprint(wm_scores)
    print('-' * 100)

    return (rouge_scores, date_scores, wm_scores, dd_scores,
            pred_timeline_.to_dict())