Example #1
0
def evaluate(tls_model,
             dataset,
             result_path,
             trunc_timelines=False,
             time_span_extension=0,
             word_mover_stop_words='nltk'):
    """Evaluate ``tls_model`` on every reference timeline of ``dataset``.

    For each collection in ``dataset.collections`` and each of its reference
    timelines, the model is loaded with the current topic ignored, a timeline
    is predicted, and the prediction is scored with ``get_scores``
    (alignment-based ROUGE), ``evaluate_dates``, ``get_wordmover_score`` and
    ``date_dist_scores``. Per-timeline scores and a running average are
    printed; the final average plus all individual results are written with
    ``utils.write_json``.

    Side effect: ``collection.start`` and ``collection.end`` are overwritten
    for every reference timeline processed.

    Args:
        tls_model: Object providing ``load(ignored_topics=...)`` and
            ``predict(collection, max_dates=..., max_summary_sents=...,
            ref_tl=...)``.
        dataset: Object exposing ``collections``; each collection provides
            ``name`` and ``timelines``.
        result_path: Destination handed to ``utils.write_json``.
        trunc_timelines: When truthy, reference timelines are passed through
            ``data.truncate_timelines`` before evaluation.
        time_span_extension: Forwarded to ``data.get_input_time_span``.
        word_mover_stop_words: Forwarded to ``get_wordmover_score``.
    """
    all_results = []
    alignment_metric = 'align_date_content_costs_many_to_one'
    rouge_evaluator = tilse_rouge.TimelineRougeEvaluator(
        measures=["rouge_1", "rouge_2"])
    n_topics = len(dataset.collections)

    for topic_no, collection in enumerate(dataset.collections, start=1):
        ref_timelines = [
            TilseTimeline(timeline.date_to_summaries)
            for timeline in collection.timelines
        ]
        topic = collection.name
        # Counted before the optional truncation; used only for progress output.
        n_ref = len(ref_timelines)

        if trunc_timelines:
            ref_timelines = data.truncate_timelines(ref_timelines, collection)

        for ref_no, ref_timeline in enumerate(ref_timelines, start=1):
            print(f'topic {topic_no}/{n_topics}: {topic}, '
                  f'ref timeline {ref_no}/{n_ref}')

            # Reload for every reference timeline with the current topic ignored.
            tls_model.load(ignored_topics=[collection.name])

            ref_dates = sorted(ref_timeline.dates_to_summaries)
            span_start, span_end = data.get_input_time_span(
                ref_dates, time_span_extension)
            collection.start = span_start
            collection.end = span_end

            # The prediction is capped by the reference timeline's own size.
            n_ref_dates = len(ref_dates)
            avg_summary_len = data.get_average_summary_length(ref_timeline)

            raw_prediction = tls_model.predict(
                collection,
                max_dates=n_ref_dates,
                max_summary_sents=avg_summary_len,
                ref_tl=ref_timeline,  # only oracles need this
            )

            print('*** PREDICTED ***')
            utils.print_tl(raw_prediction)
            print('timeline done')

            pred_timeline = TilseTimeline(raw_prediction.date_to_summaries)
            sys_len = len(pred_timeline.get_dates())
            ground_truth = TilseGroundTruth([ref_timeline])

            rouge_scores = get_scores(alignment_metric, pred_timeline,
                                      ground_truth, rouge_evaluator)
            date_scores = evaluate_dates(pred_timeline, ground_truth)
            wm_scores = get_wordmover_score(pred_timeline,
                                            ground_truth,
                                            word_mover_stop_words,
                                            device='cpu')
            dd_scores = date_dist_scores(pred_timeline, ground_truth)

            print('sys-len:', sys_len, 'gold-len:', n_ref_dates, 'gold-k:',
                  avg_summary_len)

            print('Alignment-based ROUGE:')
            pprint(rouge_scores)
            print('Date selection:')
            pprint(date_scores)
            pprint(dd_scores)
            print('WordMover scores:')
            pprint(wm_scores)
            print('-' * 100)

            entry = (rouge_scores, date_scores, wm_scores, dd_scores,
                     raw_prediction.to_dict())
            all_results.append(entry)

            print("Running average:")
            print(get_average_results(all_results))
            print()

    avg_results = get_average_results(all_results)
    print('Average results:')
    pprint(avg_results)
    utils.write_json({'average': avg_results, 'results': all_results},
                     result_path)
Example #2
0
def evaluate(tls_model,
             dataset,
             result_path,
             trunc_timelines=False,
             time_span_extension=0):
    """Evaluate ``tls_model`` on every reference timeline of ``dataset``.

    For each collection in ``dataset.collections`` and each of its reference
    timelines, the model is loaded with the current topic ignored, a timeline
    is predicted, and the prediction is scored with ``get_scores``
    (alignment-based ROUGE) and ``evaluate_dates``. The averaged scores, the
    individual results and the mean cluster count per prediction are written
    with ``utils.write_json``.

    Side effect: ``collection.start`` and ``collection.end`` are overwritten
    for every reference timeline processed.

    Args:
        tls_model: Object providing ``load(ignored_topics=...)`` and
            ``predict(...)``; ``predict`` must return a
            ``(timeline, n_clusters)`` pair.
        dataset: Object exposing ``collections``; each collection provides
            ``name`` and ``timelines``.
        result_path: Destination handed to ``utils.write_json``.
        trunc_timelines: When truthy, reference timelines are passed through
            ``data.truncate_timelines`` before evaluation.
        time_span_extension: Forwarded to ``data.get_input_time_span``.
    """
    results = []
    metric = 'align_date_content_costs_many_to_one'
    evaluator = rouge.TimelineRougeEvaluator(measures=["rouge_1", "rouge_2"])
    n_topics = len(dataset.collections)
    # Sum of the cluster counts reported by the model, one per prediction.
    total_clusters = 0

    for i, collection in enumerate(dataset.collections):

        ref_timelines = [
            TilseTimeline(tl.date_to_summaries) for tl in collection.timelines
        ]
        topic = collection.name
        # Counted before the optional truncation; used only for progress output.
        n_ref = len(ref_timelines)

        # only for entity
        if trunc_timelines:
            ref_timelines = data.truncate_timelines(ref_timelines, collection)

        for j, ref_timeline in enumerate(ref_timelines):

            print(
                f'topic {i+1}/{n_topics}: {topic}, ref timeline {j+1}/{n_ref}')

            # Reload for every reference timeline with the current topic ignored.
            tls_model.load(ignored_topics=[collection.name])

            ref_dates = sorted(ref_timeline.dates_to_summaries)

            start, end = data.get_input_time_span(ref_dates,
                                                  time_span_extension)

            collection.start = start
            collection.end = end
            print(f"name = {topic} start = {start} end = {end}")

            # The prediction is capped by the reference timeline's own size.
            n_ref_dates = len(ref_dates)
            avg_summary_len = data.get_average_summary_length(ref_timeline)

            pred_timeline_, n_clusters = tls_model.predict(
                collection,
                max_dates=n_ref_dates,
                max_summary_sents=avg_summary_len,
                ref_tl=ref_timeline  # only oracles need this
            )
            total_clusters += n_clusters

            print('timeline done')
            pred_timeline = TilseTimeline(pred_timeline_.date_to_summaries)
            sys_len = len(pred_timeline.get_dates())
            ground_truth = TilseGroundTruth([ref_timeline])

            rouge_scores = get_scores(metric, pred_timeline, ground_truth,
                                      evaluator)
            date_scores = evaluate_dates(pred_timeline, ground_truth)

            print('sys-len:', sys_len, 'gold-len:', n_ref_dates, 'gold-k:',
                  avg_summary_len)

            print('Alignment-based ROUGE:')
            pprint(rouge_scores)
            print('Date selection:')
            pprint(date_scores)
            print('-' * 100)
            results.append(
                (rouge_scores, date_scores, pred_timeline_.to_dict()))

    avg_results = get_average_results(results)
    print('Average results:')
    pprint(avg_results)

    # One cluster count is accumulated per prediction (i.e. per reference
    # timeline), so the mean must be taken over predictions, not collections.
    # Guard against an empty run to avoid a ZeroDivisionError.
    n_predictions = len(results)
    average_clusters = total_clusters / n_predictions if n_predictions else 0.0

    output = {
        'average_clusters': average_clusters,
        'average': avg_results,
        'results': results,
    }
    utils.write_json(output, result_path)