Beispiel #1
0
    def _run_for_one(self, t, corpora, topic_to_preprocessed, reference_timelines):
        """
        Trains for topic `t`, then predicts and evaluates one timeline for
        each reference timeline of that topic.

        Params:
            t: Topic key used to index `corpora`, `topic_to_preprocessed` and
                `reference_timelines`. It is concatenated with "_" and the
                index of the reference timeline to build the result keys.
            corpora: Container indexable by topic; also handed unchanged to
                `self.train`.
            topic_to_preprocessed: Container indexable by topic; the entry for
                `t` is handed to `self.predict`.
            reference_timelines: Container indexable by topic whose entries
                expose a `timelines` iterable.

        Returns:
            A 3-tuple of dicts, all keyed by `t + "_" + str(i)` where `i` is
            the position of the reference timeline:
                * the output of `self.rouge.evaluate_all`,
                * the output of `dates.evaluate_dates`,
                * the predicted timeline itself.
        """
        logging.info(t)
        corpus = corpora[t]

        # train
        params = self.train(corpora, topic_to_preprocessed, reference_timelines, t)

        results_rouge = {}
        results_date_selection = {}
        returned_timelines = {}

        # predict
        for i, timeline in enumerate(reference_timelines[t].timelines):
            timeline_properties = self.get_timeline_properties(timeline)
            # Each reference timeline is evaluated on its own, so it is wrapped
            # in a single-element ground truth.
            groundtruth = timelines.GroundTruth([timeline])

            pred = self.predict(corpus, topic_to_preprocessed[t], timeline_properties, params)

            # Diagnostics are emitted at DEBUG level with lazy %-arguments, so
            # the (potentially large) objects are only rendered when debug
            # logging is actually enabled.
            logging.debug("topic_to_preprocessed %s", topic_to_preprocessed)
            logging.debug("timeline_properties %s", timeline_properties)
            logging.debug("groundtruth %s", groundtruth)
            logging.debug("pred %s", pred)

            # evaluate
            key = t + "_" + str(i)
            results_rouge[key] = self.rouge.evaluate_all(pred, groundtruth)
            results_date_selection[key] = dates.evaluate_dates(pred, groundtruth)
            returned_timelines[key] = pred

        return results_rouge, results_date_selection, returned_timelines
Beispiel #2
0
    def get_timeline_properties(self, timeline):
        """
        Derives the target properties of a timeline from a reference timeline.

        Params:
            timeline (tilse.data.timelines.Timeline): A timeline.

        Returns:
            A tilse.models.timeline_properties.TimelineProperties object,
            with:
                * `daily_summary_length` set to the output of
                  `self.summary_length_assessor`, called on a ground truth
                  that contains only the input timeline,
                * `num_dates` set to the number of dates in the input timeline,
                * `num_sentences` set to the output of
                  `timeline.get_number_of_sentences()`,
                * `start` and `end` set to the earliest and latest date in the
                  input timeline.
        """
        ordered_dates = sorted(timeline.get_dates())

        # The length assessor works on a ground truth, so the single timeline
        # is wrapped before being passed in.
        single_reference = timelines.GroundTruth([timeline])
        daily_length = self.summary_length_assessor(single_reference)

        return TimelineProperties(
            daily_length,
            len(ordered_dates),
            timeline.get_number_of_sentences(),
            ordered_dates[0],
            ordered_dates[-1]
        )
Beispiel #3
0
    def setUp(self):
        """
        Creates the fixtures shared by the tests: a default ROUGE evaluator,
        a ground truth made of two reference timelines, and four system
        timelines with two, three (two variants) and four dates.
        """
        def jan(day):
            # All fixture dates fall in January 2010.
            return datetime.date(2010, 1, day)

        self.evaluator = rouge.TimelineRougeEvaluator()

        three_date_reference = timelines.Timeline({
            jan(1): ["timeline summarization ."],
            jan(2): [
                "timeline summarization is awesome .",
                "coreference resolution is , too .",
            ],
            jan(4): ["alignments are really nice"],
        })
        one_date_reference = timelines.Timeline({
            jan(2): ["metrics are complicated ."],
        })
        self.ground_truth = timelines.GroundTruth(
            [three_date_reference, one_date_reference]
        )

        # Three dates.
        self.output_same_number_of_dates = timelines.Timeline({
            jan(2): ["timeline summarization ."],
            jan(3): ["doing some metric checks ."],
            jan(5): ["checks for alignments ."],
        })

        # Three dates; every sentence also occurs in the first reference
        # timeline, but on different days.
        self.output_same_number_of_dates_scores_higher_with_date_content_costs = timelines.Timeline({
            jan(2): ["timeline summarization ."],
            jan(3): ["alignments are really nice"],
            jan(5): ["timeline summarization ."],
        })

        # Two dates.
        self.output_less_dates = timelines.Timeline({
            jan(2): ["timeline summarization ."],
            jan(3): ["doing some metric checks ."],
        })

        # Four dates.
        self.output_more_dates = timelines.Timeline({
            jan(2): ["timeline summarization ."],
            jan(3): ["doing some metric checks ."],
            jan(5): ["checks for alignments ."],
            jan(6): ["asdf"],
        })
Beispiel #4
0
    def _run_test(self, test_to_run, ref_tls, names):
        """
        Applies `test_to_run` to a copy of every reference timeline and scores
        the resulting timeline against the reference it was copied from.

        Params:
            test_to_run: Callable invoked with the copied timeline. Its return
                value is ignored, so it presumably modifies the timeline in
                place (confirm against the callers).
            ref_tls: Iterable of reference timelines; each must expose
                `dates_to_summaries`.
            names: Iterable of result keys, paired with `ref_tls` via `zip`.

        Returns:
            A `scores.Scores` object built from a dict that maps each name to
            the output of `self.rouge.evaluate_all`.
        """
        per_name_results = {}

        for name, reference in zip(names, ref_tls):
            # Copy the reference date by date, with a fresh list per date, so
            # that list-level changes made by the test do not leak into the
            # reference.
            candidate = timelines.Timeline({})
            for day, sentences in reference.dates_to_summaries.items():
                candidate.dates_to_summaries[day] = list(sentences)

            test_to_run(candidate)

            single_reference = timelines.GroundTruth([reference])
            per_name_results[name] = self.rouge.evaluate_all(
                candidate, single_reference)

        return scores.Scores(per_name_results)
Beispiel #5
0
    # filter dataset by keywords
    # if keyword_mapping is not None and keyword_mapping[topic] is not None:
    #     news_corpora[topic] = news_corpora[topic].filter_by_keywords_contained(keyword_mapping[topic])

    # read groundtruth timelines
    # Files are visited in sorted name order so the order of the reference
    # timelines does not depend on the directory listing order.
    timelines_directory = raw_directory + "/" + topic + "/timelines/"
    for filename in sorted(os.listdir(timelines_directory)):
        full_path = timelines_directory + filename

        # Open through a context manager so every file handle is closed once
        # its timeline has been parsed, instead of being left to the garbage
        # collector.
        # NOTE(review): assumes Timeline.from_file consumes the file before
        # returning -- confirm.
        with codecs.open(full_path, "r", "utf-8", "replace") as timeline_file:
            temp_reference_timelines[topic].append(
                timelines.Timeline.from_file(timeline_file))

# Wrap the timelines collected for each topic in a single GroundTruth object.
for topic, topic_timelines in temp_reference_timelines.items():
    reference_timelines[topic] = timelines.GroundTruth(topic_timelines)

# Evaluator restricted to the ROUGE-1 and ROUGE-2 measures.
evaluator = rouge.TimelineRougeEvaluator(
    measures=["rouge_1", "rouge_2"],
    beta=1,
)

model_avg_scores = {}

for _config in configs:
    config_file = modelbase_dir + _config
    print(_config)
    print(config_file)
    # Read the configuration inside a context manager so the file handle is
    # closed as soon as the JSON has been parsed.
    with open(config_file) as config_handle:
        config = json.load(config_handle)
    # Override whatever ROUGE backend the config file specifies.
    config["rouge_computation"] = "reimpl"

    logging.info(config)