Example #1
    def _compute_average(self):
        # Average precision and recall over all topics for every
        # (mode, measure) pair and store the result under "average_score";
        # the F score is recomputed from the averaged precision and recall.
        topics = sorted(self.mapping.keys())
        modes = list(self.mapping[topics[0]])

        self.mapping["average_score"] = OrderedDict()

        for mode in modes:
            self.mapping["average_score"][mode] = {}
            for measure in self.mapping[topics[0]][mode]:
                self.mapping["average_score"][mode][measure] = {}

                mode_scores = [self.mapping[t][mode][measure] for t in topics]
                self.mapping["average_score"][mode][measure]["precision"] = \
                    sum([s["precision"] for s in mode_scores]) / len(mode_scores)
                self.mapping["average_score"][mode][measure]["recall"] = \
                    sum([s["recall"] for s in mode_scores]) / len(mode_scores)

                self.mapping["average_score"][mode][measure]["f_score"] = util.get_f_score(
                    self.mapping["average_score"][mode][measure]["precision"],
                    self.mapping["average_score"][mode][measure]["recall"],
                    beta=self.beta
                )

        all_date_scores = [self.date_mapping[t] for t in topics]

        self.date_mapping["average_score"] = OrderedDict()
        self.date_mapping["average_score"]["precision"] = sum([x["precision"] for x in all_date_scores]) / len(topics)
        self.date_mapping["average_score"]["recall"] = sum([x["recall"] for x in all_date_scores]) / len(topics)
        self.date_mapping["average_score"]["f_score"] = util.get_f_score(
            self.date_mapping["average_score"]["precision"],
            self.date_mapping["average_score"]["recall"],
            beta=self.beta
        )
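After `_compute_average` runs, an "average_score" entry sits next to the per-topic entries of `self.mapping`. A sketch of the resulting shape is shown below; the topic, mode and measure names and all score values are made up for illustration, and `beta = 1` is assumed:

mapping = {
    "libya": {"concat": {"rouge_1": {"precision": 0.4, "recall": 0.3, "f_score": 0.34}}},
    "syria": {"concat": {"rouge_1": {"precision": 0.2, "recall": 0.5, "f_score": 0.29}}},
    # precision/recall are means over the topics; f_score is recomputed from
    # the averaged precision and recall, not averaged directly.
    "average_score": {"concat": {"rouge_1": {"precision": 0.3, "recall": 0.4, "f_score": 0.34}}},
}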
Example #2
    def evaluate_concat(self, predicted_timeline, reference_timelines):
        """ Evaluate a predicted timeline w.r.t. a set of reference timelines using the
        'concat' ROUGE variant.

        This variant first concatenates all daily summaries of the respective timelines. The
        resulting documents are then evaluated using the ROUGE measure.

        Args:
            predicted_timeline (data.timelines.Timeline): A timeline.
            reference_timelines (data.timelines.GroundTruth): A ground truth of timelines.

        Returns:
            A dict(str, dict(str, float)) object mapping each ROUGE measure in `self.measures`
            to a dict that maps 'precision', 'recall' and 'f_score' to the corresponding values,
            e.g.

                {"rouge_1": {"precision": 1.0, "recall": 1.0, "f_score": 1.0}}
        """
        pred_sents = []

        for date in sorted(list(predicted_timeline.get_dates())):
            pred_sents.extend(
                [sent.split() for sent in predicted_timeline[date]])

        ref_sents = {}

        for i, timeline in enumerate(reference_timelines.timelines):
            ref_sents[str(i)] = []
            timeline_dates = sorted(list(timeline.get_dates()))
            for date in timeline_dates:
                ref_sents[str(i)].extend(
                    [sent.split() for sent in timeline[date]])

        scores = self._get_rouge_counts(pred_sents, ref_sents)

        output_scores = {}

        for measure in self.measures:

            prec = scores[measure]["prec_num"]
            rec = scores[measure]["rec_num"]

            if (scores[measure]["prec_denom"] > 0):
                prec = scores[measure]["prec_num"] / scores[measure][
                    "prec_denom"]

            if (scores[measure]["rec_denom"] > 0):
                rec = scores[measure]["rec_num"] / scores[measure]["rec_denom"]

            output_scores[measure] = {
                "precision": prec,
                "recall": rec,
                "f_score": util.get_f_score(prec, rec, beta=self.beta)
            }

        return output_scores
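The final loop of evaluate_concat, which turns raw match counts into precision/recall/F scores, recurs in the other variants below. The following is a minimal standalone sketch of that step; the helper name counts_to_scores is made up, and it assumes the count-dict shape produced by `_get_rouge_counts` and the `util.get_f_score` helper used in the examples above:

def counts_to_scores(counts, measures, beta):
    """Turn raw ROUGE counts into precision/recall/F scores per measure."""
    scores = {}
    for measure in measures:
        c = counts[measure]
        # Guard against division by zero; the numerator is 0 whenever the
        # corresponding denominator is 0.
        prec = c["prec_num"] / c["prec_denom"] if c["prec_denom"] > 0 else 0
        rec = c["rec_num"] / c["rec_denom"] if c["rec_denom"] > 0 else 0
        scores[measure] = {
            "precision": prec,
            "recall": rec,
            "f_score": util.get_f_score(prec, rec, beta=beta),
        }
    return scores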
Example #3
    def _evaluate_per_day_mapping_micro(
            self,
            predicted_timeline,
            reference_timelines,
            compute_costs,
            optimize_assignment):
        # Micro-averaged per-day ROUGE: predicted and reference dates are
        # aligned via `optimize_assignment` over the cost matrices returned
        # by `compute_costs`, matched days are scored with a date-distance
        # discount, unmatched days are scored against empty summaries, and
        # all counts are summed before dividing (micro average).
        precision_numerator = collections.defaultdict(list)
        precision_denominator = collections.defaultdict(list)

        recall_numerator = collections.defaultdict(list)
        recall_denominator = collections.defaultdict(list)

        pred_dates = sorted(list(predicted_timeline.get_dates()))
        ref_dates = sorted(list(reference_timelines.get_dates()))

        prec_costs = compute_costs(pred_dates, ref_dates, predicted_timeline,
                                   reference_timelines, axis=0)
        rec_costs = compute_costs(pred_dates, ref_dates, predicted_timeline,
                                  reference_timelines, axis=1)

        prec_row, prec_col = optimize_assignment(prec_costs)
        rec_row, rec_col = optimize_assignment(rec_costs)

        # precision
        for row, col in zip(prec_row, prec_col):
            pred_date = pred_dates[row]
            ref_date = ref_dates[col]

            temp_groundtruth = reference_timelines[ref_date]
            groundtruth = {}
            for name in temp_groundtruth:
                groundtruth[name] = [sent.split() for sent in temp_groundtruth[name]]

            scores = self._get_rouge_counts(
                [sent.split() for sent in predicted_timeline[pred_date]],
                groundtruth
            )

            for measure in self.measures:
                precision_numerator[measure].append(
                    (1 / (abs(pred_date.toordinal() - ref_date.toordinal()) + 1)) * scores[measure]["prec_num"])
                precision_denominator[measure].append(scores[measure]["prec_denom"])

        matched_prec = set(prec_row)

        for i, date in enumerate(pred_dates):
            if i not in matched_prec:
                pred_date = pred_dates[i]

                scores = self._get_rouge_counts(
                    [sent.split() for sent in predicted_timeline[pred_date]],
                    {str(i): [[""]] for i, _ in enumerate(reference_timelines.timelines)}
                )

                for measure in self.measures:
                    precision_numerator[measure].append(scores[measure]["prec_num"])
                    precision_denominator[measure].append(scores[measure]["prec_denom"])

        # recall
        for row, col in zip(rec_row, rec_col):
            pred_date = pred_dates[col]
            ref_date = ref_dates[row]

            temp_groundtruth = reference_timelines[ref_date]
            groundtruth = {}
            for name in temp_groundtruth:
                groundtruth[name] = [sent.split() for sent in temp_groundtruth[name]]

            scores = self._get_rouge_counts(
                [sent.split() for sent in predicted_timeline[pred_date]],
                groundtruth
            )

            for measure in self.measures:
                recall_numerator[measure].append(
                    (1 / (abs(pred_date.toordinal() - ref_date.toordinal()) + 1)) * scores[measure]["rec_num"])
                recall_denominator[measure].append(scores[measure]["rec_denom"])

        matched_rec = set(rec_row)

        for i, date in enumerate(ref_dates):
            if i not in matched_rec:
                ref_date = ref_dates[i]

                temp_groundtruth = reference_timelines[ref_date]
                groundtruth = {}
                for name in temp_groundtruth:
                    groundtruth[name] = [sent.split() for sent in temp_groundtruth[name]]

                scores = self._get_rouge_counts(
                    [[""]],
                    groundtruth
                )

                for measure in self.measures:
                    recall_numerator[measure].append(scores[measure]["rec_num"])
                    recall_denominator[measure].append(scores[measure]["rec_denom"])

        output_scores = {}

        for measure in self.measures:
            prec_denom_sum = sum(precision_denominator[measure])

            if prec_denom_sum == 0:
                prec = 0
            else:
                prec = sum(precision_numerator[measure]) / prec_denom_sum

            rec_denom_sum = sum(recall_denominator[measure])

            if rec_denom_sum == 0:
                rec = 0
            else:
                rec = sum(recall_numerator[measure]) / rec_denom_sum

            output_scores[measure] = {
                "precision": prec,
                "recall": rec,
                "f_score": util.get_f_score(prec, rec, beta=self.beta)
            }

        return output_scores
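The micro variant above discounts the numerator of each matched day by how far the predicted date lies from the assigned reference date (the 1 / (|Δdays| + 1) factor). A standalone sketch of that weight follows; the helper name is made up:

from datetime import date


def date_distance_weight(pred_date, ref_date):
    """Weight applied to matched ROUGE counts: 1 for an exact date match,
    1/2 for adjacent days, and so on."""
    return 1 / (abs(pred_date.toordinal() - ref_date.toordinal()) + 1)


# Example: two days apart -> the counts are discounted by a factor of 1/3.
assert date_distance_weight(date(2011, 2, 1), date(2011, 2, 3)) == 1 / 3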
Example #4
    def evaluate_agreement(self, predicted_timeline, reference_timelines):
        """ Evaluate a predicted timeline w.r.t. a set of reference timelines using the
        'agreement' ROUGE variant.

        This variant compares the daily summaries of a date if the date appears in both the
        predicted timeline and in one of the reference timelines.

        Args:
            predicted_timeline (data.timelines.Timeline): A timeline.
            reference_timelines (data.timelines.GroundTruth): A ground truth of timelines.

        Returns:
            A dict(str, dict(str, float)) object mapping each ROUGE measure in `self.measures`
            to a dict that maps 'precision', 'recall' and 'f_score' to the corresponding values,
            e.g.

                {"rouge_1": {"precision": 1.0, "recall": 1.0, "f_score": 1.0}}
        """
        precision_numerator = collections.defaultdict(list)
        precision_denominator = collections.defaultdict(list)
        recall_numerator = collections.defaultdict(list)
        recall_denominator = collections.defaultdict(list)

        pred_dates = predicted_timeline.get_dates()
        ref_dates = reference_timelines.get_dates()

        all_dates = pred_dates.union(ref_dates)

        for date in all_dates:
            temp_groundtruth = reference_timelines[date]
            groundtruth = {}
            for name in temp_groundtruth:
                groundtruth[name] = [sent.split() for sent in temp_groundtruth[name]]

            scores = self._get_rouge_counts(
                [sent.split() for sent in predicted_timeline[date]],
                groundtruth
            )

            for measure in self.measures:
                if date in pred_dates:
                    precision_numerator[measure].append(scores[measure]["prec_num"])
                    precision_denominator[measure].append(scores[measure]["prec_denom"])

                if date in ref_dates:
                    recall_numerator[measure].append(scores[measure]["rec_num"])
                    recall_denominator[measure].append(scores[measure]["rec_denom"])

        output_scores = {}

        for measure in self.measures:
            prec_denom_sum = sum(precision_denominator[measure])

            if prec_denom_sum == 0:
                prec = 0
            else:
                prec = sum(precision_numerator[measure]) / prec_denom_sum

            rec_denom_sum = sum(recall_denominator[measure])

            if rec_denom_sum == 0:
                rec = 0
            else:
                rec = sum(recall_numerator[measure]) / rec_denom_sum

            output_scores[measure] = {
                "precision": prec,
                "recall": rec,
                "f_score": util.get_f_score(prec, rec, beta=self.beta)
            }

        return output_scores
Example #5
    def test_get_f_score(self):
        self.assertEqual(0, util.get_f_score(0, 1, 1))
        self.assertEqual(1, util.get_f_score(1, 1, 1))
        self.assertAlmostEqual(0.555555556, util.get_f_score(0.5, 1, 0.5))
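The assertions above are consistent with the standard F_beta definition. Below is a minimal sketch of an implementation that satisfies them; this is an assumption about util.get_f_score rather than its actual source, and the behaviour when both inputs are 0 is guessed:

def get_f_score(precision, recall, beta=1):
    """F_beta score of the given precision and recall.

    Returns 0 when both precision and recall are 0 (assumed edge case).
    """
    if precision == 0 and recall == 0:
        return 0
    return ((1 + beta ** 2) * precision * recall
            / (beta ** 2 * precision + recall))


# Matches the test above: get_f_score(0.5, 1, 0.5) == 0.5555...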
Example #6
    def train(self, corpora, preprocessed_information, timelines,
              topic_to_evaluate):
        """
        Computes per-day ROUGE F1 for each sentence in the corpus for
        `topic_to_evaluate` (This is quite a misuse of the semantics of
        this function).

        Params:
            corpora (dict(str, tilse.data.corpora.Corpus)): A mapping of topic names to corresponding corpora.
            preprocessed_information (object): Arbitrary information obtained from preprocessing.
            timelines (dict(str, tilse.data.timelines.Groundtruth)): A mapping of topic names
                to corresponding reference timelines.
            topic_to_evaluate (str): The topic to evaluate (must be a key in `corpora`). The given topic will not
                be used during training (so that it can serve as evaluation data later).

        Returns:
            A mapping of timeline properties for each of the timelines in
            `timelines[topic_to_evaluate]` to a numpy array of per-day
            ROUGE-1 F1 scores for all sentences in the corresponding corpus.

        """
        rouge = RougeReimplementation()
        corpus = corpora[topic_to_evaluate]
        reference_timelines = timelines[topic_to_evaluate]

        rouge_vals = {}

        for tl in reference_timelines.timelines:
            tp = self.get_timeline_properties(tl)

            rouge_vals[tp] = []

            for doc in corpus.docs:
                for sent in doc:
                    sent_processed = [[x.content for x in sent]]
                    ref_processed = {
                        "0": [[x for x in s.split()] for s in tl[sent.date]]
                    }

                    rouge_computed = rouge.score_summary(
                        sent_processed, ref_processed)

                    if rouge_computed["rouge_1_p_count"] == 0:
                        prec = 0
                    else:
                        prec = rouge_computed["rouge_1_h_count"] / \
                               rouge_computed["rouge_1_p_count"]

                    if rouge_computed["rouge_1_m_count"] == 0:
                        rec = 0
                    else:
                        rec = rouge_computed["rouge_1_h_count"] / \
                              rouge_computed["rouge_1_m_count"]

                    f1 = get_f_score(prec, rec)

                    rouge_vals[tp].append(f1)

            rouge_vals[tp] = numpy.array(rouge_vals[tp])

        return rouge_vals
Example #7
    def train(self, corpora, preprocessed_information, reference_timelines,
              timeline_to_evaluate):
        """
        Trains the model.

        For details on training, see the docstring of this class.

        Params:
            corpora (dict(str, tilse.data.corpora.Corpus)): A mapping of topic names to corresponding corpora.
            preprocessed_information (object): Arbitrary information obtained from preprocessing.
            reference_timelines (dict(str, tilse.data.timelines.Groundtruth)): A mapping of topic names
                to corresponding reference timelines.
            timeline_to_evaluate (str): The topic to evaluate (must be a key in `corpora`). The given topic will not
                be used during training (so that it can serve as evaluation data later).

        Returns:
            Nothing, `self.model` is updated.
        """
        rouge = RougeReimplementation()

        features = []
        f1_scores = []

        for t in corpora:
            if t == timeline_to_evaluate:
                continue

            corpus = corpora[t]
            sum_tfidf, avg_tfidf = preprocessed_information[t]

            i = 0
            for doc in corpus:
                for sent in doc:
                    sent_processed = [[x.content for x in sent]]

                    ref_temp = reference_timelines[t][sent.date]

                    ref_processed = {}

                    for k, sents in ref_temp.items():
                        ref_processed[k] = [s.split() for s in sents]

                    rouge_computed = rouge.score_summary(
                        sent_processed, ref_processed)

                    if rouge_computed["rouge_1_p_count"] == 0:
                        prec = 0
                    else:
                        prec = rouge_computed[
                            "rouge_1_h_count"] / rouge_computed[
                                "rouge_1_p_count"]

                    if rouge_computed["rouge_1_m_count"] == 0:
                        rec = 0
                    else:
                        rec = rouge_computed[
                            "rouge_1_h_count"] / rouge_computed[
                                "rouge_1_m_count"]

                    f1 = util.get_f_score(prec, rec)

                    features.append(
                        Regression._compute_features_for_sent(
                            sent, i, sum_tfidf, avg_tfidf))

                    f1_scores.append(f1)

                    i += 1

        vectorized = self.vectorizer.fit_transform(features)

        self.model.fit(vectorized, f1_scores)
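For context, here is a hedged sketch of how the model fitted above could score held-out sentences. The function name and signature are hypothetical; it assumes `self.vectorizer` follows the scikit-learn fit_transform/transform interface (e.g. DictVectorizer) and `self.model` is a scikit-learn regressor, as the calls above suggest, and it reuses `Regression._compute_features_for_sent` from the training loop:

def predict_sentence_scores(vectorizer, model, corpus, sum_tfidf, avg_tfidf):
    """Predict a per-sentence score for every sentence in `corpus`."""
    features = []
    i = 0
    for doc in corpus:
        for sent in doc:
            features.append(
                Regression._compute_features_for_sent(
                    sent, i, sum_tfidf, avg_tfidf))
            i += 1

    # transform (not fit_transform): reuse the feature space learned in train()
    vectorized = vectorizer.transform(features)
    return model.predict(vectorized)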