Example #1
0
    def score(self,
              hypothesis: List[str],
              references: List[List[str]],
              tags: Optional[List[List[str]]] = None) -> VizSeqScore:
        """Compute CIDEr scores for ``hypothesis`` against ``references``.

        Sentence-level scores are always computed because the corpus score
        and the per-tag group scores are both plain means over them.

        Args:
            hypothesis: One hypothesis string per example.
            references: Reference sets, passed through unchanged to
                ``_get_sent_cider``.
            tags: Optional per-example tag lists. When given, one group score
                is produced per distinct tag, averaged over the examples
                carrying that tag.

        Returns:
            A ``VizSeqScore`` whose ``corpus_score`` is set only when
            ``self.corpus_level`` is true, whose ``sent_scores`` is set only
            when ``self.sent_level`` is true, and whose ``group_scores`` is
            set only when ``tags`` is given.
        """
        self._update_n_workers(len(hypothesis))

        sent_scores = _get_sent_cider(hypothesis,
                                      references,
                                      extra_args={
                                          'n_workers': self.n_workers,
                                          'verbose': self.verbose
                                      })
        corpus_score, group_scores = None, None

        if self.corpus_level:
            corpus_score = np.mean(sent_scores)

        # Group scores must be derived *before* the sentence scores are
        # dropped: they index into ``sent_scores`` and would otherwise fail
        # with a TypeError whenever ``sent_level`` is disabled.
        if tags is not None:
            group_scores = {}
            for t in self._unique(tags):
                indices = [i for i, cur in enumerate(tags) if t in cur]
                group_scores[t] = np.mean([sent_scores[i] for i in indices])

        # Only expose the per-sentence values when the caller asked for them.
        if not self.sent_level:
            sent_scores = None

        return VizSeqScore.make(corpus_score=corpus_score,
                                sent_scores=sent_scores,
                                group_scores=group_scores)
Example #2
0
    def score(
            self, hypothesis: List[str], references: List[List[str]],
            tags: Optional[List[List[str]]] = None
    ) -> VizSeqScore:
        """Compute BLEU at sentence, corpus and tag-group granularity.

        Args:
            hypothesis: One hypothesis string per example.
            references: Reference sets; each inner list is indexed by example
                position (it is sliced with the same indices as
                ``hypothesis`` when building tag groups).
            tags: Optional per-example tag lists. When given, a corpus-style
                score is computed separately over the examples of each tag.

        Returns:
            A ``VizSeqScore``; each field is ``None`` unless the matching
            level is enabled (``self.sent_level`` / ``self.corpus_level``) or,
            for ``group_scores``, unless ``tags`` is provided.
        """
        self._update_n_workers(len(hypothesis))

        sent_scores = (
            self._score_sentences_multiprocess(
                hypothesis, references, _get_sent_bleu
            )
            if self.sent_level else None
        )
        corpus_score = (
            self.score_corpus_multiprocess(hypothesis, references)
            if self.corpus_level else None
        )

        group_scores = None
        if tags is not None:
            distinct_tags = self._unique(tags)
            group_scores = {}
            for tag in distinct_tags:
                # Positions of every example labelled with this tag.
                members = [
                    pos for pos, sent_tags in enumerate(tags)
                    if tag in sent_tags
                ]
                group_hypos = [hypothesis[pos] for pos in members]
                group_refs = [
                    [ref_set[pos] for pos in members]
                    for ref_set in references
                ]
                # Groups are scored as small corpora, not as a mean of
                # sentence scores.
                group_scores[tag] = self.score_corpus_multiprocess(
                    group_hypos, group_refs
                )

        return VizSeqScore.make(
            corpus_score=corpus_score, sent_scores=sent_scores,
            group_scores=group_scores
        )
Example #3
0
    def score(
            self, hypothesis: List[str], references: List[List[str]],
            tags: Optional[List[List[str]]] = None
    ) -> VizSeqScore:
        """Score hypotheses with ``bert_score`` against the first reference set.

        Only ``references[0]`` is used. The language handed to ``bert_score``
        is detected with ``langid`` from the first sentence of that set.

        Args:
            hypothesis: One hypothesis string per example.
            references: Reference sets; only the first one is consulted.
            tags: Optional per-example tag lists used to average sentence
                scores per distinct tag.

        Returns:
            A ``VizSeqScore`` with sentence scores always populated, the
            corpus score (mean of sentence scores) populated when
            ``self.corpus_level`` is true, and group scores populated when
            ``tags`` is given.
        """
        # Imported lazily so these packages are only needed when this
        # scorer is actually run.
        import bert_score as bs
        import langid
        import logging

        # Keep the third-party loggers from flooding the output.
        for noisy_logger in ('pytorch_pretrained_bert', 'langid'):
            logging.getLogger(noisy_logger).setLevel(logging.WARNING)

        first_refs = references[0]
        lang = langid.classify(first_refs[0])[0]

        bs_result = bs.score(
            hypothesis, first_refs, nthreads=self.n_workers, lang=lang,
            verbose=self.verbose
        )
        # Third element of the result tuple; presumably the F1 tensor --
        # confirm against the installed bert_score version.
        sent_scores = bs_result[2].tolist()

        corpus_score = np.mean(sent_scores) if self.corpus_level else None

        group_scores = None
        if tags is not None:
            group_scores = {
                tag: np.mean([
                    sent_scores[pos]
                    for pos, sent_tags in enumerate(tags) if tag in sent_tags
                ])
                for tag in self._unique(tags)
            }

        return VizSeqScore.make(
            corpus_score=corpus_score, sent_scores=sent_scores,
            group_scores=group_scores
        )
Example #4
0
    def score(self,
              hypothesis: List[str],
              references: List[List[str]],
              tags: Optional[List[List[str]]] = None) -> VizSeqScore:
        """Compute WER per sentence, over the corpus and per tag group.

        Corpus and group scores are length-weighted: each sentence score is
        multiplied by the value ``_get_sent_len_r`` yields for that sentence
        (presumably the reference length -- confirm against the helper), the
        products are summed, and the sum is divided by the total length of
        the *same* set of sentences.

        Args:
            hypothesis: One hypothesis string per example.
            references: Reference sets, passed through to the sentence-level
                helpers.
            tags: Optional per-example tag lists. When given, one
                length-weighted score is produced per distinct tag.

        Returns:
            A ``VizSeqScore`` with sentence scores always populated, the
            corpus score populated when ``self.corpus_level`` is true, and
            group scores populated when ``tags`` is given.
        """
        self._update_n_workers(len(hypothesis))

        corpus_score, group_scores = None, None
        sent_scores = self._score_sentences_multiprocess(
            hypothesis, references, _get_sent_wer)

        sent_lens = None
        if self.corpus_level:
            sent_lens = self._score_sentences_multiprocess(
                hypothesis, references, _get_sent_len_r)
            n_incorrect = np.sum(
                [s * n for s, n in zip(sent_scores, sent_lens)])
            corpus_score = n_incorrect / np.sum(sent_lens)

        if tags is not None:
            # Lengths may already be available from the corpus-level pass.
            if sent_lens is None:
                sent_lens = self._score_sentences_multiprocess(
                    hypothesis, references, _get_sent_len_r)
            group_scores = {}
            for t in self._unique(tags):
                indices = [i for i, cur in enumerate(tags) if t in cur]
                cur_sent_scores = [sent_scores[i] for i in indices]
                cur_sent_lens = [sent_lens[i] for i in indices]
                n_incorrect = np.sum(
                    [s * n for s, n in zip(cur_sent_scores, cur_sent_lens)])
                # Normalise by the group's own length, not the whole
                # corpus: dividing by the corpus total would shrink every
                # group score by the group's share of the corpus.
                group_scores[t] = n_incorrect / np.sum(cur_sent_lens)

        return VizSeqScore.make(corpus_score=corpus_score,
                                sent_scores=sent_scores,
                                group_scores=group_scores)
Example #5
0
    def score(
            self, hypothesis: List[str], references: Optional[List[List[str]]] = None,
            tags: Optional[List[List[str]]] = None
    ) -> VizSeqScore:
        """Compute self-BLEU over the hypotheses alone.

        ``references`` is accepted for interface compatibility but never
        read: the scores come solely from ``compute_self_bleu(hypothesis)``.

        Args:
            hypothesis: One hypothesis string per example.
            references: Unused.
            tags: Must be ``None``; tag-group scoring is not implemented.

        Returns:
            A ``VizSeqScore`` whose ``corpus_score`` is the mean of the
            self-BLEU values (when ``self.corpus_level`` is true) and whose
            ``sent_scores`` holds the values themselves (when
            ``self.sent_level`` is true). ``group_scores`` is always ``None``.

        Raises:
            NotImplementedError: If ``tags`` is not ``None``.
        """
        selfbleu_scores = compute_self_bleu(hypothesis)

        corpus_score = np.mean(selfbleu_scores) if self.corpus_level else None
        sent_scores = selfbleu_scores if self.sent_level else None

        # Group-level (per-tag) scoring has not been written for this metric.
        if tags is not None:
            raise NotImplementedError

        return VizSeqScore.make(
            corpus_score=corpus_score,
            sent_scores=sent_scores,
            group_scores=None,
        )
Example #6
0
    def score(
            self, hypothesis: List[str], references: Optional[List[List[str]]] = None,
            tags: Optional[List[List[str]]] = None
    ) -> VizSeqScore:
        """Compute distinct-2 statistics over the hypotheses alone.

        ``references`` and ``tags`` are accepted for interface compatibility
        but never read.

        Returns:
            A ``VizSeqScore`` built directly from the two values returned by
            ``compute_distinct_N(hypothesis, N=2)``: the first becomes
            ``corpus_score`` and the second becomes ``sent_scores``. No group
            scores are supplied.
        """
        # NOTE(review): the first value looks like a corpus-wide ("inter")
        # figure and the second a per-sentence ("intra") one -- confirm
        # against compute_distinct_N.
        corpus_distinct, per_sentence_distinct = compute_distinct_N(
            hypothesis, N=2
        )

        return VizSeqScore(
            corpus_score=corpus_distinct,
            sent_scores=per_sentence_distinct,
        )
Example #7
0
    def score(self,
              hypothesis: List[str],
              references: List[List[str]],
              tags: Optional[List[List[str]]] = None) -> VizSeqScore:
        """Score hypotheses with the LASER-based sentence metric.

        Args:
            hypothesis: One hypothesis string per example.
            references: Reference sets, passed through unchanged to
                ``_get_sent_laser``.
            tags: Optional per-example tag lists used to average sentence
                scores per distinct tag.

        Returns:
            A ``VizSeqScore`` with sentence scores always populated, the
            corpus score (mean of sentence scores) populated when
            ``self.corpus_level`` is true, and group scores populated when
            ``tags`` is given.
        """
        sent_scores = _get_sent_laser(hypothesis, references)

        corpus_score = np.mean(sent_scores) if self.corpus_level else None

        group_scores = None
        if tags is not None:
            # One mean per distinct tag, over the examples carrying it.
            group_scores = {
                tag: np.mean([
                    sent_scores[pos]
                    for pos, sent_tags in enumerate(tags) if tag in sent_tags
                ])
                for tag in self._unique(tags)
            }

        return VizSeqScore.make(corpus_score=corpus_score,
                                sent_scores=sent_scores,
                                group_scores=group_scores)
Example #8
0
    def score(
        self,
        hypothesis: List[str],
        references: List[List[str]],
        tags: Optional[List[List[str]]] = None,
        sources: Optional[List[List[str]]] = None,
    ) -> VizSeqScore:
        """Score how well each hypothesis covers its source requirements.

        Each hypothesis is paired with the requirement string at the same
        position in ``sources[0]`` and scored with
        ``compute_requirement_coverage(..., essential=True)``. ``references``
        and ``tags`` are accepted for interface compatibility but not read.

        Args:
            hypothesis: One hypothesis string per example.
            references: Unused.
            tags: Unused.
            sources: Source sets; the first one supplies the requirement
                strings. Required despite the ``None`` default.

        Returns:
            A ``VizSeqScore`` with one sentence score per
            (requirement, hypothesis) pair, a corpus score equal to the mean
            sentence score times 100 when ``self.corpus_level`` is true, and
            an empty ``group_scores`` dict.

        Raises:
            AssertionError: If ``self.extra_args["problem"]`` is not one of
                the task-predicting problem types.
            TypeError: If ``sources`` is not provided.
        """
        problem = self.extra_args["problem"]
        # Only relevant if predicting tasks
        assert problem in (
            "TargetProductAndRequirements_TO_Tasks",
            "Requirements_TO_TargetProductAndTasks",
            "TargetProductAndRequirementsAndTasks",
            "RequirementsAndTargetProductAndTasks",
        ), "unsupported problem type: {!r}".format(problem)

        # ``sources`` defaults to None only to match the common scorer
        # signature; fail with a clear message instead of the opaque
        # "'NoneType' object is not subscriptable".
        if sources is None:
            raise TypeError(
                "`sources` is required: the requirement strings are read "
                "from sources[0]"
            )
        requirements = sources[0]

        sent_scores = []
        for req_str, hypo in zip(requirements, hypothesis):
            try:
                score = compute_requirement_coverage(hypo,
                                                     req_str,
                                                     essential=True,
                                                     problem=problem)
            except ValueError:
                # A ValueError from the coverage helper counts as zero
                # coverage, so every pair still yields a score.
                score = 0
            sent_scores.append(score)

        corpus_score = None
        if self.corpus_level:
            # Reported on a 0-100 scale, unlike the per-sentence scores.
            corpus_score = np.mean(sent_scores) * 100

        return VizSeqScore.make(corpus_score=corpus_score,
                                sent_scores=sent_scores,
                                group_scores={})
Example #9
0
    def score(
        self,
        hypothesis: List[str],
        references: List[List[str]],
        tags: Optional[List[List[str]]] = None,
        sources: Optional[List[List[str]]] = None,
    ) -> VizSeqScore:
        """Score the ordering of predicted tasks against the reference tasks.

        Each hypothesis is paired with the reference at the same position in
        ``references[0]``. Both strings are parsed with ``string_to_tasks``
        and compared with ``compute_task_order_score``. ``tags`` and
        ``sources`` are accepted for interface compatibility but not read.

        Pairs are skipped (they contribute no sentence score) when the
        hypothesis cannot be parsed in target-product mode or when
        ``compute_task_order_score`` raises ``ScoreComputationError``, so
        ``sent_scores`` may be shorter than ``hypothesis``.

        Args:
            hypothesis: One hypothesis string per example.
            references: Reference sets; only the first one is consulted.
            tags: Unused.
            sources: Unused.

        Returns:
            A ``VizSeqScore`` with the collected sentence scores, their mean
            as corpus score when ``self.corpus_level`` is true, and an empty
            ``group_scores`` dict.

        Raises:
            AssertionError: If ``self.extra_args["problem"]`` is not one of
                the task-predicting problem types.
        """
        import logging

        problem = self.extra_args["problem"]

        # Only relevant if predicting tasks
        assert problem in (
            "TargetProductAndRequirements_TO_Tasks",
            "Requirements_TO_TargetProductAndTasks",
            "TargetProductAndRequirementsAndTasks",
            "RequirementsAndTargetProductAndTasks",
        )

        # Bind to a new name instead of overwriting the ``references``
        # parameter with a value of a different shape.
        first_refs = references[0]

        # For these problems the parsed output starts with the target
        # product, which is dropped before comparing task order.
        leads_with_target_product = problem in (
            "Requirements_TO_TargetProductAndTasks",
            "RequirementsAndTargetProductAndTasks",
        )
        # NOTE: a RecipeGPT-specific sentence split for
        # "TargetProductAndRequirementsAndTasks" was once sketched here but
        # is not active; that problem uses the generic parsing below.

        sent_scores = []
        for ref, hypo in zip(first_refs, hypothesis):
            if leads_with_target_product:
                tasks_gt = string_to_tasks(ref, parse_tp=True)[1:]
                try:
                    tasks_pred = string_to_tasks(hypo, parse_tp=True)[1:]
                except ValueError:
                    # Unparsable prediction: skip this pair.
                    continue
            else:
                # TargetProductAndRequirements_TO_Tasks or
                # TargetProductAndRequirementsAndTasks
                tasks_gt = string_to_tasks(ref)
                tasks_pred = string_to_tasks(hypo)

            try:
                score = compute_task_order_score(tasks_gt, tasks_pred)
            except ScoreComputationError:
                continue
            sent_scores.append(score)

        corpus_score = None
        if self.corpus_level:
            # Report how many pairs were scored through logging rather than
            # printing to stdout from library code.
            logging.getLogger(__name__).debug(
                "task-order score computed for %d sentence pairs",
                len(sent_scores))
            # If every pair was skipped, np.mean of the empty list yields
            # nan (with a RuntimeWarning).
            corpus_score = np.mean(sent_scores)

        return VizSeqScore.make(corpus_score=corpus_score,
                                sent_scores=sent_scores,
                                group_scores={})