Example no. 1
    def __init__(self, config, max_vocab_size=10000):
        self.config = config
        self.max_utt_len = config.max_utt_len
        self.black_domains = config.black_domains
        self.black_ratio = config.black_ratio
        self.include_domain = config.include_domain
        self.include_example = config.include_example
        self.include_state = config.include_state
        self.tokenize = get_tokenize()

        # read raw dialogs and per-domain meta data for train and test
        train_data, train_meta = self._read_file(self.config.train_dir,
                                                 is_train=True)
        test_data, test_meta = self._read_file(self.config.test_dir,
                                               is_train=False)

        # combine train and test domain meta
        train_meta.update(test_meta)
        self.domain_meta = self._process_meta(train_meta)
        self.corpus = self._process_dialog(train_data)
        self.test_corpus = self._process_dialog(test_data)
        self.logger.info("Loaded Corpus with %d, test %d" %
                         (len(self.corpus), len(self.test_corpus)))

        # build up a vocabulary
        self.vocab, self.rev_vocab = self._build_vocab(max_vocab_size)

        self.logger.info("Done loading corpus")
Example no. 2
    def __init__(self, config):
        self.config = config
        self._path = config.data_dir[0]
        self.max_utt_len = config.max_utt_len
        self.tokenize = get_tokenize()
        self.black_domains = config.black_domains
        self.black_ratio = config.black_ratio

        # load the three KVRET splits
        self.train_corpus = self._read_file(os.path.join(self._path, 'kvret_train_public.json'))
        self.valid_corpus = self._read_file(os.path.join(self._path, 'kvret_dev_public.json'))
        self.test_corpus = self._read_file(os.path.join(self._path, 'kvret_test_public.json'))

        # entity metadata used for entity-level evaluation
        with open(os.path.join(self._path, 'kvret_entities.json'), 'rb') as f:
            self.ent_metas = json.load(f)

        self.domain_descriptions = self._read_domain_descriptions(self._path)
        self._build_vocab()
        print("Done loading corpus")
Example no. 3
    def get_report(self, include_error=False):
        reports = []
        tokenize = get_tokenize()

        for domain, labels in self.domain_labels.items():
            predictions = self.domain_hyps[domain]
            self.logger.info("Generate report for {} for {} samples".format(
                domain, len(predictions)))
            refs, hyps = [], []

            # find entity precision, recall and f1
            tp, fp, fn = 0.0, 0.0, 0.0

            for label, hyp in zip(labels, predictions):
                # strip BOS/EOS markers and drop the first two tokens
                # before scoring
                label = label.replace(EOS, '').replace(BOS, '')
                hyp = hyp.replace(EOS, '').replace(BOS, '')
                ref_tokens = tokenize(label)[2:]
                hyp_tokens = tokenize(hyp)[2:]

                refs.append([ref_tokens])
                hyps.append(hyp_tokens)

                label_ents = self.pred_ents(label, tokenize, None)
                hyp_ents = self.pred_ents(hyp, tokenize, None)
                # hyp_ents = list(set(hyp_ents))

                ttpp, ffpp, ffnn = self._get_tp_fp_fn(label_ents, hyp_ents)
                tp += ttpp
                fp += ffpp
                fn += ffnn

            ent_precision, ent_recall, ent_f1 = self._get_prec_recall(
                tp, fp, fn)

            # compute corpus level scores
            bleu = bleu_score.corpus_bleu(
                refs, hyps, smoothing_function=SmoothingFunction().method1)
            report = "\nDomain: %s BLEU %f\n Entity precision %f recall %f and f1 %f\n" \
                     % (domain, bleu, ent_precision, ent_recall, ent_f1)
            reports.append(report)

        return "\n==== REPORT===={report}".format(
            report="========".join(reports))
Example no. 4
    def get_report(self, include_error=False):
        reports = []
        errors = []

        for domain, labels in self.domain_labels.items():
            intent2refs = defaultdict(list)
            intent2hyps = defaultdict(list)

            predictions = self.domain_hyps[domain]
            self.logger.info("Generate report for {} for {} samples".format(
                domain, len(predictions)))

            # entity precision / recall / F1 counts
            tp, fp, fn = 0.0, 0.0, 0.0

            # intent precision / recall / F1 counts
            itp, ifp, ifn = 0.0, 0.0, 0.0

            # KB (backend) query precision / recall / F1 counts
            btp, bfp, bfn = 0.0, 0.0, 0.0

            # references and hypotheses for corpus-level BLEU
            refs, hyps = [], []

            pred_intents = self.pred_acts(predictions)
            label_intents = self.pred_acts(labels)

            tokenize = get_tokenize()
            bad_predictions = []

            for label, hyp, label_ints, pred_ints in zip(
                    labels, predictions, label_intents, pred_intents):
                refs.append([label.split()])
                hyps.append(hyp.split())

                label_ents = self.pred_ents(label, tokenize, domain)
                pred_ents = self.pred_ents(hyp, tokenize, domain)

                for intent in label_ints:
                    intent2refs[intent].append([label.split()])
                    intent2hyps[intent].append(hyp.split())

                # update intent precision / recall counts
                ttpp, ffpp, ffnn = self._get_tp_fp_fn(label_ints, pred_ints)
                itp += ttpp
                ifp += ffpp
                ifn += ffnn

                # entity matches: route counts to the KB buckets for query
                # turns, otherwise to the entity buckets
                ttpp, ffpp, ffnn = self._get_tp_fp_fn(label_ents, pred_ents)
                if ffpp > 0 or ffnn > 0:
                    bad_predictions.append((label, hyp))

                if "query" in label_ints:
                    btp += ttpp
                    bfp += ffpp
                    bfn += ffnn
                else:
                    tp += ttpp
                    fp += ffpp
                    fn += ffnn

            # compute corpus level scores
            bleu = bleu_score.corpus_bleu(
                refs, hyps, smoothing_function=SmoothingFunction().method1)
            ent_precision, ent_recall, ent_f1 = self._get_prec_recall(
                tp, fp, fn)
            int_precision, int_recall, int_f1 = self._get_prec_recall(
                itp, ifp, ifn)
            back_precision, back_recall, back_f1 = self._get_prec_recall(
                btp, bfp, bfn)

            # compute BLEU w.r.t intents
            intent_report = []
            for intent in intent2refs.keys():
                i_bleu = bleu_score.corpus_bleu(
                    intent2refs[intent],
                    intent2hyps[intent],
                    smoothing_function=SmoothingFunction().method1)
                intent_report.append("{}: {}".format(intent, i_bleu))

            intent_report = "\n".join(intent_report)

            # collect mispredicted cases for the optional error report
            error = ''
            if include_error:
                error = '\nDomain {} errors\n'.format(domain)
                error += "\n".join([
                    'True: {} ||| Pred: {}'.format(r, h)
                    for r, h in bad_predictions
                ])
            report = "\nDomain: %s\n" \
                     "Entity precision %f recall %f and f1 %f\n" \
                     "Intent precision %f recall %f and f1 %f\n" \
                     "KB precision %f recall %f and f1 %f\n" \
                     "BLEU %f BEAK %f\n\n%s\n" \
                     % (domain,
                        ent_precision, ent_recall, ent_f1,
                        int_precision, int_recall, int_f1,
                        back_precision, back_recall, back_f1,
                        bleu, gmean([ent_f1, int_f1, back_f1, bleu]),
                        intent_report)
            reports.append(report)
            errors.append(error)

        if include_error:
            return "\n==== REPORT===={error}\n========\n {report}".format(
                error="========".join(errors), report="========".join(reports))
        else:
            return "\n==== REPORT===={report}".format(
                report="========".join(reports))