def score_responses(self):
        metatypes = {
            'ALL': ['Event', 'Relation'],
            'Event': ['Event'],
            'Relation': ['Relation']
        }
        scores = []
        for document_id in self.get('core_documents'):
            language = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id).get('language')
            gold_trfs = self.get('document_type_role_fillers', 'gold',
                                 document_id)
            system_trfs = self.get('document_type_role_fillers', 'system',
                                   document_id)
            self.align_trfs(document_id, gold_trfs, system_trfs)
            for metatype_key in metatypes:
                num_gold_trf, num_system_trf, precision, recall, f1 = self.get(
                    'score', gold_trfs, system_trfs, metatypes[metatype_key])
                if num_gold_trf + num_system_trf == 0: continue
                score = ArgumentMetricScore(logger=self.logger,
                                            run_id=self.get('run_id'),
                                            document_id=document_id,
                                            language=language,
                                            metatype=metatype_key,
                                            precision=precision,
                                            recall=recall,
                                            f1=f1)
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype_sortkey', False))):
            scores_printer.add(score)
        self.aggregate_scores(scores_printer, ArgumentMetricScore)
        self.scores = scores_printer
Esempio n. 2
0
    def score_responses(self):

        scores = ScorePrinter(self.logger, self.printing_specs, self.separator)

        # TODO: add details

        for query_id in self.queries_to_score:
            for document_id in self.document_mappings.get('core_documents'):
                num_submitted = 150
                num_correct = 70
                num_incorrect = 30
                num_right = 60
                num_wrong = 40
                num_redundant = 10
                num_pooled = 100
                num_ignored = 50
                num_ground_truth = 100
                scoring_metric_1 = 0.8
                scoring_metric_2 = 0.7
                scoring_metric_3 = 0.6
                score = ClassScore(self.logger, self.runid, query_id,
                                   document_id, num_submitted, num_correct,
                                   num_incorrect, num_right, num_wrong,
                                   num_redundant, num_pooled, num_ignored,
                                   num_ground_truth, scoring_metric_1,
                                   scoring_metric_2, scoring_metric_3)
                scores.add(score)
        self.scores = scores
Esempio n. 3
0
    def score_responses(self):
        scores = []
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get('documents').get(document_id)
            language = document.get('language')
            document_gold_to_system = self.get('cluster_alignment').get('gold_to_system').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(gold_cluster_id).get('aligned_similarity')
                similarity = 0
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('cluster', 'gold', document_id, gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Event', 'Relation']: continue
                if list(gold_cluster.get('dates').values())[0] is None:
                    self.record_event('NO_TEMPORAL_CONSTRAINT', gold_cluster_id, document_id)
                    continue
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id, system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE', system_cluster.get('metatype'), system_cluster_id, metatype, gold_cluster_id)
                    if len(gold_cluster.get('dates').keys()) > 1:
                        self.record_event('UNEXPECTED_NUM_DATES', gold_cluster_id, document_id)
                    similarity = self.get('temporal_similarity', list(gold_cluster.get('dates').values())[0], list(system_cluster.get('dates').values()))
                score = TemporalMetricScore(logger=self.logger,
                                            run_id=self.get('run_id'),
                                            document_id=document_id,
                                            language=language,
                                            metatype=metatype,
                                            gold_cluster_id=gold_cluster_id,
                                            system_cluster_id=system_cluster_id,
                                            similarity=similarity)
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype', False),
                                        ('gold_cluster_id', False),
                                        ('system_cluster_id', False))):
            scores_printer.add(score)
        self.aggregate_scores(scores_printer, TemporalMetricScore)
        self.scores = scores_printer
Esempio n. 4
0
    def score_responses(self):
        metatypes = {
            'ALL': ['Entity', 'Event'],
            'Entity': ['Entity'],
            'Event': ['Event']
        }
        scores = []
        for document_id in self.get('core_documents'):
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            for metatype_key in metatypes:
                max_total_similarity = self.get('max_total_similarity',
                                                document_id,
                                                metatypes[metatype_key])
                total_self_similarity_gold = self.get('total_self_similarity',
                                                      'gold', document_id,
                                                      metatypes[metatype_key])
                total_self_similarity_system = self.get(
                    'total_self_similarity', 'system', document_id,
                    metatypes[metatype_key])

                precision = max_total_similarity / total_self_similarity_system if total_self_similarity_system else 0
                recall = max_total_similarity / total_self_similarity_gold
                f1 = 2 * precision * recall / (
                    precision + recall) if precision + recall else 0
                score = CoreferenceMetricScore(logger=self.logger,
                                               run_id=self.get('run_id'),
                                               document_id=document_id,
                                               language=language,
                                               metatype=metatype_key,
                                               precision=precision,
                                               recall=recall,
                                               f1=f1)
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype_sortkey', False))):
            scores_printer.add(score)
        self.aggregate_scores(scores_printer, CoreferenceMetricScore)
        self.scores = scores_printer
Esempio n. 5
0
    def score_responses(self):
        scores = ScorePrinter(self.logger, self.printing_specs, self.separator)
        mean_f1 = 0
        for document_id in self.get('core_documents'):
            max_total_similarity = self.get('max_total_similarity',
                                            document_id)
            total_self_similarity_gold = self.get('total_self_similarity',
                                                  'gold', document_id)
            total_self_similarity_system = self.get('total_self_similarity',
                                                    'system', document_id)

            precision = max_total_similarity / total_self_similarity_system if total_self_similarity_system else 0
            recall = max_total_similarity / total_self_similarity_gold
            f1 = 2 * precision * recall / (precision +
                                           recall) if precision + recall else 0
            score = CoreferenceMetricScore(self.logger, self.get('runid'),
                                           document_id, precision, recall, f1)
            mean_f1 += f1
            scores.add(score)
        mean_f1 = mean_f1 / len(self.get('core_documents').keys())
        mean_score = CoreferenceMetricScore(self.logger,
                                            self.get('runid'),
                                            'Summary',
                                            '',
                                            '',
                                            mean_f1,
                                            summary=True)
        scores.add(mean_score)
        self.scores = scores
Esempio n. 6
0
    def score_responses(self):
        scores = []
        sum_average_precision = 0
        for query_id in self.get('queries_to_score'):
            entity_id = self.get('entity_id', query_id)
            counts = self.get('counts', query_id)
            sum_average_precision += counts['average_precision']
            score = AcrossDocumentsCoreferenceMetricScore(self.get('logger'),
                                                          run_id=self.get('run_id'),
                                                          query_id=query_id,
                                                          entity_id=entity_id,
                                                          **counts)
            scores.append(score)

        macro_counts = {'average_precision': sum_average_precision/len(self.get('queries_to_score'))}
        for field_name in [s.get('name') for s in self.get('printing_specs') if s.get('name').startswith('num_')]:
            macro_counts[field_name] = macro_counts[field_name] if field_name in macro_counts else ''
        macro_average_score = AcrossDocumentsCoreferenceMetricScore(self.get('logger'),
                                                                    run_id=self.get('run_id'),
                                                                    query_id='ALL-Macro',
                                                                    entity_id='Summary',
                                                                    summary=True,
                                                                    **macro_counts)

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores, (('entity_id', False),
                                        ('query_id', False))):
            scores_printer.add(score)
        scores_printer.add(macro_average_score)
        self.scores = scores_printer
Esempio n. 7
0
    def score_responses(self):
        metatypes = {
            'ALL': ['Entity', 'Event'],
            'Entity': ['Entity'],
            'Event': ['Event']
        }
        scores = []
        mean_f1s = {}
        counts = {}
        for document_id in self.get('core_documents'):
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            for metatype_key in metatypes:
                max_total_similarity = self.get('max_total_similarity',
                                                document_id,
                                                metatypes[metatype_key])
                total_self_similarity_gold = self.get('total_self_similarity',
                                                      'gold', document_id,
                                                      metatypes[metatype_key])
                total_self_similarity_system = self.get(
                    'total_self_similarity', 'system', document_id,
                    metatypes[metatype_key])

                precision = max_total_similarity / total_self_similarity_system if total_self_similarity_system else 0
                recall = max_total_similarity / total_self_similarity_gold
                f1 = 2 * precision * recall / (
                    precision + recall) if precision + recall else 0
                score = CoreferenceMetricScore(self.logger, self.get('runid'),
                                               document_id, language,
                                               metatype_key, precision, recall,
                                               f1)
                for language_key in ['ALL', language]:
                    key = '{language}:{metatype}'.format(language=language_key,
                                                         metatype=metatype_key)
                    mean_f1s[key] = mean_f1s.get(key, 0) + f1
                    counts[key] = counts.get(key, 0) + 1
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs,
                                      self.separator)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype_sortkey', False))):
            scores_printer.add(score)

        for key in sorted(mean_f1s, key=self.order):
            mean_f1 = mean_f1s[key] / counts[key] if counts[key] else 0
            language, metatype = key.split(':')
            mean_score = CoreferenceMetricScore(self.logger,
                                                self.get('runid'),
                                                'Summary',
                                                language,
                                                metatype,
                                                '',
                                                '',
                                                mean_f1,
                                                summary=True)
            scores_printer.add(mean_score)
        self.scores = scores_printer
Esempio n. 8
0
    def score_responses(self):
        metatypes = {
            'ALL': ['Event', 'Relation'],
            'Event': ['Event'],
            'Relation': ['Relation']
        }
        scores = []
        mean_f1s = {}
        counts = {}
        for document_id in self.get('core_documents'):
            language = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id).get('language')
            gold_trfs = self.get('document_type_role_fillers', 'gold',
                                 document_id)
            system_trfs = self.get('document_type_role_fillers', 'system',
                                   document_id)
            self.align_trfs(document_id, gold_trfs, system_trfs)
            for metatype_key in metatypes:
                num_gold_trf, num_system_trf, precision, recall, f1 = self.get(
                    'score', gold_trfs, system_trfs, metatypes[metatype_key])
                if num_gold_trf + num_system_trf == 0: continue
                for language_key in ['ALL', language]:
                    aggregate_key = '{language}:{metatype}'.format(
                        language=language_key, metatype=metatype_key)
                    mean_f1s[aggregate_key] = mean_f1s.get(aggregate_key,
                                                           0) + f1
                    counts[aggregate_key] = counts.get(aggregate_key, 0) + 1
                score = ArgumentMetricScore(self.logger, self.get('runid'),
                                            document_id, language,
                                            metatype_key, precision, recall,
                                            f1)
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs,
                                      self.separator)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype_sortkey', False))):
            scores_printer.add(score)

        for key in sorted(mean_f1s, key=self.order):
            mean_f1 = mean_f1s[key] / counts[key] if counts[key] else 0
            language, metatype = key.split(':')
            mean_score = ArgumentMetricScore(self.logger,
                                             self.get('runid'),
                                             'Summary',
                                             language,
                                             metatype,
                                             '',
                                             '',
                                             mean_f1,
                                             summary=True)
            scores_printer.add(mean_score)

        self.scores = scores_printer
Esempio n. 9
0
    def score_responses(self):
        scores = ScorePrinter(self.logger, self.printing_specs, self.separator)
        mean_f1 = 0
        count = 0
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document_gold_to_system = self.get('cluster_alignment').get(
                'gold_to_system').get(document_id)
            document_system_to_gold = self.get('cluster_alignment').get(
                'system_to_gold').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_similarity')
                if system_cluster_id and system_cluster_id != 'None' and aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                      'aligned_similarity=0')
                precision, recall, f1 = [0, 0, 0]
                if system_cluster_id and system_cluster_id != 'None':
                    gold_cluster = self.get('gold_responses').get(
                        'document_clusters').get(document_id).get(
                            gold_cluster_id)
                    system_cluster = self.get('system_responses').get(
                        'document_clusters').get(document_id).get(
                            system_cluster_id)
                    skip_flag = False
                    for cluster in [gold_cluster, system_cluster]:
                        if cluster.get('metatype') not in [
                                'Event', 'Relation'
                        ]:
                            skip_flag = True
                    if skip_flag: continue
                    gold_frame = self.get('gold_responses').get(
                        'document_frames').get(document_id).get(
                            gold_cluster_id)
                    gold_slot_fillers = {}
                    if gold_frame is None:
                        self.record_event('MISSING_GOLD_FRAME',
                                          gold_cluster.get('metatype'),
                                          gold_cluster_id, document_id,
                                          self.get('code_location'))
                        continue
                    for role_name in gold_frame.get('role_fillers'):
                        for gold_filler_cluster_id in gold_frame.get(
                                'role_fillers').get(role_name):
                            gold_slot_fillers['{}:{}'.format(
                                role_name, gold_filler_cluster_id)] = 1
                    system_frame = self.get('system_responses').get(
                        'document_frames').get(document_id).get(
                            system_cluster_id)
                    system_slot_fillers = {}
                    for role_name in system_frame.get('role_fillers'):
                        for system_filler_cluster_id in system_frame.get(
                                'role_fillers').get(role_name):
                            aligned_gold_filler_cluster_id = document_system_to_gold.get(
                                system_filler_cluster_id).get('aligned_to')
                            aligned_gold_filler_cluster_id_similarity = document_system_to_gold.get(
                                system_filler_cluster_id).get(
                                    'aligned_similarity')
                            if aligned_gold_filler_cluster_id and aligned_gold_filler_cluster_id != 'None':
                                if aligned_gold_filler_cluster_id_similarity == 0:
                                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                                      'aligned_similarity=0')
                                system_slot_fillers['{}:{}'.format(
                                    role_name,
                                    aligned_gold_filler_cluster_id)] = 1
                            else:
                                system_slot_fillers['{}:{}'.format(
                                    role_name, system_filler_cluster_id)] = 1
                    if len(gold_slot_fillers) and len(system_slot_fillers):
                        precision, recall, f1 = get_precision_recall_and_f1(
                            set(gold_slot_fillers.keys()),
                            set(system_slot_fillers.keys()))
                mean_f1 += f1
                count += 1
                score = FrameMetricScore(self.logger, self.get('runid'),
                                         document_id, gold_cluster_id,
                                         system_cluster_id, precision, recall,
                                         f1)
                scores.add(score)
            # add scores corresponding to unaligned system clusters
            for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
                gold_cluster_id = document_system_to_gold.get(
                    system_cluster_id).get('aligned_to')
                aligned_similarity = document_system_to_gold.get(
                    system_cluster_id).get('aligned_similarity')
                if gold_cluster_id and gold_cluster_id != 'None' and aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                      'aligned_similarity=0')
                if gold_cluster_id and gold_cluster_id != 'None': continue
                precision, recall, f1 = [0, 0, 0]
                count += 1
                score = FrameMetricScore(self.logger, self.get('runid'),
                                         document_id, gold_cluster_id,
                                         system_cluster_id, precision, recall,
                                         f1)
                scores.add(score)

        mean_f1 = mean_f1 / count if count else 0
        mean_score = FrameMetricScore(self.logger,
                                      self.get('runid'),
                                      'Summary',
                                      '',
                                      '',
                                      '',
                                      '',
                                      mean_f1,
                                      summary=True)
        scores.add(mean_score)
        self.scores = scores
Esempio n. 10
0
    def score_responses(self):
        scores = []
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            document_gold_to_system = self.get('cluster_alignment').get(
                'gold_to_system').get(document_id)
            document_system_to_gold = self.get('cluster_alignment').get(
                'system_to_gold').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_similarity')
                precision, recall, f1 = [0, 0, 0]
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('cluster', 'gold', document_id,
                                        gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Event', 'Relation']: continue
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id,
                                              system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event(
                            'UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                            system_cluster.get('metatype'), system_cluster_id,
                            metatype, gold_cluster_id)
                    gold_frame = self.get('frame', 'gold', document_id,
                                          gold_cluster_id)
                    gold_slot_fillers = {}
                    if gold_frame is None or len(
                            gold_frame.get('role_fillers')) == 0:
                        if gold_cluster.get('metatype') == 'Relation':
                            self.record_event('MISSING_GOLD_FRAME',
                                              gold_cluster.get('metatype'),
                                              gold_cluster_id, document_id,
                                              self.get('code_location'))
                        continue
                    for role_name in gold_frame.get('role_fillers'):
                        for gold_filler_cluster_id in gold_frame.get(
                                'role_fillers').get(role_name):
                            gold_slot_fillers['{}:{}'.format(
                                role_name, gold_filler_cluster_id)] = 1
                    system_frame = self.get('frame', 'system', document_id,
                                            system_cluster_id)
                    if system_frame:
                        system_slot_fillers = {}
                        for role_name in system_frame.get('role_fillers'):
                            for system_filler_cluster_id in system_frame.get(
                                    'role_fillers').get(role_name):
                                aligned_gold_filler_cluster_id = document_system_to_gold.get(
                                    system_filler_cluster_id).get('aligned_to')
                                aligned_gold_filler_cluster_id_similarity = document_system_to_gold.get(
                                    system_filler_cluster_id).get(
                                        'aligned_similarity')
                                if aligned_gold_filler_cluster_id != 'None':
                                    if aligned_gold_filler_cluster_id_similarity == 0:
                                        self.record_event(
                                            'DEFAULT_CRITICAL_ERROR',
                                            'aligned_similarity=0')
                                    system_slot_fillers['{}:{}'.format(
                                        role_name,
                                        aligned_gold_filler_cluster_id)] = 1
                                else:
                                    system_slot_fillers['{}:{}'.format(
                                        role_name,
                                        system_filler_cluster_id)] = 1
                        if len(gold_slot_fillers) and len(system_slot_fillers):
                            precision, recall, f1 = get_precision_recall_and_f1(
                                set(gold_slot_fillers.keys()),
                                set(system_slot_fillers.keys()))
                score = FrameMetricScore(logger=self.logger,
                                         run_id=self.get('run_id'),
                                         document_id=document_id,
                                         language=language,
                                         metatype=metatype,
                                         gold_cluster_id=gold_cluster_id,
                                         system_cluster_id=system_cluster_id,
                                         precision=precision,
                                         recall=recall,
                                         f1=f1)
                scores.append(score)
            # add scores corresponding to unaligned system clusters
            precision, recall, f1 = [0, 0, 0]
            for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
                gold_cluster_id = document_system_to_gold.get(
                    system_cluster_id).get('aligned_to')
                aligned_similarity = document_system_to_gold.get(
                    system_cluster_id).get('aligned_similarity')
                if system_cluster_id != 'None':
                    if gold_cluster_id == 'None':
                        metatype = self.get('cluster', 'system', document_id,
                                            system_cluster_id).get('metatype')
                        if metatype not in ['Event', 'Relation']: continue
                        score = FrameMetricScore(
                            logger=self.logger,
                            run_id=self.get('run_id'),
                            document_id=document_id,
                            language=language,
                            metatype=metatype,
                            gold_cluster_id=gold_cluster_id,
                            system_cluster_id=system_cluster_id,
                            precision=precision,
                            recall=recall,
                            f1=f1)
                        scores.append(score)
                    elif aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores,
                               (('document_id', False), ('metatype', False),
                                ('gold_cluster_id', False),
                                ('system_cluster_id', False))):
            scores_printer.add(score)
        self.aggregate_scores(scores_printer, FrameMetricScore)
        self.scores = scores_printer
Esempio n. 11
0
    def score_responses(self):
        scores = ScorePrinter(self.logger, self.printing_specs, self.separator)
        mean_f1 = 0
        count = 0
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document_gold_to_system = self.get('cluster_alignment').get(
                'gold_to_system').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_similarity')
                if system_cluster_id and aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                      'aligned_similarity=0')
                precision, recall, f1 = [0, 0, 0]
                if system_cluster_id:
                    gold_cluster_types = set(
                        self.get('gold_responses').get(
                            'document_clusters').get(document_id).get(
                                gold_cluster_id).get('all_expanded_types'))
                    system_cluster_types = set()
                    if document_id in self.get('system_responses').get(
                            'document_clusters'):
                        system_cluster_types = set(
                            self.get('system_responses').
                            get('document_clusters').get(document_id).get(
                                system_cluster_id).get('all_expanded_types'))
                    precision, recall, f1 = get_precision_recall_and_f1(
                        gold_cluster_types, system_cluster_types)
                mean_f1 += f1
                count += 1
                score = TypeMetricScore(self.logger, self.get('runid'),
                                        document_id, gold_cluster_id,
                                        system_cluster_id, precision, recall,
                                        f1)
                scores.add(score)
            # add scores unaligned system clusters
            document_system_to_gold = self.get('cluster_alignment').get(
                'system_to_gold').get(document_id)
            for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
                gold_cluster_id = document_system_to_gold.get(
                    system_cluster_id).get('aligned_to')
                aligned_similarity = document_system_to_gold.get(
                    system_cluster_id).get('aligned_similarity')
                if gold_cluster_id and aligned_similarity == 0:
                    self.record_event('DEFAULT_CRITICAL_ERROR',
                                      'aligned_similarity=0')
                if gold_cluster_id: continue
                precision, recall, f1 = [0, 0, 0]
                count += 1
                score = TypeMetricScore(self.logger, self.get('runid'),
                                        document_id, gold_cluster_id,
                                        system_cluster_id, precision, recall,
                                        f1)
                scores.add(score)

        mean_f1 = mean_f1 / count
        mean_score = TypeMetricScore(self.logger,
                                     self.get('runid'),
                                     'Summary',
                                     '',
                                     '',
                                     '',
                                     '',
                                     mean_f1,
                                     summary=True)
        scores.add(mean_score)
        self.scores = scores
Esempio n. 12
0
    def score_responses(self):
        scores = []
        mean_similarities = {}
        counts = {}
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            document_gold_to_system = self.get('cluster_alignment').get(
                'gold_to_system').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_similarity')
                similarity = 0
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('cluster', 'gold', document_id,
                                        gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Event', 'Relation']: continue
                if list(gold_cluster.get('dates').values())[0] is None:
                    self.record_event('NO_TEMPORAL_CONSTRAINT',
                                      gold_cluster_id, document_id)
                    continue
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id,
                                              system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event(
                            'UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                            system_cluster.get('metatype'), system_cluster_id,
                            metatype, gold_cluster_id)
                    if len(gold_cluster.get('dates').keys()) > 1:
                        self.record_event('UNEXPECTED_NUM_DATES',
                                          gold_cluster_id, document_id)
                    similarity = self.get(
                        'temporal_similarity',
                        list(gold_cluster.get('dates').values())[0],
                        list(system_cluster.get('dates').values()))
                for metatype_key in ['ALL', metatype]:
                    for language_key in ['ALL', language]:
                        key = '{language}:{metatype}'.format(
                            metatype=metatype_key, language=language_key)
                        mean_similarities[key] = mean_similarities.get(
                            key, 0) + similarity
                        counts[key] = counts.get(key, 0) + 1
                score = TemporalMetricScore(self.logger, self.get('runid'),
                                            document_id, language, metatype,
                                            gold_cluster_id, system_cluster_id,
                                            similarity)
                scores.append(score)

        scores_printer = ScorePrinter(self.logger, self.printing_specs,
                                      self.separator)
        for score in multisort(scores,
                               (('document_id', False), ('metatype', False),
                                ('gold_cluster_id', False),
                                ('system_cluster_id', False))):
            scores_printer.add(score)
        for key in sorted(mean_similarities, key=self.order):
            mean_similarity = mean_similarities[key] / counts[key] if counts[
                key] else 0
            language, metatype = key.split(':')
            mean_score = TemporalMetricScore(self.logger,
                                             self.get('runid'),
                                             'Summary',
                                             language,
                                             metatype,
                                             '',
                                             '',
                                             mean_similarity,
                                             summary=True)
            scores_printer.add(mean_score)
        self.scores = scores_printer
Esempio n. 13
0
    def score_responses(self):
        scores = []
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get('documents').get(document_id)
            language = document.get('language')
            self.record_event('ANNOTATED_TYPES_INFO', document_id, ','.join(self.get('annotated_regions').get('types_annotated_for_document', document_id)))
            document_gold_to_system = self.get('cluster_alignment').get('gold_to_system').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(gold_cluster_id).get('aligned_similarity')
                precision, recall, f1 = [0,0,0]
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('gold_responses').get('document_clusters').get(document_id).get(gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Entity', 'Event']: continue
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id, system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event('UNEXPECTED_ALIGNED_CLUSTER_METATYPE', system_cluster.get('metatype'), system_cluster_id, metatype, gold_cluster_id)
                    gold_types = set(gold_cluster.get('all_expanded_types'))
                    system_types = set()
                    if document_id in self.get('system_responses').get('document_clusters'):
                        system_types = set(self.get('system_responses').get('document_clusters').get(document_id).get(system_cluster_id).get('all_expanded_types'))
                    augmented_gold_types = self.get('augmented_types', document_id, gold_types)
                    augmented_system_types = self.get('augmented_types', document_id, system_types)
                    self.record_event('TYPE_METRIC_SCORE_INFO', self.__class__.__name__, 'TYPES_SUBMITTED', document_id, gold_cluster_id, ','.join(gold_types), system_cluster_id, ','.join(system_types))
                    self.record_event('TYPE_METRIC_SCORE_INFO', self.__class__.__name__, 'TYPES_SCORED', document_id, gold_cluster_id, ','.join(augmented_gold_types), system_cluster_id, ','.join(augmented_system_types))
                    precision, recall, f1 = get_precision_recall_and_f1(augmented_gold_types, augmented_system_types)
                score = TypeMetricScoreV1(logger=self.logger,
                                          run_id=self.get('run_id'),
                                          document_id=document_id,
                                          language=language,
                                          metatype=metatype,
                                          gold_cluster_id=gold_cluster_id,
                                          system_cluster_id=system_cluster_id,
                                          precision=precision,
                                          recall=recall,
                                          f1=f1)
                scores.append(score)
            # add scores unaligned system clusters
            document_system_to_gold = self.get('cluster_alignment').get('system_to_gold').get(document_id)
            for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
                gold_cluster_id = document_system_to_gold.get(system_cluster_id).get('aligned_to')
                aligned_similarity = document_system_to_gold.get(system_cluster_id).get('aligned_similarity')
                if system_cluster_id != 'None':
                    system_cluster = self.get('system_responses').get('document_clusters').get(document_id).get(system_cluster_id)
                    metatype = system_cluster.get('metatype')
                    if metatype not in ['Entity', 'Event']: continue
                    if gold_cluster_id == 'None':
                        precision, recall, f1 = [0,0,0]
                        score = TypeMetricScoreV1(logger=self.logger,
                                                  run_id=self.get('run_id'),
                                                  document_id=document_id,
                                                  language=language,
                                                  metatype=metatype,
                                                  gold_cluster_id=gold_cluster_id,
                                                  system_cluster_id=system_cluster_id,
                                                  precision=precision,
                                                  recall=recall,
                                                  f1=f1)
                        scores.append(score)
                    elif aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR', 'aligned_similarity=0')

        scores_printer = ScorePrinter(self.logger, self.printing_specs)
        for score in multisort(scores, (('document_id', False),
                                        ('metatype', False),
                                        ('gold_cluster_id', False),
                                        ('system_cluster_id', False))):
            scores_printer.add(score)
        self.aggregate_scores(scores_printer, TypeMetricScoreV1)
        self.scores = scores_printer
Esempio n. 14
0
    def score_responses(self):
        scores = []
        mean_f1s = {}
        counts = {}
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            document_gold_to_system = self.get('cluster_alignment').get(
                'gold_to_system').get(document_id)
            document_system_to_gold = self.get('cluster_alignment').get(
                'system_to_gold').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_similarity')
                precision, recall, f1 = [0, 0, 0]
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('cluster', 'gold', document_id,
                                        gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Event', 'Relation']: continue
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id,
                                              system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event(
                            'UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                            system_cluster.get('metatype'), system_cluster_id,
                            metatype, gold_cluster_id)
                    gold_frame = self.get('frame', 'gold', document_id,
                                          gold_cluster_id)
                    gold_slot_fillers = {}
                    if gold_frame is None or len(
                            gold_frame.get('role_fillers')) == 0:
                        if gold_cluster.get('metatype') == 'Relation':
                            self.record_event('MISSING_GOLD_FRAME',
                                              gold_cluster.get('metatype'),
                                              gold_cluster_id, document_id,
                                              self.get('code_location'))
                        continue
                    for role_name in gold_frame.get('role_fillers'):
                        for gold_filler_cluster_id in gold_frame.get(
                                'role_fillers').get(role_name):
                            gold_slot_fillers['{}:{}'.format(
                                role_name, gold_filler_cluster_id)] = 1
                    system_frame = self.get('frame', 'system', document_id,
                                            system_cluster_id)
                    if system_frame:
                        system_slot_fillers = {}
                        for role_name in system_frame.get('role_fillers'):
                            for system_filler_cluster_id in system_frame.get(
                                    'role_fillers').get(role_name):
                                aligned_gold_filler_cluster_id = document_system_to_gold.get(
                                    system_filler_cluster_id).get('aligned_to')
                                aligned_gold_filler_cluster_id_similarity = document_system_to_gold.get(
                                    system_filler_cluster_id).get(
                                        'aligned_similarity')
                                if aligned_gold_filler_cluster_id != 'None':
                                    if aligned_gold_filler_cluster_id_similarity == 0:
                                        self.record_event(
                                            'DEFAULT_CRITICAL_ERROR',
                                            'aligned_similarity=0')
                                    system_slot_fillers['{}:{}'.format(
                                        role_name,
                                        aligned_gold_filler_cluster_id)] = 1
                                else:
                                    system_slot_fillers['{}:{}'.format(
                                        role_name,
                                        system_filler_cluster_id)] = 1
                        if len(gold_slot_fillers) and len(system_slot_fillers):
                            precision, recall, f1 = get_precision_recall_and_f1(
                                set(gold_slot_fillers.keys()),
                                set(system_slot_fillers.keys()))
                for metatype_key in ['ALL', metatype]:
                    for language_key in ['ALL', language]:
                        key = '{language}:{metatype}'.format(
                            metatype=metatype_key, language=language_key)
                        mean_f1s[key] = mean_f1s.get(key, 0) + f1
                        counts[key] = counts.get(key, 0) + 1
                score = FrameMetricScore(self.logger, self.get('runid'),
                                         document_id, language, metatype,
                                         gold_cluster_id, system_cluster_id,
                                         precision, recall, f1)
                scores.append(score)
            # add scores corresponding to unaligned system clusters
            precision, recall, f1 = [0, 0, 0]
            for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
                gold_cluster_id = document_system_to_gold.get(
                    system_cluster_id).get('aligned_to')
                aligned_similarity = document_system_to_gold.get(
                    system_cluster_id).get('aligned_similarity')
                if system_cluster_id != 'None':
                    if gold_cluster_id == 'None':
                        metatype = self.get('cluster', 'system', document_id,
                                            system_cluster_id).get('metatype')
                        if metatype not in ['Event', 'Relation']: continue
                        for metatype_key in ['ALL', metatype]:
                            for language_key in ['ALL', language]:
                                key = '{language}:{metatype}'.format(
                                    metatype=metatype_key,
                                    language=language_key)
                                mean_f1s[key] = mean_f1s.get(key, 0) + f1
                                counts[key] = counts.get(key, 0) + 1
                        score = FrameMetricScore(self.logger,
                                                 self.get('runid'),
                                                 document_id, language,
                                                 metatype, gold_cluster_id,
                                                 system_cluster_id, precision,
                                                 recall, f1)
                        scores.append(score)
                    elif aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')

        scores_printer = ScorePrinter(self.logger, self.printing_specs,
                                      self.separator)
        for score in multisort(scores,
                               (('document_id', False), ('metatype', False),
                                ('gold_cluster_id', False),
                                ('system_cluster_id', False))):
            scores_printer.add(score)
        for key in sorted(mean_f1s, key=self.order):
            mean_f1 = mean_f1s[key] / counts[key] if counts[key] else 0
            language, metatype = key.split(':')
            mean_score = FrameMetricScore(self.logger,
                                          self.get('runid'),
                                          'Summary',
                                          language,
                                          metatype,
                                          '',
                                          '',
                                          '',
                                          '',
                                          mean_f1,
                                          summary=True)
            scores_printer.add(mean_score)

        self.scores = scores_printer
Esempio n. 15
0
    def score_responses(self):
        scores = []
        mean_f1s = {}
        counts = {}
        for document_id in self.get('core_documents'):
            # add scores corresponding to all gold clusters
            document = self.get('gold_responses').get('document_mappings').get(
                'documents').get(document_id)
            language = document.get('language')
            self.record_event(
                'ANNOTATED_TYPES_INFO', document_id, ','.join(
                    self.get('annotated_regions').get(
                        'types_annotated_for_document', document_id)))
            document_gold_to_system = self.get('cluster_alignment').get(
                'gold_to_system').get(document_id)
            for gold_cluster_id in document_gold_to_system if document_gold_to_system else []:
                system_cluster_id = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_to')
                aligned_similarity = document_gold_to_system.get(
                    gold_cluster_id).get('aligned_similarity')
                precision, recall, f1 = [0, 0, 0]
                if gold_cluster_id == 'None': continue
                gold_cluster = self.get('gold_responses').get(
                    'document_clusters').get(document_id).get(gold_cluster_id)
                metatype = gold_cluster.get('metatype')
                if metatype not in ['Entity', 'Event']: continue
                if system_cluster_id != 'None':
                    if aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')
                    system_cluster = self.get('cluster', 'system', document_id,
                                              system_cluster_id)
                    if system_cluster.get('metatype') != metatype:
                        self.record_event(
                            'UNEXPECTED_ALIGNED_CLUSTER_METATYPE',
                            system_cluster.get('metatype'), system_cluster_id,
                            metatype, gold_cluster_id)
                    gold_types = set(gold_cluster.get('all_expanded_types'))
                    system_types = set()
                    if document_id in self.get('system_responses').get(
                            'document_clusters'):
                        system_types = set(
                            self.get('system_responses').
                            get('document_clusters').get(document_id).get(
                                system_cluster_id).get('all_expanded_types'))
                    augmented_gold_types = self.get('augmented_types',
                                                    document_id, gold_types)
                    augmented_system_types = self.get('augmented_types',
                                                      document_id,
                                                      system_types)
                    self.record_event('TEMPORAL_METRIC_SCORE_INFO',
                                      'TYPES_SUBMITTED', document_id,
                                      gold_cluster_id, ','.join(gold_types),
                                      system_cluster_id,
                                      ','.join(system_types))
                    self.record_event('TEMPORAL_METRIC_SCORE_INFO',
                                      'TYPES_SCORED', document_id,
                                      gold_cluster_id,
                                      ','.join(augmented_gold_types),
                                      system_cluster_id,
                                      ','.join(augmented_system_types))
                    precision, recall, f1 = get_precision_recall_and_f1(
                        augmented_gold_types, augmented_system_types)
                for metatype_key in ['ALL', metatype]:
                    for language_key in ['ALL', language]:
                        key = '{language}:{metatype}'.format(
                            metatype=metatype_key, language=language_key)
                        mean_f1s[key] = mean_f1s.get(key, 0) + f1
                        counts[key] = counts.get(key, 0) + 1
                score = TypeMetricScore(self.logger, self.get('runid'),
                                        document_id, language, metatype,
                                        gold_cluster_id, system_cluster_id,
                                        precision, recall, f1)
                scores.append(score)
            # add scores unaligned system clusters
            document_system_to_gold = self.get('cluster_alignment').get(
                'system_to_gold').get(document_id)
            for system_cluster_id in document_system_to_gold if document_system_to_gold else []:
                gold_cluster_id = document_system_to_gold.get(
                    system_cluster_id).get('aligned_to')
                aligned_similarity = document_system_to_gold.get(
                    system_cluster_id).get('aligned_similarity')
                if system_cluster_id != 'None':
                    system_cluster = self.get('system_responses').get(
                        'document_clusters').get(document_id).get(
                            system_cluster_id)
                    metatype = system_cluster.get('metatype')
                    if metatype not in ['Entity', 'Event']: continue
                    if gold_cluster_id == 'None':
                        precision, recall, f1 = [0, 0, 0]
                        for metatype_key in ['ALL', metatype]:
                            for language_key in ['ALL', language]:
                                key = '{language}:{metatype}'.format(
                                    metatype=metatype_key,
                                    language=language_key)
                                mean_f1s[key] = mean_f1s.get(key, 0) + f1
                                counts[key] = counts.get(key, 0) + 1
                        score = TypeMetricScore(self.logger, self.get('runid'),
                                                document_id, language,
                                                metatype, gold_cluster_id,
                                                system_cluster_id, precision,
                                                recall, f1)
                        scores.append(score)
                    elif aligned_similarity == 0:
                        self.record_event('DEFAULT_CRITICAL_ERROR',
                                          'aligned_similarity=0')

        scores_printer = ScorePrinter(self.logger, self.printing_specs,
                                      self.separator)
        for score in multisort(scores,
                               (('document_id', False), ('metatype', False),
                                ('gold_cluster_id', False),
                                ('system_cluster_id', False))):
            scores_printer.add(score)
        for key in sorted(mean_f1s, key=self.order):
            mean_f1 = mean_f1s[key] / counts[key] if counts[key] else 0
            language, metatype = key.split(':')
            mean_score = TypeMetricScore(self.logger,
                                         self.get('runid'),
                                         'Summary',
                                         language,
                                         metatype,
                                         '',
                                         '',
                                         '',
                                         '',
                                         mean_f1,
                                         summary=True)
            scores_printer.add(mean_score)
        self.scores = scores_printer