Ejemplo n.º 1
0
 def get_all_expanded_types(self):
     """
     Return the de-duplicated expanded types for every type asserted
     for the cluster.

     Each value in self.get('types') is passed, together with
     self.get('metatype'), to get_expanded_types(); the results are
     merged into a single list in which each expanded type appears once,
     in dict insertion order (i.e. order of first appearance).
     """
     # A dict doubles as an insertion-ordered set of the types seen so far.
     seen = {}
     for asserted_type in self.get('types'):
         for expanded_type in get_expanded_types(self.get('metatype'), asserted_type):
             seen.setdefault(expanded_type, 1)
     return list(seen)
Ejemplo n.º 2
0
def run_filter_on_entry(entry, schema_name, filtered_clusters, annotated_regions, document_mappings, document_boundaries):
    """
    Decide whether a response entry passes the filter and store the
    verdict on the entry via entry.set('passes_filter', ...).

    The decision depends on the schema of the entry:

    * AM response: the entry passes unless the subject or object cluster
      has a falsy verdict recorded in filtered_clusters. The first such
      cluster is logged and the check stops there.
    * CM response: the entry passes when annotated_regions.contains()
      accepts the entry's mention together with its expanded types. The
      verdict for the entry's cluster in filtered_clusters is created as
      False when absent and switched to True on a pass.
    * TM response: the verdict recorded for the entry's cluster in
      filtered_clusters is used as is.

    Keys of filtered_clusters have the form '<kb_document_id>:<cluster ID>'.
    A key that is absent in the AM or TM case is reported as
    MISSING_ENTRY_IN_LOOKUP_ERROR. Any other schema name is reported as
    DEFAULT_CRITICAL_ERROR and, if the logger returns, the entry is marked
    as not passing.

    Arguments:
        entry: response entry; read through get() and updated through set().
        schema_name: name of the schema the entry belongs to.
        filtered_clusters: mapping from cluster key to verdict; updated in
            place for CM responses.
        annotated_regions: object whose contains(mention, types) decides
            CM entries.
        document_mappings: forwarded to get_mention() for CM entries.
        document_boundaries: forwarded to get_mention() for CM entries.
    """
    am_schema = 'AIDA_PHASE2_TASK1_AM_RESPONSE'
    cm_schema = 'AIDA_PHASE2_TASK1_CM_RESPONSE'
    tm_schema = 'AIDA_PHASE2_TASK1_TM_RESPONSE'

    logger = entry.get('logger')

    def lookup_key(cluster_field):
        # Build the filtered_clusters key for the cluster stored in the given entry field.
        return '{}:{}'.format(entry.get('kb_document_id'), entry.get(cluster_field).get('ID'))

    def report_missing(missing_key):
        logger.record_event('MISSING_ENTRY_IN_LOOKUP_ERROR', missing_key, 'filtered_clusters', logger.get('code_location'))

    def report_failure(message):
        logger.record_event('DEFAULT_INFO', message, entry.get('where'))

    if schema_name not in (am_schema, cm_schema, tm_schema):
        logger.record_event('DEFAULT_CRITICAL_ERROR', 'Unexpected schema name: {}'.format(schema_name), logger.get('code_location'))

    verdict = False

    if schema_name == am_schema:
        # Both keys are computed up front, subject first, before any lookup happens.
        keyed_roles = [(role, lookup_key('{}_cluster'.format(role))) for role in ('subject', 'object')]
        verdict = True
        for role, lookup in keyed_roles:
            if lookup not in filtered_clusters:
                # NOTE(review): a missing cluster is only reported here; it does
                # not by itself flip the verdict to False -- confirm this is intended.
                report_missing(lookup)
                continue
            if filtered_clusters[lookup]:
                continue
            verdict = False
            failed_cluster_id = entry.get('{}_cluster'.format(role)).get('ID')
            report_failure('Entry fails the filter due to cluster: {}'.format(failed_cluster_id))
            break

    elif schema_name == cm_schema:
        mention = get_mention(entry, document_mappings, document_boundaries)
        lookup = lookup_key('cluster')
        # Make sure the cluster has a verdict even if none of its mentions pass.
        if lookup not in filtered_clusters:
            filtered_clusters[lookup] = False
        if not annotated_regions.contains(mention, get_expanded_types(entry.get('metatype'), entry.get('cluster_type'))):
            report_failure('Entry fails the filter due to mention: {}'.format(mention.get('span_string')))
        else:
            verdict = True
            filtered_clusters[lookup] = True

    elif schema_name == tm_schema:
        cluster_id = entry.get('cluster').get('ID')
        lookup = '{}:{}'.format(entry.get('kb_document_id'), cluster_id)
        if lookup not in filtered_clusters:
            report_missing(lookup)
        else:
            verdict = filtered_clusters[lookup]
            if not verdict:
                report_failure('Entry fails the filter due to cluster: {}'.format(cluster_id))

    entry.set('passes_filter', verdict)