예제 #1
0
 def get_date_range(self, responses, entry, date_name):
     """
     Collect the 'before' and 'after' bounds of a date into one object.

     For each bound, the value stored in entry under '<date_name>_<bound>'
     is copied into a new Object. The 'responses' argument is not used.

     Returns the Object when at least one bound reads back as truthy,
     otherwise None.
     """
     bounds = Object(entry.get('logger'))
     found_any = False
     for bound in ('before', 'after'):
         source_key = '{}_{}'.format(date_name, bound)
         bounds.set(bound, entry.get(source_key))
         if bounds.get(bound):
             found_any = True
     if not found_any:
         return None
     return bounds
예제 #2
0
 def generate_date_start_and_end(self, response, entry):
     """
     Attach a 'date' object built from the entry's 'start' and 'end' fields.

     Truthy 'start'/'end' values are copied into a new Object. The entry's
     'date' is set to that Object if either field was truthy, otherwise to
     None. The stored 'date' is then added to the 'dates' of the entry's
     'subject_cluster'. The 'response' argument is not used.
     """
     span = Object(entry.get('logger'))
     found_any = False
     for bound in ('start', 'end'):
         value = entry.get(bound)
         if not value:
             continue
         found_any = True
         span.set(bound, value)
     if found_any:
         entry.set('date', span)
     else:
         entry.set('date', None)
     cluster_dates = entry.get('subject_cluster').get('dates')
     cluster_dates.add(entry.get('date'))
예제 #3
0
파일: generator.py 프로젝트: shahraj81/aida
 def generate_date_start_and_end(self, response, entry):
     """
     Attach a 'date' object built from the entry's 'start' and 'end' fields.

     Truthy 'start'/'end' values are copied into a new Object. The entry's
     'date' is set to that Object if either field was truthy, otherwise to
     None. Only when the entry's schema name is one of the two response
     schemas listed below is the stored 'date' also added to the 'dates'
     of the entry's 'subject_cluster'. The 'response' argument is not used.
     """
     span = Object(entry.get('logger'))
     found_any = False
     for bound in ('start', 'end'):
         value = entry.get(bound)
         if not value:
             continue
         found_any = True
         span.set(bound, value)
     if found_any:
         entry.set('date', span)
     else:
         entry.set('date', None)
     schemas_with_cluster_dates = (
         'AIDA_PHASE2_TASK1_TM_RESPONSE',
         'AIDA_PHASE3_TASK3_TM_RESPONSE',
     )
     schema_name = entry.get('schema').get('name')
     if schema_name in schemas_with_cluster_dates:
         cluster_dates = entry.get('subject_cluster').get('dates')
         cluster_dates.add(entry.get('date'))
예제 #4
0
 def get_date(self, responses, entry, date_name):
     """
     Build a date object from the entry's '<date_name>_month', '_day'
     and '_year' fields.

     Each field is trimmed; an empty string becomes None, anything else is
     converted with int(). The 'responses' argument is not used.

     Returns None when no field reads back as truthy or when the year is
     missing. If a day is given without a month, the day is cleared.
     """
     parts = ('month', 'day', 'year')
     # Trim every raw field first, before any int() conversion is attempted.
     raw_text = {}
     for part in parts:
         raw_text[part] = trim(entry.get('{}_{}'.format(date_name, part)))
     date = Object(entry.get('logger'))
     any_set = False
     for part in parts:
         text = raw_text[part]
         if text == '':
             date.set(part, None)
         else:
             date.set(part, int(text))
         if date.get(part):
             any_set = True
     # A date without a year counts as entirely missing, even if day and
     # month were provided.
     if not any_set or not date.get('year'):
         return None
     # A day is meaningless without its month.
     if date.get('day') and not date.get('month'):
         date.set('day', None)
     return date
예제 #5
0
파일: generator.py 프로젝트: shahraj81/aida
    def get_date(self, responses, entry, date_name):
        """
        Build a date object from the entry's '<date_name>_month', '_day'
        and '_year' fields, filling in missing parts via LDCTime.

        Each field is trimmed; an empty string is stored as None and recorded
        as missing, anything else is converted with int(). When nothing is
        missing the object is returned as is. Otherwise a 'year-month-day'
        string with 'xx'/'xxxx' placeholders for the missing parts is handed
        to LDCTime together with the two '_'-separated halves of date_name.

        Returns None when the year is missing. Otherwise logs
        MISSING_DATE_FIELD, overwrites all three fields of the date object
        from the corrected date, writes the double-quoted corrected values
        back into the entry, and returns the object. The 'responses'
        argument is not used.
        """
        placeholders = {'month': 'xx', 'day': 'xx', 'year': 'xxxx'}
        # Trim every raw field first, before any int() conversion.
        field_text = {}
        for part in placeholders:
            field_text[part] = trim(entry.get('{}_{}'.format(date_name, part)))
        missing = []
        date = Object(entry.get('logger'))
        for part in placeholders:
            text = field_text[part]
            if text == '':
                date.set(part, None)
                missing.append(part)
                # Substitute the placeholder so the date string can be built.
                field_text[part] = placeholders[part]
            else:
                date.set(part, int(text))

        if not missing:
            return date

        unspecified_date = '{year}-{month}-{day}'.format(
            day=field_text['day'],
            month=field_text['month'],
            year=field_text['year'])
        start_or_end, before_or_after = date_name.split('_')
        corrected = LDCTime(self.get('logger'), unspecified_date,
                            start_or_end, before_or_after,
                            entry.get('where'))
        # Without a year the whole date is treated as absent.
        if 'year' in missing:
            return None

        self.record_event('MISSING_DATE_FIELD',
                          date_name, ','.join(missing),
                          str(corrected), date_name,
                          entry.get('where'))
        for part in placeholders:
            corrected_text = str(corrected.get(part))
            date.set(part, int(corrected_text))
            entry.set('{}_{}'.format(date_name, part),
                      '"{}"'.format(corrected_text))
        return date
예제 #6
0
 def get_document_type_role_fillers(self, system_or_gold, document_id):
     """
     Gather the type/role/filler combinations found in one document.

     Reads '<system_or_gold>_responses' from self and walks every frame
     stored under document_id in its 'document_frames'. Each
     (type invoked, role name, filler cluster id) combination gets one
     entry keyed '<type>_<role>:<filler_cluster_id>', carrying the frame's
     metatype and a container of the predicate justifications seen for it.

     Returns the container of entries; it is empty when document_id is not
     among the document frames.
     """
     logger = self.get('logger')
     collected = Container(logger)
     responses = self.get('{}_responses'.format(system_or_gold))
     if document_id not in responses.get('document_frames'):
         return collected
     frames = responses.get('document_frames').get(document_id)
     for frame in frames.values():
         metatype = frame.get('metatype')
         role_fillers = frame.get('role_fillers')
         for role_name in role_fillers:
             fillers_for_role = role_fillers.get(role_name)
             for filler_cluster_id in fillers_for_role:
                 justifications = fillers_for_role.get(filler_cluster_id)
                 for predicate_justification in justifications:
                     type_invoked = self.get('type_invoked',
                                             predicate_justification,
                                             role_name)
                     entry_key = '{type_invoked}_{role_name}:{filler_cluster_id}'.format(
                         type_invoked=type_invoked,
                         role_name=role_name,
                         filler_cluster_id=filler_cluster_id)
                     record = collected.get(entry_key, default=Object(logger))
                     record.set('metatype', metatype)
                     record.set('type', type_invoked)
                     record.set('role_name', role_name)
                     record.set('filler_cluster_id', filler_cluster_id)
                     # Create the justification container on first use.
                     if record.get('predicate_justifications') is None:
                         record.set('predicate_justifications',
                                    Container(logger))
                     record.get('predicate_justifications').add(
                         predicate_justification)
     return collected
예제 #7
0
def spanstring_to_object(logger, span_string, where=None):
    """
    Parse a span string into a mention object.

    The expected shape is
    '<document_id>:<document_element_id>:(<a>,<b>)-(<c>,<d>)', where the
    four values in parentheses are whitespace-free tokens that are passed
    to Span in that order. The second component is split into a document
    element id and a keyframe id by parse_document_element_id.

    Parameters:
        logger: logger handed to the created objects and used to record
            a format error.
        span_string: the string to parse.
        where: location information stored on the mention and passed
            along when an error is recorded.

    Returns:
        An Object carrying 'span_string', 'document_id',
        'document_element_id', 'keyframe_id', 'span' and 'where' on a
        successful match. If span_string does not match,
        UNEXPECTED_SPAN_FORMAT is recorded and an Object with none of
        these fields set is returned.
    """
    # Raw string: '\(' and '\S' are regex escapes, not string escapes.
    # A non-raw literal triggers an invalid-escape warning in newer Pythons.
    pattern = re.compile(r'^(.*?):(.*?):\((\S+),(\S+)\)-\((\S+),(\S+)\)$')
    match = pattern.match(span_string)
    mention = Object(logger)
    if not match:
        logger.record_event('UNEXPECTED_SPAN_FORMAT', span_string, where)
        return mention
    document_id = match.group(1)
    document_element_id, keyframe_id = parse_document_element_id(match.group(2))
    span = Span(logger, match.group(3), match.group(4), match.group(5), match.group(6))
    mention.set('span_string', span_string)
    mention.set('document_id', document_id)
    mention.set('document_element_id', document_element_id)
    mention.set('keyframe_id', keyframe_id)
    mention.set('span', span)
    mention.set('where', where)
    return mention
예제 #8
0
 def update(self, entry):
     """
     Fold one response entry into this frame.

     The entry's '?predicate' is split on '_' into a type and a role name.
     The frame's metatype is initialised from the entry if unset; a
     differing metatype is reported as METATYPE_MISMATCH. The type is
     marked in 'types', and a filler object built from the entry is
     appended under role_fillers[role name][filler cluster id].
     """
     type_name, role = entry.get('?predicate').split('_')
     entry_metatype = entry.get('?metatype')
     if self.get('metatype') is None:
         self.set('metatype', entry_metatype)
     if self.get('metatype') != entry_metatype:
         self.record_event('METATYPE_MISMATCH', self.get('ID'),
                           self.get('metatype'), entry_metatype,
                           entry.get('where'))
     self.get('types')[type_name] = 1

     filler = Object(self.get('logger'))
     cluster_id = entry.get('?object')
     filler.set('filler_cluster_id', cluster_id)
     # These three fields are copied from the entry's '?'-prefixed columns.
     for field in ('predicate_justification',
                   'argument_assertion_confidence',
                   'predicate_justification_confidence'):
         filler.set(field, entry.get('?{}'.format(field)))
     filler.set('where', entry.get('where'))

     # Create the nested role -> cluster -> [fillers] slots on demand.
     fillers_by_role = self.get('role_fillers')
     if role not in fillers_by_role:
         fillers_by_role[role] = {}
     fillers_by_cluster = fillers_by_role[role]
     if cluster_id not in fillers_by_cluster:
         fillers_by_cluster[cluster_id] = []
     fillers_by_cluster[cluster_id].append(filler)
예제 #9
0
 def load_classquery_assessments(self):
     """
     Load class-query assessments from '<assessments_dir>/data/class/*/*.tab'.

     Every line becomes an assessment entry keyed
     '<queryid>:<docid>:<mention_span>'. A key that is already present is
     reported as MULTIPLE_ASSESSMENTS and the existing entry is kept.
     A CORRECT assessment whose equivalence class is 'NIL' receives a
     generated class 'NILG<number>', allocated once per key. Each line is
     also logged as a GROUND_TRUTH event.
     """
     next_fqec_num = 1001
     generated_fqecs = {}
     query_type = 'ClassQuery'
     path = '{}/data/class/*/*.tab'.format(self.assessments_dir)
     header = FileHeader(
         self.logger, "\t".join(assessments.get(query_type).get('columns')))
     fields = ('queryid', 'docid', 'mention_span', 'assessment', 'fqec',
               'where')
     # glob returns files in arbitrary order; sort so that the generated
     # NILG numbers are reproducible from run to run.
     for filename in sorted(glob.glob(path)):
         for entry in FileHandler(self.logger, filename, header):
             queryid, docid, mention_span, assessment_read, fqec_read, where = (
                 entry.get(field) for field in fields)
             assessment = self.normalize('assessment', assessment_read)
             query_and_document = '{}:{}'.format(queryid, docid)
             key = '{}:{}'.format(query_and_document, mention_span)
             if self.exists(key):
                 self.logger.record_event('MULTIPLE_ASSESSMENTS', key,
                                          where)
             fqec = fqec_read
             if fqec == 'NIL' and self.normalize('assessment',
                                                 assessment) == 'CORRECT':
                 if key not in generated_fqecs:
                     generated_fqecs[key] = 'NILG{}'.format(next_fqec_num)
                     # Advance the counter; without this every generated
                     # class would be the same 'NILG1001'.
                     next_fqec_num += 1
                 fqec = generated_fqecs[key]
             assessment_entry = Object(self.logger)
             assessment_entry.set('assessment', assessment)
             assessment_entry.set('docid', docid)
             assessment_entry.set('queryid', queryid)
             assessment_entry.set('mention_span', mention_span)
             assessment_entry.set('fqec_read', fqec_read)
             assessment_entry.set('fqec', fqec)
             assessment_entry.set('where', where)
             # Keep the first assessment seen for a key.
             if not self.exists(key):
                 self.add(key=key, value=assessment_entry)
             line = 'QUERYID={} DOCID={} MENTION={} ASSESSMENT={} FQEC_READ={} FQEC={}'.format(
                 queryid, docid, mention_span, assessment, fqec_read, fqec)
             self.logger.record_event('GROUND_TRUTH', line, where)
예제 #10
0
    def load_task2_assessments(self):
        """
        Load task-2 assessments from '<assessments_dir>/data/zero-hop/*.tab'.

        Every line becomes an assessment entry stored in a per-query
        container under the key '<docid>:<mention_span>'. A CORRECT
        assessment whose equivalence class is 'NIL' receives a generated
        class 'NILG<number>', allocated once per
        '<queryid>:<docid>:<mention_span>' key. Each line is also logged
        as a GROUND_TRUTH event together with the entity id of its query.
        """
        next_fqec_num = 1001
        generated_fqecs = {}
        path = '{}/data/zero-hop/*.tab'.format(self.assessments_dir)
        header = FileHeader(self.logger, "\t".join(assessments.get('task2').get('across_documents_coreference').get('columns')))
        fields = ('queryid', 'docid', 'mention_span', 'assessment', 'fqec', 'where')
        # glob returns files in arbitrary order; sort so that the generated
        # NILG numbers are reproducible from run to run.
        for filename in sorted(glob.glob(path)):
            for entry in FileHandler(self.logger, filename, header):
                queryid, docid, mention_span, assessment_read, fqec_read, where = (
                    entry.get(field) for field in fields)
                entity_id = self.get('queries_to_score').get(queryid).get('entity_id')
                assessment = self.normalize('assessment', assessment_read)
                query_and_document = '{}:{}'.format(queryid, docid)
                key = '{}:{}'.format(query_and_document, mention_span)
                # NOTE(review): entries are added below under queryid, not
                # under this full key -- confirm this duplicate check can fire.
                if self.exists(key):
                    self.logger.record_event('MULTIPLE_ASSESSMENTS', key, where)
                fqec = fqec_read
                if fqec == 'NIL' and self.normalize('assessment', assessment) == 'CORRECT':
                    if key not in generated_fqecs:
                        generated_fqecs[key] = 'NILG{}'.format(next_fqec_num)
                        # Advance the counter; without this every generated
                        # class would be the same 'NILG1001'.
                        next_fqec_num += 1
                    fqec = generated_fqecs[key]
                assessment_entry = Object(self.logger)
                assessment_entry.set('assessment', assessment)
                assessment_entry.set('docid', docid)
                assessment_entry.set('queryid', queryid)
                assessment_entry.set('mention_span', mention_span)
                assessment_entry.set('fqec_read', fqec_read)
                assessment_entry.set('fqec', fqec)
                assessment_entry.set('line', entry.get('line'))
                assessment_entry.set('where', where)

                # One container per query, created on first use.
                if not self.exists(queryid):
                    self.add(key=queryid, value=Container(self.get('logger')))
                # Drop the leading query id component from the key.
                self.get(queryid).add(key=':'.join(key.split(':')[1:]), value=assessment_entry)

                line = 'ENTITYID={} QUERYID={} DOCID={} MENTION={} ASSESSMENT={} FQEC_READ={} FQEC={}'.format(
                    entity_id, queryid, docid, mention_span, assessment, fqec_read, fqec)
                self.logger.record_event('GROUND_TRUTH', line, where)