예제 #1
0
 def load_data(self):
     """
     Populate this container from the file holding the encoding mappings.

     Every entry of the file contributes one mapping, keyed by the entry's
     'encoding' field and valued by its 'modality' field.
     """
     handler = FileHandler(self.logger, self.filename)
     records = handler.get('entries')
     for record in records:
         encoding = record.get('encoding')
         modality = record.get('modality')
         self.add(key=encoding, value=modality)
예제 #2
0
 def load_data(self):
     """
     Load the parent-children file into this DocumentMappings object.

     The first pass groups, per document element ID, the IDs of the
     documents it appears in together with its type and its upper-cased
     language. The second pass looks up the Document and DocumentElement
     objects for each (document, document element) pair, passing a newly
     built object as the lookup default, and links the two to each other.
     """
     grouped = nested_dict()
     handler = FileHandler(self.logger, self.filename)
     self.fileheader = handler.get('header')

     # Pass 1: collect the raw values per document element.
     for row in handler:
         element_info = grouped[row.get('doceid')]
         element_info['docids'][row.get('docid')] = 1
         element_info['detype'] = row.get('detype')
         element_info['delang'] = row.get('lang_manual').upper()

     # Pass 2: build the objects and cross-link them.
     for element_id in grouped:
         # TODO: next if doceid is n/a?
         element_info = grouped[element_id]
         language = element_info['delang']
         element_type = element_info['detype']
         modality = self.encodings.get(element_type)
         for document_id in element_info['docids']:
             known_core = (self.core_documents is not None
                           and self.core_documents.exists(document_id))
             document = self.get('documents').get(
                 document_id, default=Document(self.logger, document_id))
             document.set('is_core', 1 if known_core else 0)
             element = self.get('document_elements').get(
                 element_id,
                 default=DocumentElement(self.logger, element_id))
             element.add_document(document)
             element.set('type', element_type)
             element.set('modality', modality)
             element.set('language', language)
             document.add_document_element(element)
예제 #3
0
 def load_data(self):
     """
     Distribute the entries of the input file over the containers.

     Each entry goes into the container selected by its 'metatype' field,
     stored under its 'ontology_id' with its 'full_type' as the value.
     """
     handler = FileHandler(self.logger, self.filename)
     for record in handler.get('entries'):
         target = self.get('containers').get(record.get('metatype'))
         ontology_id = record.get('ontology_id')
         full_type = record.get('full_type')
         target.add(key=ontology_id, value=full_type)
예제 #4
0
 def merge_files(self, input_files, output_file):
     """
     Concatenate the entries of several input files into one output file.

     The stripped header line of the first input file is written once at
     the top of the output. If any later input file has a different
     stripped header line, a 'DEFAULT_CRITICAL_ERROR' event is recorded.
     The entry lines of every input file follow, in input order.
     """
     print('--merging ...')
     print('--input:{}'.format('\n'.join(input_files)))
     print('--output:{}'.format(output_file))

     header = None
     handlers = {}
     for path in input_files:
         handler = FileHandler(self.get('logger'),
                               path,
                               encoding='utf-8')
         current_header = handler.get('header').get('line').strip()
         if header is None:
             # The first file defines the header every other file must match.
             header = current_header
         elif header != current_header:
             self.record_event('DEFAULT_CRITICAL_ERROR',
                               'Input file headers do not match')
         handlers[path] = handler

     with open(output_file, 'w', encoding='utf-8') as program_output:
         program_output.write('{header}\n'.format(header=header))
         for handler in handlers.values():
             for entry in handler:
                 entry_line = entry.get('line')
                 program_output.write('{line}'.format(line=entry_line))
예제 #5
0
    def augment_file(self, input_file, output_file):
        """
        Copy input_file to output_file, replacing unusable object handles.

        The header line is written first, then every entry. An entry's
        '?objectc_handle' value is treated in one of two ways:

        * If it is one of the missing-handle markers ('[unknown]', '' or
          '""'), a replacement is looked up with
          self.get('handle_text', <the entry's '?oinf_j_span' value>).
        * If it consists of three colon-separated parts and matches the
          span pattern 'x:y:(a,b)-(c,d)', a replacement is looked up with
          self.get('handle_text', <the handle itself>).

        When the lookup yields a truthy value, the handle is overwritten and
        the output line is rebuilt from the entry's columns joined by tabs;
        otherwise the original line is written unchanged. Either outcome is
        recorded as a 'DEFAULT_INFO' event. Entries with no handle, or with
        a handle matching neither case, are copied as they are.

        Args:
            input_file: Path of the file to read.
            output_file: Path of the file to write (UTF-8, overwritten).
        """
        print('--augmenting ...')
        print('--input:{}'.format(input_file))
        print('--output:{}'.format(output_file))

        missing_handles = ['[unknown]', '', '""']

        # Raw string so that \w, \S, \( and \) reach the regex engine without
        # triggering Python's invalid-escape-sequence warning; compiled once
        # here instead of once per entry.
        span_pattern = re.compile(
            r'^(\w+?):(\w+?):\((\S+),(\S+)\)-\((\S+),(\S+)\)$')

        def rebuild_line(entry):
            # Re-serialize the entry from its (possibly updated) column values.
            columns = entry.get('header').get('columns')
            return '{}\n'.format(
                '\t'.join([entry.get(column) for column in columns]))

        fh = FileHandler(self.get('logger'), input_file, encoding='utf-8')
        with open(output_file, 'w', encoding='utf-8') as program_output:
            program_output.write(
                '{header}\n'.format(header=fh.get('header').get('line')))
            for entry in fh:
                line = entry.get('line')
                handle_text = entry.get('?objectc_handle')
                if handle_text is not None:
                    if handle_text in missing_handles:
                        corrected_handle_text = self.get(
                            'handle_text', entry.get('?oinf_j_span'))
                        if corrected_handle_text:
                            entry.set('?objectc_handle', corrected_handle_text)
                            self.record_event(
                                'DEFAULT_INFO',
                                "replacing missing handle '{}' with text '{}'"
                                .format(handle_text, corrected_handle_text),
                                entry.get('where'))
                            line = rebuild_line(entry)
                        else:
                            self.record_event(
                                'DEFAULT_INFO',
                                "handle '{}' found to be missing but no replacements made"
                                .format(handle_text), entry.get('where'))
                    elif (len(handle_text.split(':')) == 3
                          and span_pattern.match(handle_text)):
                        # The handle is itself a span; try to resolve it to text.
                        handle_text_from_span = self.get(
                            'handle_text', handle_text)
                        if handle_text_from_span:
                            entry.set('?objectc_handle', handle_text_from_span)
                            self.record_event(
                                'DEFAULT_INFO',
                                "replacing handle span '{}' with text '{}'"
                                .format(handle_text, handle_text_from_span),
                                entry.get('where'))
                            line = rebuild_line(entry)
                        else:
                            self.record_event(
                                'DEFAULT_INFO',
                                "handle span '{}' found but not replaced with text"
                                .format(handle_text), entry.get('where'))
                program_output.write('{line}'.format(line=line))