def load_data(self):
    """
    Populate this object from the encoding-to-modality mappings file.

    The file at ``self.filename`` is opened through ``FileHandler``; for
    every entry, the 'encoding' field becomes the key and the 'modality'
    field becomes the value passed to ``self.add``.
    """
    handler = FileHandler(self.logger, self.filename)
    for record in handler.get('entries'):
        encoding = record.get('encoding')
        modality = record.get('modality')
        self.add(key=encoding, value=modality)
def load_data(self):
    """
    Load the parent-children file into this DocumentMappings object.

    The work is done in two passes:

    1. Every row of the file is folded into a per-document-element record
       holding the ids of its parent documents, its type and its
       upper-cased manual language.
    2. For every document element, the Document / DocumentElement objects
       are looked up in ``self.get('documents')`` and
       ``self.get('document_elements')`` (a fresh object is supplied as the
       lookup default) and linked to each other in both directions.

    The file header is kept in ``self.fileheader``.
    """
    elements = nested_dict()
    handler = FileHandler(self.logger, self.filename)
    self.fileheader = handler.get('header')

    # Pass 1: group the rows by document element id.
    for row in handler:
        element_id = row.get('doceid')
        parent_id = row.get('docid')
        element_type = row.get('detype')
        language = row.get('lang_manual')
        record = elements[element_id]
        record['docids'][parent_id] = 1
        record['detype'] = element_type
        record['delang'] = language.upper()

    # Pass 2: build and cross-link the document objects.
    for element_id in elements:
        # TODO: next if doceid is n/a?
        record = elements[element_id]
        language = record['delang']
        element_type = record['detype']
        modality = self.encodings.get(element_type)
        for parent_id in record['docids']:
            # A document is flagged as core only when a core-documents
            # collection was supplied and it contains this document id.
            is_core = 1 if (self.core_documents is not None
                            and self.core_documents.exists(parent_id)) else 0
            documents = self.get('documents')
            document = documents.get(
                parent_id, default=Document(self.logger, parent_id))
            document.set('is_core', is_core)
            document_elements = self.get('document_elements')
            document_element = document_elements.get(
                element_id, default=DocumentElement(self.logger, element_id))
            document_element.add_document(document)
            document_element.set('type', element_type)
            document_element.set('modality', modality)
            document_element.set('language', language)
            document.add_document_element(document_element)
def load_data(self):
    """
    Distribute the entries of the file at ``self.filename`` over the
    per-metatype containers.

    For each entry, the container registered under the entry's 'metatype'
    is fetched from ``self.get('containers')`` and receives the entry's
    'ontology_id' as key and its 'full_type' as value.
    """
    handler = FileHandler(self.logger, self.filename)
    for record in handler.get('entries'):
        metatype = record.get('metatype')
        target = self.get('containers').get(metatype)
        ontology_id = record.get('ontology_id')
        full_type = record.get('full_type')
        target.add(key=ontology_id, value=full_type)
def merge_files(self, input_files, output_file):
    """
    Concatenate several files that share a header into ``output_file``.

    The stripped header line of the first input file is taken as the
    reference; any later file whose stripped header differs is reported
    through ``self.record_event`` as 'DEFAULT_CRITICAL_ERROR'. The output
    consists of the reference header followed by the 'line' of every entry
    of every input file, in the order the files were given.

    Handlers are kept in a dict keyed by path, so a path listed more than
    once in ``input_files`` is merged only once.

    Args:
        input_files: Paths of the files to merge.
        output_file: Path of the merged file to write (UTF-8).
    """
    print('--merging ...')
    print('--input:{}'.format('\n'.join(input_files)))
    print('--output:{}'.format(output_file))

    reference_header = None
    handlers = {}
    for path in input_files:
        handler = FileHandler(self.get('logger'), path, encoding='utf-8')
        current_header = handler.get('header').get('line').strip()
        if reference_header is None:
            reference_header = current_header
        if reference_header != current_header:
            self.record_event('DEFAULT_CRITICAL_ERROR',
                              'Input file headers do not match')
        handlers[path] = handler

    with open(output_file, 'w', encoding='utf-8') as program_output:
        program_output.write('{header}\n'.format(header=reference_header))
        for handler in handlers.values():
            # Entry lines are written as-is; no separator is added here.
            program_output.writelines(
                '{line}'.format(line=entry.get('line')) for entry in handler)
def augment_file(self, input_file, output_file):
    """
    Copy ``input_file`` to ``output_file``, replacing unusable values in
    the '?objectc_handle' column with text where a replacement is found.

    For every entry that has an '?objectc_handle' value:

    * if the value is one of the placeholders '[unknown]', '' or '""', a
      replacement is requested via ``self.get('handle_text', ...)`` using
      the entry's '?oinf_j_span' value;
    * otherwise, if the value has three colon-separated parts and matches
      the span pattern, a replacement is requested using the value itself.

    When a non-empty replacement comes back, the entry is updated and its
    output row is rebuilt from the header's columns, joined by tabs. In all
    other cases the entry's original 'line' is written unchanged. Every
    decision is reported through ``self.record_event`` as 'DEFAULT_INFO'.

    Args:
        input_file: Path of the file to read (UTF-8).
        output_file: Path of the augmented file to write (UTF-8).
    """
    print('--augmenting ...')
    print('--input:{}'.format(input_file))
    print('--output:{}'.format(output_file))
    missing_handles = ('[unknown]', '', '""')
    # Raw string: the escapes below are regex escapes, not string escapes.
    # A plain string literal here triggers an invalid-escape-sequence
    # warning. The pattern is loop-invariant, so it is compiled once.
    span_pattern = re.compile(
        r'^(\w+?):(\w+?):\((\S+),(\S+)\)-\((\S+),(\S+)\)$')

    def rebuild_line(entry):
        """Serialise the entry's current column values as one tab-separated row."""
        columns = entry.get('header').get('columns')
        return '{}\n'.format(
            '\t'.join([entry.get(column) for column in columns]))

    fh = FileHandler(self.get('logger'), input_file, encoding='utf-8')
    with open(output_file, 'w', encoding='utf-8') as program_output:
        program_output.write(
            '{header}\n'.format(header=fh.get('header').get('line')))
        for entry in fh:
            # Default: pass the row through untouched.
            line = entry.get('line')
            handle_text = entry.get('?objectc_handle')
            if handle_text is not None:
                if handle_text in missing_handles:
                    corrected_handle_text = self.get(
                        'handle_text', entry.get('?oinf_j_span'))
                    if corrected_handle_text:
                        entry.set('?objectc_handle', corrected_handle_text)
                        self.record_event(
                            'DEFAULT_INFO',
                            'replacing missing handle \'{}\' with text \'{}\''
                            .format(handle_text, corrected_handle_text),
                            entry.get('where'))
                        line = rebuild_line(entry)
                    else:
                        self.record_event(
                            'DEFAULT_INFO',
                            "handle \'{}\' found to be missing but no replacements made"
                            .format(handle_text),
                            entry.get('where'))
                elif len(handle_text.split(':')) == 3:
                    handle_span = handle_text
                    if span_pattern.match(handle_span):
                        handle_text_from_span = self.get(
                            'handle_text', handle_span)
                        if handle_text_from_span:
                            entry.set('?objectc_handle',
                                      handle_text_from_span)
                            self.record_event(
                                'DEFAULT_INFO',
                                'replacing handle span \'{}\' with text \'{}\''
                                .format(handle_span, handle_text_from_span),
                                entry.get('where'))
                            line = rebuild_line(entry)
                        else:
                            self.record_event(
                                'DEFAULT_INFO',
                                "handle span \'{}\' found but not replaced with text"
                                .format(handle_text),
                                entry.get('where'))
            program_output.write('{line}'.format(line=line))