def __split_MSH_by_PID(filename):
    """Split a single-MSH HL7 file into one file per PID segment.

    Assumes:
    - ONE MSH per file
    - utf8 encoding
    - first non-empty line must be MSH line
    - next line must be PID line

    IOW, what's created by __split_hl7_file_by_MSH()

    Returns: list of created per-PID filenames.

    Raises: ValueError on malformed input (no leading MSH, more than
    one MSH, or first segment after MSH not being PID).

    Fix: input and current output file are now closed even when a
    ValueError aborts the parse (previously both handles leaked).
    """
    _log.debug('splitting single-MSH file [%s] into single-PID files', filename)
    MSH_line = None
    looking_for_MSH = True
    looking_for_first_PID = True
    PID_file = None
    PID_fnames = []
    idx = 0
    try:
        with io.open(filename, mode = 'rt', encoding = 'utf8') as MSH_in:
            for line in MSH_in:
                line = line.strip()
                # ignore empty
                if line == '':
                    continue
                # first non-empty line must be MSH
                if looking_for_MSH:
                    if line.startswith('MSH|'):
                        looking_for_MSH = False
                        MSH_line = line + HL7_EOL
                        continue
                    raise ValueError('HL7 MSH file <%s> does not start with "MSH" line' % filename)
                # only reached once the MSH line was seen
                if line.startswith('MSH|'):
                    raise ValueError('HL7 single-MSH file <%s> contains more than one MSH line' % filename)
                # first non-empty line after MSH must be PID
                if looking_for_first_PID:
                    if not line.startswith('PID|'):
                        raise ValueError('HL7 MSH file <%s> does not have "PID" line follow "MSH" line' % filename)
                    looking_for_first_PID = False
                # start new file if line is PID
                if line.startswith('PID|'):
                    if PID_file is not None:
                        PID_file.close()
                    idx += 1
                    out_fname = gmTools.get_unique_filename(prefix = '%s-PID_%s-' % (gmTools.fname_stem(filename), idx), suffix = 'hl7')
                    _log.debug('writing message for PID %s to [%s]', idx, out_fname)
                    PID_fnames.append(out_fname)
                    # newline='' -> write HL7_EOL without translation
                    PID_file = io.open(out_fname, mode = 'wt', encoding = 'utf8', newline = '')
                    # each per-PID file starts with a copy of the MSH line
                    PID_file.write(MSH_line)
                # write line (PID or otherwise) to current per-PID file
                PID_file.write(line + HL7_EOL)
    finally:
        # ensure the last (or aborted) output file is closed
        if PID_file is not None:
            PID_file.close()
    return PID_fnames
def __split_hl7_file_by_MSH(filename, encoding='utf8'):
    """Split an HL7 file into one file per message (MSH segment).

    - FHS/BHS batch header lines before the first MSH are ignored
    - BTS/FTS batch trailer lines are ignored
    - anything else before the first MSH raises ValueError

    Returns: list of created single-MSH filenames.

    Fix: input and current output file are now closed even when a
    ValueError aborts the parse (previously both handles leaked).
    """
    _log.debug('splitting [%s] into single-MSH files', filename)
    idx = 0
    first_line = True
    MSH_file = None
    MSH_fnames = []
    try:
        with io.open(filename, mode = 'rt', encoding = encoding) as hl7_in:
            for line in hl7_in:
                line = line.strip()
                # first line must be MSH
                if first_line:
                    # ignore empty / FHS / BHS lines
                    if line == '':
                        continue
                    if line.startswith('FHS|'):
                        _log.debug('ignoring FHS')
                        continue
                    if line.startswith('BHS|'):
                        _log.debug('ignoring BHS')
                        continue
                    if not line.startswith('MSH|'):
                        raise ValueError('HL7 file <%s> does not start with "MSH" line' % filename)
                    first_line = False
                # start new file on each MSH
                if line.startswith('MSH|'):
                    if MSH_file is not None:
                        MSH_file.close()
                    idx += 1
                    out_fname = gmTools.get_unique_filename(prefix = '%s-MSH_%s-' % (gmTools.fname_stem(filename), idx), suffix = 'hl7')
                    _log.debug('writing message %s to [%s]', idx, out_fname)
                    MSH_fnames.append(out_fname)
                    # newline='' -> write HL7_EOL without translation
                    MSH_file = io.open(out_fname, mode = 'wt', encoding = 'utf8', newline = '')
                # ignore BTS / FTS lines
                if line.startswith('BTS|'):
                    _log.debug('ignoring BTS')
                    continue
                if line.startswith('FTS|'):
                    _log.debug('ignoring FTS')
                    continue
                # else write line to current per-message file
                MSH_file.write(line + HL7_EOL)
    finally:
        # ensure the last (or aborted) output file is closed
        if MSH_file is not None:
            MSH_file.close()
    return MSH_fnames
def extract_HL7_from_XML_CDATA(filename, xml_path, target_dir=None):
    """Extract HL7 payload wrapped into CDATA of XML nodes.

    Args:
        filename: XML file to unwrap
        xml_path: ElementTree path expression selecting the payload nodes
        target_dir: where to put the unwrapped HL7 file; defaults to
            dir_of(filename)/HL7/

    Side effects: moves the source XML file into a done/ subdirectory.

    Returns: name of the unwrapped HL7 file inside target_dir, or
    None on failure / when no matching nodes are found.

    Fixes: nodes without text content no longer crash the extraction
    (TypeError on None); the Python-2 "+ u''" unicode trick is gone;
    the output file is closed even if writing raises.
    """
    _log.debug('extracting HL7 from CDATA of <%s> nodes in XML file [%s]', xml_path, filename)
    # sanity checks/setup
    try:
        open(filename).close()        # fail early if file is unreadable
        orig_dir = os.path.split(filename)[0]
        work_filename = gmTools.get_unique_filename(prefix = 'gm-x2h-%s-' % gmTools.fname_stem(filename), suffix = '.hl7')
        if target_dir is None:
            target_dir = os.path.join(orig_dir, 'HL7')
            done_dir = os.path.join(orig_dir, 'done')
        else:
            done_dir = os.path.join(target_dir, 'done')
        _log.debug('target dir: %s', target_dir)
        gmTools.mkdir(target_dir)
        gmTools.mkdir(done_dir)
    except Exception:
        _log.exception('cannot setup unwrapping environment')
        return None
    hl7_xml = pyxml.ElementTree()
    try:
        hl7_xml.parse(filename)
    except pyxml.ParseError:
        _log.exception('cannot parse [%s]' % filename)
        return None
    nodes = hl7_xml.findall(xml_path)
    if len(nodes) == 0:
        _log.debug('no nodes found for data extraction')
        return None
    _log.debug('unwrapping HL7 from XML into [%s]', work_filename)
    # newline='' -> universal newlines acceptance but no translation on output
    with io.open(work_filename, mode = 'wt', encoding = 'utf8', newline = '') as hl7_file:
        for node in nodes:
            # nodes without any text payload would crash write() with None
            if node.text is None:
                continue
            hl7_file.write(node.text)
    target_fname = os.path.join(target_dir, os.path.split(work_filename)[1])
    shutil.copy(work_filename, target_dir)
    shutil.move(filename, done_dir)
    return target_fname
def __fix_malformed_hl7_file(filename, encoding='utf8'):
    """Normalize a potentially malformed HL7 file in three passes.

    NOTE(review): this definition is shadowed by a later definition of
    the same name further down in this file and is therefore dead code
    as written; kept as-is.

    Pass 1: drop empty lines, normalize EOLs, unwrap wrapped segments.
    Pass 2: pad each segment with '|' up to the expected field count.
    Pass 3: merge consecutive FT-type OBX segments that differ only in
            set id / sub id / value back into one segment.

    Returns: name of the (third-pass) fixed temp file.
    """
    _log.debug(u'fixing HL7 file [%s]', filename)
    # first pass:
    # - remove empty lines
    # - normalize line endings
    # - unwrap wrapped segments
    out1_fname = gmTools.get_unique_filename (
        prefix = u'gm_fix1-%s-' % gmTools.fname_stem(filename),
        suffix = u'.hl7'
    )
    hl7_in = io.open(filename, mode = 'rt', encoding = encoding)
    # NOTE(review): no newline='' here -- if HL7_EOL contains '\r' the
    # text layer translates EOLs on write; the later definition of this
    # function fixes exactly that. Confirm before reusing this version.
    hl7_out = io.open(out1_fname, mode = 'wt', encoding = 'utf8')
    is_first_line = True
    for line in hl7_in:
        # skip empty line
        if line.strip().strip(u'\r').strip(u'\n').strip(u'\r').strip(u'\n').strip() == u'':
            continue
        # starts with known segment ?
        segment = line[:3]
        # NOTE(review): line[3] raises IndexError on a non-empty line of
        # fewer than 4 characters -- presumably cannot happen with lines
        # read from a file (trailing EOL); verify
        if (segment in HL7_SEGMENTS) and (line[3] == u'|'):
            # a new segment starts -> terminate the previous one
            if not is_first_line:
                hl7_out.write(HL7_EOL)
            else:
                is_first_line = False
        # write the (possibly continuation) line without its EOL, thereby
        # joining wrapped segment parts onto one physical line
        hl7_out.write(line.rstrip(u'\r').rstrip(u'\n').rstrip(u'\r').rstrip(u'\n'))
    hl7_out.close()
    hl7_in.close()
    # second pass:
    # - normalize # of fields per line
    out2_fname = gmTools.get_unique_filename (
        prefix = u'gm_fix2-%s-' % gmTools.fname_stem(filename),
        suffix = '.hl7'
    )
    hl7_in = io.open(out1_fname, mode = 'rt', encoding = 'utf8')
    hl7_out = io.open(out2_fname, mode = 'wt', encoding = 'utf8')
    for line in hl7_in:
        line = line.strip(HL7_EOL)
        seg_type = line[:3]
        # assumption: field separator = '|'
        field_count = line.count(u'|') + 1        # assumption: no '|' in data ...
        try:
            required_fields = HL7_segment2field_count[seg_type]
        except KeyError:
            # unknown segment type -> leave field count as is
            required_fields = field_count
        missing_fields_count = required_fields - field_count
        if missing_fields_count > 0:
            # pad with empty fields up to the expected count
            line += (u'|' * missing_fields_count)
        hl7_out.write(line + HL7_EOL)
    hl7_out.close()
    hl7_in.close()
    # third pass:
    # - unsplit same-name, same-time, text-type OBX segments
    out3_fname = gmTools.get_unique_filename (
        prefix = u'gm_fix3-%s-' % gmTools.fname_stem(filename),
        suffix = '.hl7'
    )
    hl7_in = io.open(out2_fname, mode = 'rt', encoding = 'utf8')
    hl7_out = io.open(out3_fname, mode = 'wt', encoding = 'utf8')
    # "identity" = segment with set id, sub id, and value blanked out;
    # equal identities mean the OBX lines belong to one logical result
    prev_identity = None
    prev_fields = None
    for line in hl7_in:
        if not line.startswith(u'OBX|'):
            # non-OBX line: flush any pending merged OBX first
            if prev_fields is not None:
                hl7_out.write(u'|'.join(prev_fields) + HL7_EOL)
            hl7_out.write(line)
            prev_identity = None
            prev_fields = None
            curr_fields = None
            continue
        line = line.strip(HL7_EOL)
        # first OBX
        curr_fields = line.split(u'|')
        if curr_fields[OBX_field__datatype] != u'FT':
            # only FT (formatted text) OBX segments are merge candidates
            hl7_out.write(line + HL7_EOL)
            prev_identity = None
            prev_fields = None
            curr_fields = None
            continue
        # first FT type OBX
        if prev_fields is None:
            prev_fields = line.split(u'|')
            prev_identity = line.split(u'|')
            prev_identity[OBX_field__set_id] = u''
            prev_identity[OBX_field__subid] = u''
            prev_identity[OBX_field__value] = u''
            prev_identity = u'|'.join(prev_identity)
            continue
        # non-first FT type OBX
        curr_identity = line.split(u'|')
        curr_identity[OBX_field__set_id] = u''
        curr_identity[OBX_field__subid] = u''
        curr_identity[OBX_field__value] = u''
        curr_identity = u'|'.join(curr_identity)
        if curr_identity != prev_identity:
            # write out previous line
            hl7_out.write(u'|'.join(prev_fields) + HL7_EOL)
            # keep current fields, since it may start a "repeat FT type OBX block"
            prev_fields = curr_fields
            prev_identity = curr_identity
            continue
        # same identity -> append value, inserting a \.br\ line break
        # marker unless one side already carries it
        if prev_fields[OBX_field__value].endswith(u'\.br\\'):
            prev_fields[OBX_field__value] += curr_fields[OBX_field__value]
        else:
            if curr_fields[OBX_field__value].startswith(u'\.br\\'):
                prev_fields[OBX_field__value] += curr_fields[OBX_field__value]
            else:
                prev_fields[OBX_field__value] += u'\.br\\'
                prev_fields[OBX_field__value] += curr_fields[OBX_field__value]
    # flush a pending merged OBX at EOF
    if prev_fields is not None:
        hl7_out.write(u'|'.join(prev_fields) + HL7_EOL)
    hl7_out.close()
    hl7_in.close()
    return out3_fname
def stage_single_PID_hl7_file(filename, source=None, encoding='utf8'):
    """Multi-step processing of HL7 files.

    - input must be single-MSH / single-PID / normalized HL7
    - imports into clin.incoming_data_unmatched
    - needs write permissions in dir_of(filename)
    - moves PID files which were successfully staged into dir_of(filename)/done/PID/

    NOTE(review): this definition is shadowed by a later definition of
    the same name further down in this file and is therefore dead code
    as written; kept as-is.

    Returns True on success, False on any failure; on failure the PID
    file and the per-file log are moved into a failed/ subdirectory.
    """
    # per-file log: temporarily attach a FileHandler to the root logger
    # so everything logged during staging ends up alongside the data
    local_log_name = gmTools.get_unique_filename (
        prefix = gmTools.fname_stem(filename) + '-',
        suffix = '.stage.log'
    )
    local_logger = logging.FileHandler(local_log_name)
    local_logger.setLevel(logging.DEBUG)
    root_logger = logging.getLogger('')
    root_logger.addHandler(local_logger)
    _log.info(u'staging [%s] as unmatched incoming HL7%s', filename, gmTools.coalesce(source, u'', u' (%s)'))
    _log.debug(u'log file: %s', local_log_name)
    # sanity checks/setup
    try:
        open(filename).close()        # fail early if file is unreadable
        orig_dir = os.path.split(filename)[0]
        done_dir = os.path.join(orig_dir, u'done')
        gmTools.mkdir(done_dir)
        error_dir = os.path.join(orig_dir, u'failed')
        gmTools.mkdir(error_dir)
    except Exception:
        _log.exception('cannot setup staging environment')
        root_logger.removeHandler(local_logger)
        return False
    # stage
    try:
        inc = create_incoming_data(u'HL7%s' % gmTools.coalesce(source, u'', u' (%s)'), filename)
        if inc is None:
            _log.error(u'cannot stage PID file: %s', filename)
            root_logger.removeHandler(local_logger)
            shutil.move(filename, error_dir)
            shutil.move(local_log_name, error_dir)
            return False
        inc.update_data_from_file(fname = filename)
    except Exception:
        _log.exception(u'error staging PID file')
        root_logger.removeHandler(local_logger)
        shutil.move(filename, error_dir)
        shutil.move(local_log_name, error_dir)
        return False
    # set additional data
    MSH_file = io.open(filename, mode = 'rt', encoding = 'utf8')
    raw_hl7 = MSH_file.read(1024 * 1024 * 5)        # 5 MB max
    MSH_file.close()
    shutil.move(filename, done_dir)
    # store a pretty-printed copy of the message as comment
    inc['comment'] = format_hl7_message (
        message = raw_hl7,
        skip_empty_fields = True,
        eol = u'\n'
    )
    HL7 = pyhl7.parse(raw_hl7)
    del raw_hl7
    inc['comment'] += u'\n'
    inc['comment'] += (u'-' * 80)
    inc['comment'] += u'\n\n'
    # append the staging log to the comment for later debugging
    log = io.open(local_log_name, mode = 'rt', encoding = 'utf8')
    inc['comment'] += log.read()
    log.close()
    # best-effort extraction of demographics from the PID segment
    try:
        inc['lastnames'] = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__lastname)
        inc['firstnames'] = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__firstname)
        val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__middlename)
        if val is not None:
            inc['firstnames'] += u' '
            inc['firstnames'] += val
        val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__dob)
        if val is not None:
            # assumes DOB field formatted as YYYYMMDD
            tmp = time.strptime(val, '%Y%m%d')
            inc['dob'] = pyDT.datetime(tmp.tm_year, tmp.tm_mon, tmp.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
        val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__gender)
        if val is not None:
            inc['gender'] = val
        inc['external_data_id'] = filename
        #u'fk_patient_candidates',
        # u'request_id',                # request ID as found in <data>
        # u'postcode',
        # u'other_info',                # other identifying info in .data
        # u'requestor',                 # Requestor of data (e.g. who ordered test results) if available in source data.
        # u'fk_identity_disambiguated',
        # u'comment',                   # a free text comment on this row, eg. why is it here, error logs etc
        # u'fk_provider_disambiguated'  # The provider the data is relevant to.
    except Exception:
        # demographics extraction is best-effort only; staging still succeeds
        _log.exception(u'cannot add more data')
    inc.save()
    _log.info(u'successfully staged')
    root_logger.removeHandler(local_logger)
    shutil.move(local_log_name, done_dir)
    return True
def split_hl7_file(filename, target_dir=None, encoding='utf8'):
    """Multi-step processing of HL7 files.

    - input can be multi-MSH / multi-PID / partially malformed HL7
    - tries to fix oddities
    - splits by MSH
    - splits by PID into <target_dir>
    - needs write permissions in dir_of(filename)
    - moves HL7 files which were successfully split up into dir_of(filename)/done/
    - returns (True|False, list_of_PID_files)

    Fix: the per-file cleanup loop no longer uses a bare "except:"
    (which would also swallow SystemExit/KeyboardInterrupt); it now
    catches Exception, matching the rest of this module.
    """
    # per-file log: temporarily attach a FileHandler to the root logger
    local_log_name = gmTools.get_unique_filename (
        prefix = gmTools.fname_stem(filename) + '-',
        suffix = '.split.log'
    )
    local_logger = logging.FileHandler(local_log_name)
    local_logger.setLevel(logging.DEBUG)
    root_logger = logging.getLogger('')
    root_logger.addHandler(local_logger)
    _log.info('splitting HL7 file: %s', filename)
    _log.debug('log file: %s', local_log_name)
    # sanity checks/setup
    try:
        open(filename).close()        # fail early if file is unreadable
        orig_dir = os.path.split(filename)[0]
        done_dir = os.path.join(orig_dir, u'done')
        gmTools.mkdir(done_dir)
        error_dir = os.path.join(orig_dir, u'failed')
        gmTools.mkdir(error_dir)
        work_filename = gmTools.get_unique_filename(prefix = gmTools.fname_stem(filename) + '-', suffix = '.hl7')
        if target_dir is None:
            target_dir = os.path.join(orig_dir, u'PID')
        _log.debug('target dir: %s', target_dir)
        gmTools.mkdir(target_dir)
    except Exception:
        _log.exception('cannot setup splitting environment')
        root_logger.removeHandler(local_logger)
        return False, None
    # split
    target_names = []
    try:
        # work on a copy, leave the original untouched until success
        shutil.copy(filename, work_filename)
        fixed_filename = __fix_malformed_hl7_file(work_filename, encoding = encoding)
        MSH_fnames = __split_hl7_file_by_MSH(fixed_filename, encoding)
        PID_fnames = []
        for MSH_fname in MSH_fnames:
            PID_fnames.extend(__split_MSH_by_PID(MSH_fname))
        for PID_fname in PID_fnames:
            shutil.move(PID_fname, target_dir)
            target_names.append(os.path.join(target_dir, os.path.split(PID_fname)[1]))
    except Exception:
        _log.exception('cannot split HL7 file')
        # best-effort removal of partial results
        for target_name in target_names:
            try:
                os.remove(target_name)
            except Exception:
                pass
        root_logger.removeHandler(local_logger)
        shutil.move(local_log_name, error_dir)
        return False, None
    _log.info('successfully split')
    root_logger.removeHandler(local_logger)
    try:
        shutil.move(filename, done_dir)
        shutil.move(local_log_name, done_dir)
    except shutil.Error:
        _log.exception('cannot move hl7 file or log file to holding area')
    return True, target_names
def __fix_malformed_hl7_file(filename, encoding='utf8'):
    """Normalize a potentially malformed HL7 file in three passes.

    Pass 1: drop empty lines, normalize EOLs, unwrap wrapped segments.
    Pass 2: pad each segment with '|' up to the expected field count and
            clean leading/trailing \\.br\\ markers from fields.
    Pass 3: merge consecutive FT-type OBX segments that differ only in
            set id / sub id / value back into one segment.

    Returns: name of the (third-pass) fixed temp file.
    """
    _log.debug('fixing HL7 file [%s]', filename)
    # first pass:
    # - remove empty lines
    # - normalize line endings
    # - unwrap wrapped segments (based on the assumption that segments are wrapped until a line starts with a known segment marker)
    out1_fname = gmTools.get_unique_filename (
        prefix = 'gm_fix1-%s-' % gmTools.fname_stem(filename),
        suffix = '.hl7'
    )
    hl7_in = io.open(filename, mode = 'rt', encoding = encoding)                    # universal newlines: translate any type of EOL to \n
    hl7_out = io.open(out1_fname, mode = 'wt', encoding = 'utf8', newline = '')     # newline='' -> no translation of EOL at all
    is_first_line = True
    for line in hl7_in:
        # skip empty line
        if line.strip() == '':
            continue
        # starts with known segment ?
        segment = line[:3]
        # NOTE(review): line[3] raises IndexError on a non-empty line of
        # fewer than 4 characters -- presumably cannot happen with lines
        # read from a file (trailing EOL); verify
        if (segment in HL7_SEGMENTS) and (line[3] == '|'):
            # a new segment starts -> terminate the previous one
            if not is_first_line:
                hl7_out.write(HL7_EOL)
            else:
                is_first_line = False
        else:
            # continuation of a wrapped segment -> join with a space
            hl7_out.write(' ')
        hl7_out.write(line.rstrip())
    # terminate the last segment
    hl7_out.write(HL7_EOL)
    hl7_out.close()
    hl7_in.close()
    # second pass:
    # - normalize # of fields per line
    # - remove '\.br.\'-only fields ;-)
    out2_fname = gmTools.get_unique_filename (
        prefix = 'gm_fix2-%s-' % gmTools.fname_stem(filename),
        suffix = '.hl7'
    )
    # we can now _expect_ lines to end in HL7_EOL, anything else is an error
    hl7_in = io.open(out1_fname, mode = 'rt', encoding = 'utf8', newline = HL7_EOL)
    hl7_out = io.open(out2_fname, mode = 'wt', encoding = 'utf8', newline = '')
    for line in hl7_in:
        line = line.strip()
        seg_type = line[:3]
        # assumption: field separator = '|'
        field_count = line.count('|') + 1        # assumption: no '|' in data ...
        try:
            required_fields = HL7_segment2field_count[seg_type]
        except KeyError:
            # unknown segment type -> leave field count as is
            required_fields = field_count
        missing_fields_count = required_fields - field_count
        if missing_fields_count > 0:
            # pad with empty fields up to the expected count
            line += ('|' * missing_fields_count)
        # strip leading/trailing HL7_BRK markers from each field; fields
        # consisting of nothing but markers/whitespace become empty
        cleaned_fields = []
        for field in line.split('|'):
            if field.replace(HL7_BRK, '').strip() == '':
                cleaned_fields.append('')
                continue
            cleaned = gmTools.strip_prefix(field, HL7_BRK, remove_repeats = True, remove_whitespace = True)
            cleaned = gmTools.strip_suffix(cleaned, HL7_BRK, remove_repeats = True, remove_whitespace = True)
            cleaned_fields.append(cleaned)
        hl7_out.write('|'.join(cleaned_fields) + HL7_EOL)
    hl7_out.close()
    hl7_in.close()
    # third pass:
    # - unsplit same-name, same-time, text-type OBX segments
    out3_fname = gmTools.get_unique_filename (
        prefix = 'gm_fix3-%s-' % gmTools.fname_stem(filename),
        suffix = '.hl7'
    )
    # we can now _expect_ lines to end in HL7_EOL, anything else is an error
    hl7_in = io.open(out2_fname, mode = 'rt', encoding = 'utf8', newline = HL7_EOL)
    hl7_out = io.open(out3_fname, mode = 'wt', encoding = 'utf8', newline = '')
    # "identity" = segment with set id, sub id, and value blanked out;
    # equal identities mean the OBX lines belong to one logical result
    prev_identity = None
    prev_fields = None
    for line in hl7_in:
        line = line.strip()
        if not line.startswith('OBX|'):
            # non-OBX line: flush any pending merged OBX first
            if prev_fields is not None:
                hl7_out.write('|'.join(prev_fields) + HL7_EOL)
            hl7_out.write(line + HL7_EOL)
            prev_identity = None
            prev_fields = None
            curr_fields = None
            continue
        # first OBX
        curr_fields = line.split('|')
        if curr_fields[OBX_field__datatype] != 'FT':
            # only FT (formatted text) OBX segments are merge candidates
            hl7_out.write(line + HL7_EOL)
            prev_identity = None
            prev_fields = None
            curr_fields = None
            continue
        # first FT type OBX
        if prev_fields is None:
            prev_fields = line.split('|')
            prev_identity = line.split('|')
            prev_identity[OBX_field__set_id] = ''
            prev_identity[OBX_field__subid] = ''
            prev_identity[OBX_field__value] = ''
            prev_identity = '|'.join(prev_identity)
            continue
        # non-first FT type OBX
        curr_identity = line.split('|')
        curr_identity[OBX_field__set_id] = ''
        curr_identity[OBX_field__subid] = ''
        curr_identity[OBX_field__value] = ''
        curr_identity = '|'.join(curr_identity)
        if curr_identity != prev_identity:
            # write out previous line
            hl7_out.write('|'.join(prev_fields) + HL7_EOL)
            # keep current fields, since it may start a "repeat FT type OBX block"
            prev_fields = curr_fields
            prev_identity = curr_identity
            continue
        # same identity -> append value, inserting an HL7_BRK line break
        # marker unless one side already carries it
        if prev_fields[OBX_field__value].endswith(HL7_BRK):
            prev_fields[OBX_field__value] += curr_fields[OBX_field__value]
        else:
            if curr_fields[OBX_field__value].startswith(HL7_BRK):
                prev_fields[OBX_field__value] += curr_fields[OBX_field__value]
            else:
                prev_fields[OBX_field__value] += HL7_BRK
                prev_fields[OBX_field__value] += curr_fields[OBX_field__value]
    # flush a pending merged OBX at EOF
    if prev_fields is not None:
        hl7_out.write('|'.join(prev_fields) + HL7_EOL)
    hl7_out.close()
    hl7_in.close()
    return out3_fname
def stage_single_PID_hl7_file(filename, source=None, encoding='utf8'):
    """Multi-step processing of HL7 files.

    - input must be single-MSH / single-PID / normalized HL7
    - imports into clin.incoming_data_unmatched
    - needs write permissions in dir_of(filename)
    - moves PID files which were successfully staged into dir_of(filename)/done/PID/

    Returns True on success, False on any failure; on failure the PID
    file and the per-file log are moved into a failed/ subdirectory.
    """
    # per-file log: temporarily attach a FileHandler to the root logger
    # so everything logged during staging ends up alongside the data
    local_log_name = gmTools.get_unique_filename (
        prefix = gmTools.fname_stem(filename) + '-',
        suffix = '.stage.log'
    )
    local_logger = logging.FileHandler(local_log_name)
    local_logger.setLevel(logging.DEBUG)
    root_logger = logging.getLogger('')
    root_logger.addHandler(local_logger)
    _log.info('staging [%s] as unmatched incoming HL7%s', filename, gmTools.coalesce(source, '', ' (%s)'))
    _log.debug('log file: %s', local_log_name)
    # sanity checks/setup
    try:
        open(filename).close()        # fail early if file is unreadable
        orig_dir = os.path.split(filename)[0]
        done_dir = os.path.join(orig_dir, 'done')
        gmTools.mkdir(done_dir)
        error_dir = os.path.join(orig_dir, 'failed')
        gmTools.mkdir(error_dir)
    except Exception:
        _log.exception('cannot setup staging environment')
        root_logger.removeHandler(local_logger)
        return False
    # stage
    try:
        incoming = gmIncomingData.create_incoming_data('HL7%s' % gmTools.coalesce(source, '', ' (%s)'), filename)
        if incoming is None:
            _log.error('cannot stage PID file: %s', filename)
            root_logger.removeHandler(local_logger)
            shutil.move(filename, error_dir)
            shutil.move(local_log_name, error_dir)
            return False
        incoming.update_data_from_file(fname = filename)
    except Exception:
        _log.exception('error staging PID file')
        root_logger.removeHandler(local_logger)
        shutil.move(filename, error_dir)
        shutil.move(local_log_name, error_dir)
        return False
    # set additional data
    MSH_file = io.open(filename, mode = 'rt', encoding = 'utf8', newline = '')
    raw_hl7 = MSH_file.read(1024 * 1024 * 5)        # 5 MB max
    MSH_file.close()
    shutil.move(filename, done_dir)
    # store a pretty-printed copy of the message as comment
    incoming['comment'] = format_hl7_message (
        message = raw_hl7,
        skip_empty_fields = True,
        eol = '\n'
    )
    HL7 = pyhl7.parse(raw_hl7)
    del raw_hl7
    incoming['comment'] += '\n'
    incoming['comment'] += ('-' * 80)
    incoming['comment'] += '\n\n'
    # append the staging log to the comment for later debugging
    log = io.open(local_log_name, mode = 'rt', encoding = 'utf8')
    incoming['comment'] += log.read()
    log.close()
    # best-effort extraction of demographics from the PID segment
    try:
        incoming['lastnames'] = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__lastname)
        incoming['firstnames'] = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__firstname)
        val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__middlename)
        if val is not None:
            incoming['firstnames'] += ' '
            incoming['firstnames'] += val
        val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__dob)
        if val is not None:
            # assumes DOB field formatted as YYYYMMDD
            tmp = time.strptime(val, '%Y%m%d')
            incoming['dob'] = pyDT.datetime(tmp.tm_year, tmp.tm_mon, tmp.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
        val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__gender)
        if val is not None:
            incoming['gender'] = val
        incoming['external_data_id'] = filename
        #u'fk_patient_candidates',
        # u'request_id',                # request ID as found in <data>
        # u'postcode',
        # u'other_info',                # other identifying info in .data
        # u'requestor',                 # Requestor of data (e.g. who ordered test results) if available in source data.
        # u'fk_identity_disambiguated',
        # u'comment',                   # a free text comment on this row, eg. why is it here, error logs etc
        # u'fk_provider_disambiguated'  # The provider the data is relevant to.
    except Exception:
        # demographics extraction is best-effort only; staging still succeeds
        _log.exception('cannot add more data')
    incoming.save()
    _log.info('successfully staged')
    root_logger.removeHandler(local_logger)
    shutil.move(local_log_name, done_dir)
    return True
def split_hl7_file(filename, target_dir=None, encoding='utf8'):
    """Multi-step processing of HL7 files.

    - input can be multi-MSH / multi-PID / partially malformed HL7
    - tries to fix oddities
    - splits by MSH
    - splits by PID into <target_dir>
    - needs write permissions in dir_of(filename)
    - moves HL7 files which were successfully split up into dir_of(filename)/done/
    - returns (True|False, list_of_PID_files)

    Fix: the temporary logging.FileHandler is now close()d at every
    exit -- removeHandler() alone does not release the file, which
    leaked the fd and made the subsequent shutil.move() of the
    still-open log file fail on platforms with mandatory file locking.
    """
    # per-file log: temporarily attach a FileHandler to the root logger
    local_log_name = gmTools.get_unique_filename (
        prefix = gmTools.fname_stem(filename) + '-',
        suffix = '.split.log'
    )
    local_logger = logging.FileHandler(local_log_name)
    local_logger.setLevel(logging.DEBUG)
    root_logger = logging.getLogger('')
    root_logger.addHandler(local_logger)
    _log.info('splitting HL7 file: %s', filename)
    _log.debug('log file: %s', local_log_name)
    # sanity checks/setup
    try:
        open(filename).close()        # fail early if file is unreadable
        orig_dir = os.path.split(filename)[0]
        done_dir = os.path.join(orig_dir, 'done')
        gmTools.mkdir(done_dir)
        error_dir = os.path.join(orig_dir, 'failed')
        gmTools.mkdir(error_dir)
        work_filename = gmTools.get_unique_filename(prefix = gmTools.fname_stem(filename) + '-', suffix = '.hl7')
        if target_dir is None:
            target_dir = os.path.join(orig_dir, 'PID')
        _log.debug('target dir: %s', target_dir)
        gmTools.mkdir(target_dir)
    except Exception:
        _log.exception('cannot setup splitting environment')
        root_logger.removeHandler(local_logger)
        local_logger.close()
        return False, None
    # split
    target_names = []
    try:
        # work on a copy, leave the original untouched until success
        shutil.copy(filename, work_filename)
        fixed_filename = __fix_malformed_hl7_file(work_filename, encoding = encoding)
        MSH_fnames = __split_hl7_file_by_MSH(fixed_filename, encoding)
        PID_fnames = []
        for MSH_fname in MSH_fnames:
            PID_fnames.extend(__split_MSH_by_PID(MSH_fname))
        for PID_fname in PID_fnames:
            shutil.move(PID_fname, target_dir)
            target_names.append(os.path.join(target_dir, os.path.split(PID_fname)[1]))
    except Exception:
        _log.exception('cannot split HL7 file')
        # best-effort removal of partial results
        for target_name in target_names:
            try:
                os.remove(target_name)
            except Exception:
                pass
        root_logger.removeHandler(local_logger)
        local_logger.close()        # release the log file before moving it
        shutil.move(local_log_name, error_dir)
        return False, None
    _log.info('successfully split')
    root_logger.removeHandler(local_logger)
    local_logger.close()        # release the log file before moving it
    try:
        shutil.move(filename, done_dir)
        shutil.move(local_log_name, done_dir)
    except shutil.Error:
        _log.exception('cannot move hl7 file or log file to holding area')
    return True, target_names
def convert_latex_to_pdf(filename: str = None, verbose: bool = False, is_sandboxed: bool = False) -> str:
    """Compile LaTeX code to PDF using pdflatex.

    Args:
        filename: the .tex file to compile
        verbose: accepted for interface compatibility; process output
            is currently always requested verbosely
        is_sandboxed: whether or not to create a sandbox for compiling;
            if False the file is copied into a fresh sandbox dir first

    Returns:
        Name of resulting PDF, or None on failure.

    Fixes: repeat calls after a failed pdflatex detection no longer
    crash on a None executable (the detection result is cached in
    module globals); typo in diagnostics log message corrected.
    """
    global __LaTeX_version_checked
    global __pdflatex_executable
    # detect pdflatex once per process, cache the result
    if not __LaTeX_version_checked:
        __LaTeX_version_checked = True
        found, __pdflatex_executable = gmShellAPI.detect_external_binary(binary='pdflatex')
        if not found:
            _log.error('pdflatex not found')
            return None
        cmd_line = [__pdflatex_executable, '-version']
        success, ret_code, stdout = gmShellAPI.run_process(cmd_line=cmd_line, encoding='utf8', verbose=True)
        if not success:
            _log.error('[%s] failed, LaTeX not usable', cmd_line)
            return None
    # fix: on subsequent calls after a failed detection the cached
    # executable is None -- bail out instead of running a None command
    if __pdflatex_executable is None:
        _log.error('pdflatex not found')
        return None
    if is_sandboxed:
        # caller already placed the file into a sandbox -- compile in place
        sandbox_dir = os.path.split(filename)[0]
    else:
        sandbox_dir = gmTools.mk_sandbox_dir(prefix=gmTools.fname_stem(filename) + '_')
        shutil.copy(filename, sandbox_dir)
        filename = os.path.join(sandbox_dir, os.path.split(filename)[1])
    _log.debug('LaTeX sandbox directory: [%s]', sandbox_dir)
    cmd_final = [
        __pdflatex_executable,
        '-recorder',
        '-interaction=nonstopmode',
        "-output-directory=%s" % sandbox_dir
    ]
    cmd_draft = cmd_final + ['-draftmode']
    # LaTeX can need up to three runs to get cross references et al right
    for cmd2run in [cmd_draft, cmd_draft, cmd_final]:
        success, ret_code, stdout = gmShellAPI.run_process(
            cmd_line=cmd2run + [filename],
            acceptable_return_codes=[0],
            encoding='utf8',
            verbose=True    #_cfg.get(option = 'debug')
        )
        if not success:
            _log.error('problem running pdflatex, cannot generate form output, trying diagnostics')
            # run LaTeX linters on the input to aid debugging
            found, binary = gmShellAPI.find_first_binary(binaries=['lacheck', 'miktex-lacheck.exe'])
            if not found:
                _log.debug('lacheck not found')
            else:
                cmd_line = [binary, filename]
                success, ret_code, stdout = gmShellAPI.run_process(cmd_line=cmd_line, encoding='utf8', verbose=True)
            found, binary = gmShellAPI.find_first_binary(binaries=['chktex', 'ChkTeX.exe'])
            if not found:
                _log.debug('chktex not found')        # fixed typo: was 'chcktex'
            else:
                cmd_line = [binary, '--verbosity=2', '--headererr', filename]
                success, ret_code, stdout = gmShellAPI.run_process(cmd_line=cmd_line, encoding='utf8', verbose=True)
            return None
    return '%s.pdf' % os.path.splitext(filename)[0]