def run(conn=None):
	"""Load ATC codes (en/fr/de terms) from the bundled CSV into the database.

	Creates the data source records, reads the per-language source PKs,
	then inserts one row per language for every ATC line in the CSV.
	Commits on <conn> at the end; individual statements do not end the tx.
	"""
	# make sure the data source rows exist, then fetch their PKs
	gmPG2.run_rw_queries(link_obj=conn, queries=[{'cmd': SQL_CREATE_DATA_SOURCE}], end_tx=False)
	rows, idx = gmPG2.run_ro_queries(link_obj=conn, queries=[{'cmd': SQL_GET_DATA_SOURCES}], get_col_idx=False)
	src_by_lang = rows[0]
	csv_path = os.path.join('..', 'sql', 'v14-v15', 'data', 'atc_only-utf8.csv')
	csv_file = io.open(csv_path, mode='rt', encoding='utf8', errors='replace')
	reader = gmTools.unicode_csv_reader (
		csv_file,
		delimiter=",",
		quotechar='"',
		dict=True,
		fieldnames=[u'atc', u'en', u'fr', u'de']
	)
	for row in reader:
		# one INSERT per language, all sharing the same ATC code
		lang_queries = [ {
			'cmd': SQL_INSERT,
			'args': {u'src': src_by_lang[language], u'code': row['atc'], 'term': row[language]}
		} for language in ('en', 'fr', 'de') ]
		gmPG2.run_rw_queries(link_obj=conn, queries=lang_queries, end_tx=False)
	conn.commit()
def __init__(self, filename=None):
	"""Open a Gelbe Liste/MMI drug data transfer file for CSV reading.

	filename -- path to the CSV transfer file; when None, falls back to
		cGelbeListeCSVFile.default_transfer_file_windows
	"""
	_log.info(cGelbeListeCSVFile.version)
	self.filename = filename
	if filename is None:
		self.filename = cGelbeListeCSVFile.default_transfer_file_windows
	_log.debug('reading Gelbe Liste/MMI drug data from [%s]', self.filename)
	# FIX: open self.filename (which carries the default fallback) rather
	# than the raw <filename> argument -- previously, passing filename=None
	# made io.open() receive None and fail despite the default being set
	self.csv_file = io.open(self.filename, mode = 'rt', encoding = cGelbeListeCSVFile.default_encoding)
	self.csv_lines = gmTools.unicode_csv_reader (
		self.csv_file,
		fieldnames = cGelbeListeCSVFile.csv_fieldnames,
		delimiter = ';',
		quotechar = '"',
		dict = True
	)
def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
	"""Import a LOINC release into ref.loinc via a staging table.

	Pipeline: empty staging.loinc_staging -> bulk-load the tab-separated
	LOINC data file -> create/update the ref.data_source record for this
	release -> copy new codes into ref.loinc and refresh detail columns
	on existing ones -> empty the staging table again.

	data_fname -- path to the tab-delimited LOINC data file
	license_fname -- path to the license/readme file; its full text becomes
		the data source description and is used for version detection
	version -- LOINC release version; auto-detected from the license file
		when None; ValueError if it cannot be detected
	conn -- DB-API connection; all commits happen on it
	lang -- language tag recorded in ref.data_source

	Returns True on completion.
	"""
	if version is None:
		version = get_version(license_fname=license_fname)
	if version is None:
		raise ValueError('cannot detect LOINC version')
	_log.debug('importing LOINC version [%s]', version)

	# clean out staging area
	curs = conn.cursor()
	cmd = """DELETE FROM staging.loinc_staging"""
	gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('staging table emptied')

	# import data from csv file into staging table
	csv_file = io.open(data_fname, mode='rt', encoding='utf8', errors='replace')
	loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter="\t", quotechar='"')
	curs = conn.cursor()
	# builds "INSERT ... values (%s, %s, ..., %s)" with one placeholder
	# per LOINC field (note the escaped %%s supplying the final one)
	cmd = """INSERT INTO staging.loinc_staging values (%s%%s)""" % ('%s, ' * (len(loinc_fields) - 1))
	first = False
	for loinc_line in loinc_reader:
		if not first:
			# skip the header line of the data file
			first = True
			continue
		gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd, 'args': loinc_line}])
	curs.close()
	conn.commit()
	csv_file.close()
	_log.debug('staging table loaded')

	# create data source record (license text serves as description)
	in_file = io.open(license_fname, mode='rt', encoding='utf8', errors='replace')
	desc = in_file.read()
	in_file.close()
	args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
	queries = [
		# insert if not existing
		{'args': args, 'cmd': """
			INSERT INTO ref.data_source (name_long, name_short, version)
			SELECT %(name_long)s, %(name_short)s, %(ver)s WHERE NOT EXISTS (
				SELECT 1 FROM ref.data_source WHERE
					name_long = %(name_long)s AND
					name_short = %(name_short)s AND
					version = %(ver)s
			)"""
		},
		# update non-unique fields
		{'args': args, 'cmd': """
			UPDATE ref.data_source SET
				description = %(desc)s,
				source = %(url)s,
				lang = %(lang)s
			WHERE
				name_long = %(name_long)s AND
				name_short = %(name_short)s AND
				version = %(ver)s
			"""
		},
		# retrieve PK of data source
		{'args': args, 'cmd': """SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
	]
	curs = conn.cursor()
	rows, idx = gmPG2.run_rw_queries(link_obj=curs, queries=queries, return_data=True)
	data_src_pk = rows[0][0]
	curs.close()
	_log.debug('data source record created or updated, pk is #%s', data_src_pk)

	# import from staging table to real table
	args = {'src_pk': data_src_pk}
	queries = []
	# insert rows not yet in ref.loinc; <term> falls back to a
	# colon-joined concatenation of the part fields when the
	# long_common_name is empty
	queries.append({'args': args, 'cmd': """
		INSERT INTO ref.loinc (fk_data_source, term, code)
		SELECT
			%(src_pk)s,
			coalesce (
				nullif(long_common_name, ''),
				(
					coalesce(nullif(component, '') || ':', '') ||
					coalesce(nullif(property, '') || ':', '') ||
					coalesce(nullif(time_aspect, '') || ':', '') ||
					coalesce(nullif(system, '') || ':', '') ||
					coalesce(nullif(scale_type, '') || ':', '') ||
					coalesce(nullif(method_type, '') || ':', '')
				)
			),
			nullif(loinc_num, '')
		FROM staging.loinc_staging st_ls
		WHERE NOT EXISTS (
			SELECT 1 FROM ref.loinc r_l WHERE
				r_l.fk_data_source = %(src_pk)s AND
				r_l.code = nullif(st_ls.loinc_num, '') AND
				r_l.term = coalesce (
					nullif(st_ls.long_common_name, ''),
					(
						coalesce(nullif(st_ls.component, '') || ':', '') ||
						coalesce(nullif(st_ls.property, '') || ':', '') ||
						coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
						coalesce(nullif(st_ls.system, '') || ':', '') ||
						coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
						coalesce(nullif(st_ls.method_type, '') || ':', '')
					)
				)
		)"""
	})
	# refresh the detail columns of all rows matching this release
	# (matched on source PK, code and the same computed term)
	queries.append({'args': args, 'cmd': """
		UPDATE ref.loinc SET
			comment = nullif(st_ls.comments, ''),
			component = nullif(st_ls.component, ''),
			property = nullif(st_ls.property, ''),
			time_aspect = nullif(st_ls.time_aspect, ''),
			system = nullif(st_ls.system, ''),
			scale_type = nullif(st_ls.scale_type, ''),
			method_type = nullif(st_ls.method_type, ''),
			related_names_1_old = nullif(st_ls.related_names_1_old, ''),
			grouping_class = nullif(st_ls.class, ''),
			loinc_internal_source = nullif(st_ls.source, ''),
			dt_last_change = nullif(st_ls.dt_last_change, ''),
			change_type = nullif(st_ls.change_type, ''),
			answer_list = nullif(st_ls.answer_list, ''),
			code_status = nullif(st_ls.status, ''),
			maps_to = nullif(st_ls.map_to, ''),
			scope = nullif(st_ls.scope, ''),
			normal_range = nullif(st_ls.normal_range, ''),
			ipcc_units = nullif(st_ls.ipcc_units, ''),
			reference = nullif(st_ls.reference, ''),
			exact_component_synonym = nullif(st_ls.exact_component_synonym, ''),
			molar_mass = nullif(st_ls.molar_mass, ''),
			grouping_class_type = nullif(st_ls.class_type, '')::smallint,
			formula = nullif(st_ls.formula, ''),
			species = nullif(st_ls.species, ''),
			example_answers = nullif(st_ls.example_answers, ''),
			acs_synonyms = nullif(st_ls.acs_synonyms, ''),
			base_name = nullif(st_ls.base_name, ''),
			final = nullif(st_ls.final, ''),
			naa_ccr_id = nullif(st_ls.naa_ccr_id, ''),
			code_table = nullif(st_ls.code_table, ''),
			is_set_root = nullif(st_ls.is_set_root, '')::boolean,
			panel_elements = nullif(st_ls.panel_elements, ''),
			survey_question_text = nullif(st_ls.survey_question_text, ''),
			survey_question_source = nullif(st_ls.survey_question_source, ''),
			units_required = nullif(st_ls.units_required, ''),
			submitted_units = nullif(st_ls.submitted_units, ''),
			related_names_2 = nullif(st_ls.related_names_2, ''),
			short_name = nullif(st_ls.short_name, ''),
			order_obs = nullif(st_ls.order_obs, ''),
			cdisc_common_tests = nullif(st_ls.cdisc_common_tests, ''),
			hl7_field_subfield_id = nullif(st_ls.hl7_field_subfield_id, ''),
			external_copyright_notice = nullif(st_ls.external_copyright_notice, ''),
			example_units = nullif(st_ls.example_units, ''),
			inpc_percentage = nullif(st_ls.inpc_percentage, ''),
			long_common_name = nullif(st_ls.long_common_name, '')
		FROM staging.loinc_staging st_ls
		WHERE
			fk_data_source = %(src_pk)s AND
			code = nullif(st_ls.loinc_num, '') AND
			term = coalesce (
				nullif(st_ls.long_common_name, ''),
				(
					coalesce(nullif(st_ls.component, '') || ':', '') ||
					coalesce(nullif(st_ls.property, '') || ':', '') ||
					coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
					coalesce(nullif(st_ls.system, '') || ':', '') ||
					coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
					coalesce(nullif(st_ls.method_type, '') || ':', '')
				)
			)
		"""
	})
	curs = conn.cursor()
	gmPG2.run_rw_queries(link_obj=curs, queries=queries)
	curs.close()
	conn.commit()
	_log.debug('transfer from staging table to real table done')

	# clean out staging area
	curs = conn.cursor()
	cmd = """DELETE FROM staging.loinc_staging"""
	gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('staging table emptied')

	return True
def atc_import(cfg_fname=None, conn=None):
	"""Import an ATC release into ref.atc via ref.atc_staging.

	Reads release meta data (data file name, version, language,
	description, url, long/short name) from the config file <cfg_fname>,
	recreates the ref.data_source record for that release, bulk-loads the
	comma-separated data file into ref.atc_staging, transfers it into
	ref.atc and finally empties the staging table again.

	cfg_fname -- path to the .conf file; the data file named therein must
		live in the same directory
	conn -- DB-API connection; all commits happen on it

	Returns True on completion.
	"""
	# read meta data
	_cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8')
	data_fname = os.path.join (
		os.path.dirname(cfg_fname),
		_cfg.get(group = 'atc', option = 'data file', source_order = [('atc', 'return')])
	)	# must be in same dir as conf file
	version = _cfg.get(group = 'atc', option = 'version', source_order = [('atc', 'return')])
	lang = _cfg.get(group = 'atc', option = 'language', source_order = [('atc', 'return')])
	desc = _cfg.get(group = 'atc', option = 'description', source_order = [('atc', 'return')])
	url = _cfg.get(group = 'atc', option = 'url', source_order = [('atc', 'return')])
	name_long = _cfg.get(group = 'atc', option = 'long name', source_order = [('atc', 'return')])
	name_short = _cfg.get(group = 'atc', option = 'short name', source_order = [('atc', 'return')])
	_cfg.remove_source(source = 'atc')
	_log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)

	args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

	# create data source record (delete-then-insert, then fetch its PK)
	queries = [
		{
			'cmd': """delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
			'args': args
		},
		{
			'cmd': """
				insert into ref.data_source (name_long, name_short, version, description, lang, source)
				values (
					%(name_long)s,
					%(name_short)s,
					%(ver)s,
					%(desc)s,
					%(lang)s,
					%(url)s
				)""",
			'args': args
		},
		{
			'cmd': """select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
			'args': args
		}
	]
	rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
	data_src_pk = rows[0][0]
	_log.debug('ATC data source record created, pk is #%s', data_src_pk)

	# import data
	csv_file = io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
	atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"')

	# clean out staging area
	curs = conn.cursor()
	cmd = """delete from ref.atc_staging"""
	gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('ATC staging table emptied')

	# from file into staging table
	curs = conn.cursor()
	cmd = """insert into ref.atc_staging values (%s, %s, %s, %s, %s)"""
	first = False
	for atc_line in atc_reader:
		# skip first (header) line
		if not first:
			first = True
			continue
		# skip blanks (all five columns empty)
		if atc_line[0] + atc_line[1] + atc_line[2] + atc_line[3] + atc_line[4] == '':
			continue
		comment = ''
		unit = ''
		adro = ''
		# column 5 mixes dose, unit, administration route(s) and free-text
		# comment; split it into its parts by pattern
		# NOTE(review): patterns are non-raw strings ('\d', '\s') -- works,
		# but raw strings would be the conventional spelling
		# "1,1 mg O,P,R,..."
		if regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
			tmp, unit, adro = regex.split('\s', atc_line[4])
		# "1,1 mg O,P,R bezogen auf ..."
		elif regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
			tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
		# "20 mg O"
		elif regex.match('\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
			tmp, unit, adro = regex.split('\s', atc_line[4])
		# "20 mg O bezogen auf ..."
		elif regex.match('\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
			tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
		# "Standarddosis: 1 Tablette oder 30 ml Mixtur"
		else:
			comment = atc_line[4]
		# staging columns: atc, name, unit, adro, comment
		args = [
			atc_line[0].strip(),
			atc_line[2],
			unit,
			adro,
			comment
		]
		gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
	curs.close()
	conn.commit()
	csv_file.close()
	_log.debug('ATC staging table loaded')

	# from staging table to real table
	curs = conn.cursor()
	args = {'src_pk': data_src_pk}
	cmd = """
		insert into ref.atc (
			fk_data_source,
			code,
			term,
			comment,
			unit,
			administration_route
		)
		select
			%(src_pk)s,
			atc,
			name,
			nullif(comment, ''),
			nullif(unit, ''),
			nullif(adro, '')
		from ref.atc_staging
	"""
	gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
	curs.close()
	conn.commit()
	_log.debug('transfer from ATC staging table to real ATC table done')

	# clean out staging area
	curs = conn.cursor()
	cmd = """delete from ref.atc_staging"""
	gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('ATC staging table emptied')

	return True
def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
	"""Import a LOINC release into ref.loinc via a staging table.

	Pipeline: empty staging.loinc_staging -> bulk-load the tab-separated
	LOINC data file -> create/update the ref.data_source record for this
	release -> copy new codes into ref.loinc and refresh detail columns
	on existing ones -> empty the staging table again.

	data_fname -- path to the tab-delimited LOINC data file
	license_fname -- path to the license/readme file; its full text becomes
		the data source description and is used for version detection
	version -- LOINC release version; auto-detected from the license file
		when None; ValueError if it cannot be detected
	conn -- DB-API connection; all commits happen on it
	lang -- language tag recorded in ref.data_source

	Returns True on completion.
	"""
	if version is None:
		version = get_version(license_fname = license_fname)
	if version is None:
		raise ValueError('cannot detect LOINC version')
	_log.debug('importing LOINC version [%s]', version)

	# clean out staging area
	curs = conn.cursor()
	cmd = """DELETE FROM staging.loinc_staging"""
	gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('staging table emptied')

	# import data from csv file into staging table
	csv_file = io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
	loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')
	curs = conn.cursor()
	# builds "INSERT ... values (%s, %s, ..., %s)" with one placeholder
	# per LOINC field (note the escaped %%s supplying the final one)
	cmd = """INSERT INTO staging.loinc_staging values (%s%%s)""" % ('%s, ' * (len(loinc_fields) - 1))
	first = False
	for loinc_line in loinc_reader:
		if not first:
			# skip the header line of the data file
			first = True
			continue
		gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
	curs.close()
	conn.commit()
	csv_file.close()
	_log.debug('staging table loaded')

	# create data source record (license text serves as description)
	in_file = io.open(license_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
	desc = in_file.read()
	in_file.close()
	args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
	queries = [
		# insert if not existing
		{'args': args, 'cmd': """
			INSERT INTO ref.data_source (name_long, name_short, version)
			SELECT %(name_long)s, %(name_short)s, %(ver)s WHERE NOT EXISTS (
				SELECT 1 FROM ref.data_source WHERE
					name_long = %(name_long)s AND
					name_short = %(name_short)s AND
					version = %(ver)s
			)"""
		},
		# update non-unique fields
		{'args': args, 'cmd': """
			UPDATE ref.data_source SET
				description = %(desc)s,
				source = %(url)s,
				lang = %(lang)s
			WHERE
				name_long = %(name_long)s AND
				name_short = %(name_short)s AND
				version = %(ver)s
			"""
		},
		# retrieve PK of data source
		{'args': args, 'cmd': """SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
	]
	curs = conn.cursor()
	rows, idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True)
	data_src_pk = rows[0][0]
	curs.close()
	_log.debug('data source record created or updated, pk is #%s', data_src_pk)

	# import from staging table to real table
	args = {'src_pk': data_src_pk}
	queries = []
	# insert rows not yet in ref.loinc; <term> falls back to a
	# colon-joined concatenation of the part fields when the
	# long_common_name is empty
	queries.append ({
		'args': args,
		'cmd': """
			INSERT INTO ref.loinc (fk_data_source, term, code)
			SELECT
				%(src_pk)s,
				coalesce (
					nullif(long_common_name, ''),
					(
						coalesce(nullif(component, '') || ':', '') ||
						coalesce(nullif(property, '') || ':', '') ||
						coalesce(nullif(time_aspect, '') || ':', '') ||
						coalesce(nullif(system, '') || ':', '') ||
						coalesce(nullif(scale_type, '') || ':', '') ||
						coalesce(nullif(method_type, '') || ':', '')
					)
				),
				nullif(loinc_num, '')
			FROM staging.loinc_staging st_ls
			WHERE NOT EXISTS (
				SELECT 1 FROM ref.loinc r_l WHERE
					r_l.fk_data_source = %(src_pk)s AND
					r_l.code = nullif(st_ls.loinc_num, '') AND
					r_l.term = coalesce (
						nullif(st_ls.long_common_name, ''),
						(
							coalesce(nullif(st_ls.component, '') || ':', '') ||
							coalesce(nullif(st_ls.property, '') || ':', '') ||
							coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
							coalesce(nullif(st_ls.system, '') || ':', '') ||
							coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
							coalesce(nullif(st_ls.method_type, '') || ':', '')
						)
					)
			)"""
	})
	# refresh the detail columns of all rows matching this release
	# (matched on source PK, code and the same computed term)
	queries.append ({
		'args': args,
		'cmd': """
			UPDATE ref.loinc SET
				comment = nullif(st_ls.comments, ''),
				component = nullif(st_ls.component, ''),
				property = nullif(st_ls.property, ''),
				time_aspect = nullif(st_ls.time_aspect, ''),
				system = nullif(st_ls.system, ''),
				scale_type = nullif(st_ls.scale_type, ''),
				method_type = nullif(st_ls.method_type, ''),
				related_names_1_old = nullif(st_ls.related_names_1_old, ''),
				grouping_class = nullif(st_ls.class, ''),
				loinc_internal_source = nullif(st_ls.source, ''),
				dt_last_change = nullif(st_ls.dt_last_change, ''),
				change_type = nullif(st_ls.change_type, ''),
				answer_list = nullif(st_ls.answer_list, ''),
				code_status = nullif(st_ls.status, ''),
				maps_to = nullif(st_ls.map_to, ''),
				scope = nullif(st_ls.scope, ''),
				normal_range = nullif(st_ls.normal_range, ''),
				ipcc_units = nullif(st_ls.ipcc_units, ''),
				reference = nullif(st_ls.reference, ''),
				exact_component_synonym = nullif(st_ls.exact_component_synonym, ''),
				molar_mass = nullif(st_ls.molar_mass, ''),
				grouping_class_type = nullif(st_ls.class_type, '')::smallint,
				formula = nullif(st_ls.formula, ''),
				species = nullif(st_ls.species, ''),
				example_answers = nullif(st_ls.example_answers, ''),
				acs_synonyms = nullif(st_ls.acs_synonyms, ''),
				base_name = nullif(st_ls.base_name, ''),
				final = nullif(st_ls.final, ''),
				naa_ccr_id = nullif(st_ls.naa_ccr_id, ''),
				code_table = nullif(st_ls.code_table, ''),
				is_set_root = nullif(st_ls.is_set_root, '')::boolean,
				panel_elements = nullif(st_ls.panel_elements, ''),
				survey_question_text = nullif(st_ls.survey_question_text, ''),
				survey_question_source = nullif(st_ls.survey_question_source, ''),
				units_required = nullif(st_ls.units_required, ''),
				submitted_units = nullif(st_ls.submitted_units, ''),
				related_names_2 = nullif(st_ls.related_names_2, ''),
				short_name = nullif(st_ls.short_name, ''),
				order_obs = nullif(st_ls.order_obs, ''),
				cdisc_common_tests = nullif(st_ls.cdisc_common_tests, ''),
				hl7_field_subfield_id = nullif(st_ls.hl7_field_subfield_id, ''),
				external_copyright_notice = nullif(st_ls.external_copyright_notice, ''),
				example_units = nullif(st_ls.example_units, ''),
				inpc_percentage = nullif(st_ls.inpc_percentage, ''),
				long_common_name = nullif(st_ls.long_common_name, '')
			FROM staging.loinc_staging st_ls
			WHERE
				fk_data_source = %(src_pk)s AND
				code = nullif(st_ls.loinc_num, '') AND
				term = coalesce (
					nullif(st_ls.long_common_name, ''),
					(
						coalesce(nullif(st_ls.component, '') || ':', '') ||
						coalesce(nullif(st_ls.property, '') || ':', '') ||
						coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
						coalesce(nullif(st_ls.system, '') || ':', '') ||
						coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
						coalesce(nullif(st_ls.method_type, '') || ':', '')
					)
				)
			"""
	})
	curs = conn.cursor()
	gmPG2.run_rw_queries(link_obj = curs, queries = queries)
	curs.close()
	conn.commit()
	_log.debug('transfer from staging table to real table done')

	# clean out staging area
	curs = conn.cursor()
	cmd = """DELETE FROM staging.loinc_staging"""
	gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('staging table emptied')

	return True
def create_sql(filename): csv_file = open(filename, mode='rt', encoding='utf-8-sig') csv_lines = gmTools.unicode_csv_reader(csv_file, fieldnames=field_names, delimiter=';', quotechar='"', dict=True) print SQL_start line_idx = 0 skip_line = False for line in csv_lines: line_idx += 1 print "-- line #%s" % line_idx # normalize field content for field in field_names: try: line[field] = line[field].strip().strip(';,').strip().replace( "'", "''") except AttributeError: # trailing fields are a list pass # verify required fields for field in non_empty_fields: if line[field] == '': print "-- ignoring line: empty field [%s]" % field print "--", line print "" skip_line = True break if skip_line: skip_line = False continue # verify numeric fields for field in numeric_fields: if line[field] == '': continue success, num_val = gmTools.input2decimal(initial=line[field]) if not success: print "-- ignoring line: field [%s] not numeric: >>>%s<<<" % ( field, line[field]) print "--", line print "" skip_line = True break line[field] = num_val if skip_line: skip_line = False continue # actually create SQL # loop over strengths for field in numeric_fields: if line[field] == '': continue line['brand_name'] = ('%%(product)s %%(%s)s (%%(company)s)' % field) % line line['strength'] = line[field] print SQL_stage_drug % line print SQL_end
def create_sql(filename): csv_file = io.open(filename, mode = 'rt', encoding = u'utf8') csv_lines = gmTools.unicode_csv_reader ( csv_file, fieldnames = field_names, delimiter = ';', quotechar = '"', dict = True ) print SQL_start line_idx = 0 skip_line = False for line in csv_lines: line_idx += 1 print "-- line #%s" % line_idx # normalize field content for field in field_names: try: line[field] = line[field].strip().strip(u';,').strip().replace(u"'", u"''") except AttributeError: # trailing fields are a list pass # verify required fields for field in non_empty_fields: if line[field] == u'': print "-- ignoring line: empty field [%s]" % field print "--", line print "" skip_line = True break if skip_line: skip_line = False continue # verify numeric fields for field in numeric_fields: if line[field] == u'': continue success, num_val = gmTools.input2decimal(initial = line[field]) if not success: print "-- ignoring line: field [%s] not numeric: >>>%s<<<" % (field, line[field]) print "--", line print "" skip_line = True break line[field] = num_val if skip_line: skip_line = False continue # actually create SQL # loop over strengths for field in numeric_fields: if line[field] == u'': continue line['brand_name'] = (u'%%(brand)s %%(%s)s (%%(company)s)' % field) % line line['strength'] = line[field] print SQL_stage_drug % line print SQL_end
def atc_import(cfg_fname=None, conn=None):
	"""Import an ATC release into ref.atc via ref.atc_staging.

	Reads release meta data (data file name, version, language,
	description, url, long/short name) from the config file <cfg_fname>,
	recreates the ref.data_source record for that release, bulk-loads the
	comma-separated data file into ref.atc_staging, transfers it into
	ref.atc and finally empties the staging table again.

	cfg_fname -- path to the .conf file; the data file named therein must
		live in the same directory
	conn -- DB-API connection; all commits happen on it

	Returns True on completion.
	"""
	# read meta data
	_cfg.add_file_source(source='atc', file=cfg_fname, encoding='utf8')
	data_fname = os.path.join(os.path.dirname(cfg_fname), _cfg.get(group='atc', option='data file', source_order=[('atc', 'return')]))	# must be in same dir as conf file
	version = _cfg.get(group='atc', option='version', source_order=[('atc', 'return')])
	lang = _cfg.get(group='atc', option='language', source_order=[('atc', 'return')])
	desc = _cfg.get(group='atc', option='description', source_order=[('atc', 'return')])
	url = _cfg.get(group='atc', option='url', source_order=[('atc', 'return')])
	name_long = _cfg.get(group='atc', option='long name', source_order=[('atc', 'return')])
	name_short = _cfg.get(group='atc', option='short name', source_order=[('atc', 'return')])
	_cfg.remove_source(source='atc')
	_log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)

	args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

	# create data source record (delete-then-insert, then fetch its PK)
	queries = [{
		'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
		'args': args
	}, {
		'cmd': u"""
			insert into ref.data_source (name_long, name_short, version, description, lang, source)
			values (
				%(name_long)s,
				%(name_short)s,
				%(ver)s,
				%(desc)s,
				%(lang)s,
				%(url)s
			)""",
		'args': args
	}, {
		'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
		'args': args
	}]
	rows, idx = gmPG2.run_rw_queries(queries=queries, return_data=True)
	data_src_pk = rows[0][0]
	_log.debug('ATC data source record created, pk is #%s', data_src_pk)

	# import data
	csv_file = io.open(data_fname, mode='rt', encoding='utf8', errors='replace')
	atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter=",", quotechar='"')

	# clean out staging area
	curs = conn.cursor()
	cmd = u"""delete from ref.atc_staging"""
	gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('ATC staging table emptied')

	# from file into staging table
	curs = conn.cursor()
	cmd = u"""insert into ref.atc_staging values (%s, %s, %s, %s, %s)"""
	first = False
	for atc_line in atc_reader:
		# skip first (header) line
		if not first:
			first = True
			continue
		# skip blanks (all five columns empty)
		if atc_line[0] + atc_line[1] + atc_line[2] + atc_line[3] + atc_line[4] == u'':
			continue
		comment = u''
		unit = u''
		adro = u''
		# column 5 mixes dose, unit, administration route(s) and free-text
		# comment; split it into its parts by pattern
		# NOTE(review): patterns are non-raw strings ('\d', '\s') -- works,
		# but raw strings would be the conventional spelling
		# "1,1 mg O,P,R,..."
		if regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
			tmp, unit, adro = regex.split('\s', atc_line[4])
		# "1,1 mg O,P,R bezogen auf ..."
		elif regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
			tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
		# "20 mg O"
		elif regex.match('\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
			tmp, unit, adro = regex.split('\s', atc_line[4])
		# "20 mg O bezogen auf ..."
		elif regex.match('\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
			tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
		# "Standarddosis: 1 Tablette oder 30 ml Mixtur"
		else:
			comment = atc_line[4]
		# staging columns: atc, name, unit, adro, comment
		args = [atc_line[0].strip(), atc_line[2], unit, adro, comment]
		gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd, 'args': args}])
	curs.close()
	conn.commit()
	csv_file.close()
	_log.debug('ATC staging table loaded')

	# from staging table to real table
	curs = conn.cursor()
	args = {'src_pk': data_src_pk}
	cmd = u"""
		insert into ref.atc (
			fk_data_source,
			code,
			term,
			comment,
			unit,
			administration_route
		)
		select
			%(src_pk)s,
			atc,
			name,
			nullif(comment, ''),
			nullif(unit, ''),
			nullif(adro, '')
		from ref.atc_staging
	"""
	gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd, 'args': args}])
	curs.close()
	conn.commit()
	_log.debug('transfer from ATC staging table to real ATC table done')

	# clean out staging area
	curs = conn.cursor()
	cmd = u"""delete from ref.atc_staging"""
	gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('ATC staging table emptied')

	return True