Example #1
0
def run(conn=None):

    gmPG2.run_rw_queries(link_obj=conn,
                         queries=[{
                             'cmd': SQL_CREATE_DATA_SOURCE
                         }],
                         end_tx=False)
    rows, idx = gmPG2.run_ro_queries(link_obj=conn,
                                     queries=[{
                                         'cmd': SQL_GET_DATA_SOURCES
                                     }],
                                     get_col_idx=False)
    data_sources = rows[0]

    data_fname = os.path.join('..', 'sql', 'v14-v15', 'data',
                              'atc_only-utf8.csv')
    csv_file = io.open(data_fname,
                       mode='rt',
                       encoding='utf8',
                       errors='replace')
    atc_reader = gmTools.unicode_csv_reader(
        csv_file,
        delimiter=",",
        quotechar='"',
        dict=True,
        fieldnames=[u'atc', u'en', u'fr', u'de'])

    for atc_line in atc_reader:
        queries = [{
            'cmd': SQL_INSERT,
            'args': {
                u'src': data_sources['en'],
                u'code': atc_line['atc'],
                'term': atc_line['en']
            }
        }, {
            'cmd': SQL_INSERT,
            'args': {
                u'src': data_sources['fr'],
                u'code': atc_line['atc'],
                'term': atc_line['fr']
            }
        }, {
            'cmd': SQL_INSERT,
            'args': {
                u'src': data_sources['de'],
                u'code': atc_line['atc'],
                'term': atc_line['de']
            }
        }]
        gmPG2.run_rw_queries(link_obj=conn, queries=queries, end_tx=False)

    conn.commit()
Example #2
0
	def __init__(self, filename=None):

		_log.info(cGelbeListeCSVFile.version)

		self.filename = filename
		if filename is None:
			self.filename = cGelbeListeCSVFile.default_transfer_file_windows

		_log.debug('reading Gelbe Liste/MMI drug data from [%s]', self.filename)

		self.csv_file = io.open(filename, mode = 'rt', encoding = cGelbeListeCSVFile.default_encoding)

		self.csv_lines = gmTools.unicode_csv_reader (
			self.csv_file,
			fieldnames = cGelbeListeCSVFile.csv_fieldnames,
			delimiter = ';',
			quotechar = '"',
			dict = True
		)
Example #3
0
	def __init__(self, filename=None):

		_log.info(cGelbeListeCSVFile.version)

		self.filename = filename
		if filename is None:
			self.filename = cGelbeListeCSVFile.default_transfer_file_windows

		_log.debug('reading Gelbe Liste/MMI drug data from [%s]', self.filename)

		self.csv_file = io.open(filename, mode = 'rt', encoding = cGelbeListeCSVFile.default_encoding)

		self.csv_lines = gmTools.unicode_csv_reader (
			self.csv_file,
			fieldnames = cGelbeListeCSVFile.csv_fieldnames,
			delimiter = ';',
			quotechar = '"',
			dict = True
		)
Example #4
0
def loinc_import(data_fname=None,
                 license_fname=None,
                 version=None,
                 conn=None,
                 lang='en_EN'):

    if version is None:
        version = get_version(license_fname=license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    # clean out staging area
    curs = conn.cursor()
    cmd = """DELETE FROM staging.loinc_staging"""
    gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd}])
    curs.close()
    conn.commit()
    _log.debug('staging table emptied')

    # import data from csv file into staging table
    csv_file = io.open(data_fname,
                       mode='rt',
                       encoding='utf8',
                       errors='replace')
    loinc_reader = gmTools.unicode_csv_reader(csv_file,
                                              delimiter="\t",
                                              quotechar='"')
    curs = conn.cursor()
    cmd = """INSERT INTO staging.loinc_staging values (%s%%s)""" % (
        '%s, ' * (len(loinc_fields) - 1))
    first = False
    for loinc_line in loinc_reader:
        if not first:
            first = True
            continue
        gmPG2.run_rw_queries(link_obj=curs,
                             queries=[{
                                 'cmd': cmd,
                                 'args': loinc_line
                             }])
    curs.close()
    conn.commit()
    csv_file.close()
    _log.debug('staging table loaded')

    # create data source record
    in_file = io.open(license_fname,
                      mode='rt',
                      encoding='utf8',
                      errors='replace')
    desc = in_file.read()
    in_file.close()
    args = {
        'ver': version,
        'desc': desc,
        'url': origin_url,
        'name_long': name_long,
        'name_short': name_short,
        'lang': lang
    }
    queries = [
        # insert if not existing
        {
            'args':
            args,
            'cmd':
            """
			INSERT INTO ref.data_source (name_long, name_short, version) SELECT
				%(name_long)s,
				%(name_short)s,
				%(ver)s
			WHERE NOT EXISTS (
				SELECT 1 FROM ref.data_source WHERE
					name_long = %(name_long)s
						AND
					name_short = %(name_short)s
						AND
					version = %(ver)s
			)"""
        },
        # update non-unique fields
        {
            'args':
            args,
            'cmd':
            """
			UPDATE ref.data_source SET
				description = %(desc)s,
				source = %(url)s,
				lang = %(lang)s
			WHERE
				name_long = %(name_long)s
					AND
				name_short = %(name_short)s
					AND
				version = %(ver)s
			"""
        },
        # retrieve PK of data source
        {
            'args':
            args,
            'cmd':
            """SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""
        }
    ]
    curs = conn.cursor()
    rows, idx = gmPG2.run_rw_queries(link_obj=curs,
                                     queries=queries,
                                     return_data=True)
    data_src_pk = rows[0][0]
    curs.close()
    _log.debug('data source record created or updated, pk is #%s', data_src_pk)

    # import from staging table to real table
    args = {'src_pk': data_src_pk}
    queries = []
    queries.append({
        'args':
        args,
        'cmd':
        """
			INSERT INTO ref.loinc (
				fk_data_source, term, code
			)
			SELECT
				%(src_pk)s,
				coalesce (
					nullif(long_common_name, ''),
					(
						coalesce(nullif(component, '') || ':', '') ||
						coalesce(nullif(property, '') || ':', '') ||
						coalesce(nullif(time_aspect, '') || ':', '') ||
						coalesce(nullif(system, '') || ':', '') ||
						coalesce(nullif(scale_type, '') || ':', '') ||
						coalesce(nullif(method_type, '') || ':', '')
					)
				),
				nullif(loinc_num, '')
			FROM
				staging.loinc_staging st_ls
			WHERE NOT EXISTS (
				SELECT 1 FROM ref.loinc r_l WHERE
					r_l.fk_data_source = %(src_pk)s
						AND
					r_l.code = nullif(st_ls.loinc_num, '')
						AND
					r_l.term = 	coalesce (
						nullif(st_ls.long_common_name, ''),
						(
							coalesce(nullif(st_ls.component, '') || ':', '') ||
							coalesce(nullif(st_ls.property, '') || ':', '') ||
							coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
							coalesce(nullif(st_ls.system, '') || ':', '') ||
							coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
							coalesce(nullif(st_ls.method_type, '') || ':', '')
						)
					)
			)"""
    })
    queries.append({
        'args':
        args,
        'cmd':
        """
			UPDATE ref.loinc SET
				comment = nullif(st_ls.comments, ''),
				component = nullif(st_ls.component, ''),
				property = nullif(st_ls.property, ''),
				time_aspect = nullif(st_ls.time_aspect, ''),
				system = nullif(st_ls.system, ''),
				scale_type = nullif(st_ls.scale_type, ''),
				method_type = nullif(st_ls.method_type, ''),
				related_names_1_old = nullif(st_ls.related_names_1_old, ''),
				grouping_class = nullif(st_ls.class, ''),
				loinc_internal_source = nullif(st_ls.source, ''),
				dt_last_change = nullif(st_ls.dt_last_change, ''),
				change_type = nullif(st_ls.change_type, ''),
				answer_list = nullif(st_ls.answer_list, ''),
				code_status = nullif(st_ls.status, ''),
				maps_to = nullif(st_ls.map_to, ''),
				scope = nullif(st_ls.scope, ''),
				normal_range = nullif(st_ls.normal_range, ''),
				ipcc_units = nullif(st_ls.ipcc_units, ''),
				reference = nullif(st_ls.reference, ''),
				exact_component_synonym = nullif(st_ls.exact_component_synonym, ''),
				molar_mass = nullif(st_ls.molar_mass, ''),
				grouping_class_type = nullif(st_ls.class_type, '')::smallint,
				formula = nullif(st_ls.formula, ''),
				species = nullif(st_ls.species, ''),
				example_answers = nullif(st_ls.example_answers, ''),
				acs_synonyms = nullif(st_ls.acs_synonyms, ''),
				base_name = nullif(st_ls.base_name, ''),
				final = nullif(st_ls.final, ''),
				naa_ccr_id = nullif(st_ls.naa_ccr_id, ''),
				code_table = nullif(st_ls.code_table, ''),
				is_set_root = nullif(st_ls.is_set_root, '')::boolean,
				panel_elements = nullif(st_ls.panel_elements, ''),
				survey_question_text = nullif(st_ls.survey_question_text, ''),
				survey_question_source = nullif(st_ls.survey_question_source, ''),
				units_required = nullif(st_ls.units_required, ''),
				submitted_units = nullif(st_ls.submitted_units, ''),
				related_names_2 = nullif(st_ls.related_names_2, ''),
				short_name = nullif(st_ls.short_name, ''),
				order_obs = nullif(st_ls.order_obs, ''),
				cdisc_common_tests = nullif(st_ls.cdisc_common_tests, ''),
				hl7_field_subfield_id = nullif(st_ls.hl7_field_subfield_id, ''),
				external_copyright_notice = nullif(st_ls.external_copyright_notice, ''),
				example_units = nullif(st_ls.example_units, ''),
				inpc_percentage = nullif(st_ls.inpc_percentage, ''),
				long_common_name = nullif(st_ls.long_common_name, '')
			FROM
				staging.loinc_staging st_ls
			WHERE
				fk_data_source = %(src_pk)s
					AND
				code = nullif(st_ls.loinc_num, '')
					AND
				term = coalesce (
					nullif(st_ls.long_common_name, ''),
					(
						coalesce(nullif(st_ls.component, '') || ':', '') ||
						coalesce(nullif(st_ls.property, '') || ':', '') ||
						coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
						coalesce(nullif(st_ls.system, '') || ':', '') ||
						coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
						coalesce(nullif(st_ls.method_type, '') || ':', '')
					)
				)
		"""
    })
    curs = conn.cursor()
    gmPG2.run_rw_queries(link_obj=curs, queries=queries)
    curs.close()
    conn.commit()
    _log.debug('transfer from staging table to real table done')

    # clean out staging area
    curs = conn.cursor()
    cmd = """DELETE FROM staging.loinc_staging"""
    gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd}])
    curs.close()
    conn.commit()
    _log.debug('staging table emptied')

    return True
Example #5
0
def atc_import(cfg_fname=None, conn=None):

	# read meta data
	_cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8')

	data_fname = os.path.join (
		os.path.dirname(cfg_fname),
		_cfg.get(group = 'atc', option = 'data file', source_order = [('atc', 'return')])
	)			# must be in same dir as conf file
	version = _cfg.get(group = 'atc', option = 'version', source_order = [('atc', 'return')])
	lang = _cfg.get(group = 'atc', option = 'language', source_order = [('atc', 'return')])
	desc = _cfg.get(group = 'atc', option = 'description', source_order = [('atc', 'return')])
	url = _cfg.get(group = 'atc', option = 'url', source_order = [('atc', 'return')])
	name_long = _cfg.get(group = 'atc', option = 'long name', source_order = [('atc', 'return')])
	name_short = _cfg.get(group = 'atc', option = 'short name', source_order = [('atc', 'return')])

	_cfg.remove_source(source = 'atc')

	_log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)

	args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

	# create data source record
	queries = [
		{
		'cmd': """delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
		'args': args
		}, {
		'cmd': """
insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
	%(name_long)s,
	%(name_short)s,
	%(ver)s,
	%(desc)s,
	%(lang)s,
	%(url)s
)""",
		'args': args
		}, {
		'cmd': """select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
		'args': args
		}
	]
	rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
	data_src_pk = rows[0][0]
	_log.debug('ATC data source record created, pk is #%s', data_src_pk)

	# import data
	csv_file = io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
	atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"')

	# clean out staging area
	curs = conn.cursor()
	cmd = """delete from ref.atc_staging"""
	gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('ATC staging table emptied')

	# from file into staging table
	curs = conn.cursor()
	cmd = """insert into ref.atc_staging values (%s, %s, %s, %s, %s)"""
	first = False
	for atc_line in atc_reader:
		# skip first
		if not first:
			first = True
			continue

		# skip blanks
		if atc_line[0] + atc_line[1] + atc_line[2] + atc_line[3] + atc_line[4] == '':
			continue

		comment = ''
		unit = ''
		adro = ''

		# "1,1 mg O,P,R,..."
		if regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
			tmp, unit, adro = regex.split('\s', atc_line[4])
		# "1,1 mg O,P,R bezogen auf ..."
		elif regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
			tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
		# "20 mg O"
		elif regex.match('\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
			tmp, unit, adro = regex.split('\s', atc_line[4])
		# "20 mg O bezogen auf ..."
		elif regex.match('\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
			tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
		# "Standarddosis: 1 Tablette oder 30 ml Mixtur"
		else:
			comment = atc_line[4]

		args = [
			atc_line[0].strip(),
			atc_line[2],
			unit,
			adro,
			comment
		]

		gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])

	curs.close()
	conn.commit()
	csv_file.close()
	_log.debug('ATC staging table loaded')

	# from staging table to real table
	curs = conn.cursor()
	args = {'src_pk': data_src_pk}
	cmd = """
insert into ref.atc (
	fk_data_source,
	code,
	term,
	comment,
	unit,
	administration_route
) select
	%(src_pk)s,
	atc,
	name,
	nullif(comment, ''),
	nullif(unit, ''),
	nullif(adro, '')

from
	ref.atc_staging
"""

	gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])

	curs.close()
	conn.commit()
	_log.debug('transfer from ATC staging table to real ATC table done')

	# clean out staging area
	curs = conn.cursor()
	cmd = """delete from ref.atc_staging"""
	gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('ATC staging table emptied')

	return True
Example #6
0
def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):

	if version is None:
		version = get_version(license_fname = license_fname)

	if version is None:
		raise ValueError('cannot detect LOINC version')

	_log.debug('importing LOINC version [%s]', version)

	# clean out staging area
	curs = conn.cursor()
	cmd = """DELETE FROM staging.loinc_staging"""
	gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('staging table emptied')

	# import data from csv file into staging table
	csv_file = io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
	loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')
	curs = conn.cursor()
	cmd = """INSERT INTO staging.loinc_staging values (%s%%s)""" % ('%s, ' * (len(loinc_fields) - 1))
	first = False
	for loinc_line in loinc_reader:
		if not first:
			first = True
			continue
		gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
	curs.close()
	conn.commit()
	csv_file.close()
	_log.debug('staging table loaded')

	# create data source record
	in_file = io.open(license_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
	desc = in_file.read()
	in_file.close()
	args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
	queries = [
		# insert if not existing
		{'args': args, 'cmd': """
			INSERT INTO ref.data_source (name_long, name_short, version) SELECT
				%(name_long)s,
				%(name_short)s,
				%(ver)s
			WHERE NOT EXISTS (
				SELECT 1 FROM ref.data_source WHERE
					name_long = %(name_long)s
						AND
					name_short = %(name_short)s
						AND
					version = %(ver)s
			)"""
		},
		# update non-unique fields
		{'args': args, 'cmd': """
			UPDATE ref.data_source SET
				description = %(desc)s,
				source = %(url)s,
				lang = %(lang)s
			WHERE
				name_long = %(name_long)s
					AND
				name_short = %(name_short)s
					AND
				version = %(ver)s
			"""
		},
		# retrieve PK of data source
		{'args': args, 'cmd': """SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
	]
	curs = conn.cursor()
	rows, idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True)
	data_src_pk = rows[0][0]
	curs.close()
	_log.debug('data source record created or updated, pk is #%s', data_src_pk)

	# import from staging table to real table
	args = {'src_pk': data_src_pk}
	queries = []
	queries.append ({
		'args': args,
		'cmd': """
			INSERT INTO ref.loinc (
				fk_data_source, term, code
			)
			SELECT
				%(src_pk)s,
				coalesce (
					nullif(long_common_name, ''),
					(
						coalesce(nullif(component, '') || ':', '') ||
						coalesce(nullif(property, '') || ':', '') ||
						coalesce(nullif(time_aspect, '') || ':', '') ||
						coalesce(nullif(system, '') || ':', '') ||
						coalesce(nullif(scale_type, '') || ':', '') ||
						coalesce(nullif(method_type, '') || ':', '')
					)
				),
				nullif(loinc_num, '')
			FROM
				staging.loinc_staging st_ls
			WHERE NOT EXISTS (
				SELECT 1 FROM ref.loinc r_l WHERE
					r_l.fk_data_source = %(src_pk)s
						AND
					r_l.code = nullif(st_ls.loinc_num, '')
						AND
					r_l.term = 	coalesce (
						nullif(st_ls.long_common_name, ''),
						(
							coalesce(nullif(st_ls.component, '') || ':', '') ||
							coalesce(nullif(st_ls.property, '') || ':', '') ||
							coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
							coalesce(nullif(st_ls.system, '') || ':', '') ||
							coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
							coalesce(nullif(st_ls.method_type, '') || ':', '')
						)
					)
			)"""
	})
	queries.append ({
		'args': args,
		'cmd': """
			UPDATE ref.loinc SET
				comment = nullif(st_ls.comments, ''),
				component = nullif(st_ls.component, ''),
				property = nullif(st_ls.property, ''),
				time_aspect = nullif(st_ls.time_aspect, ''),
				system = nullif(st_ls.system, ''),
				scale_type = nullif(st_ls.scale_type, ''),
				method_type = nullif(st_ls.method_type, ''),
				related_names_1_old = nullif(st_ls.related_names_1_old, ''),
				grouping_class = nullif(st_ls.class, ''),
				loinc_internal_source = nullif(st_ls.source, ''),
				dt_last_change = nullif(st_ls.dt_last_change, ''),
				change_type = nullif(st_ls.change_type, ''),
				answer_list = nullif(st_ls.answer_list, ''),
				code_status = nullif(st_ls.status, ''),
				maps_to = nullif(st_ls.map_to, ''),
				scope = nullif(st_ls.scope, ''),
				normal_range = nullif(st_ls.normal_range, ''),
				ipcc_units = nullif(st_ls.ipcc_units, ''),
				reference = nullif(st_ls.reference, ''),
				exact_component_synonym = nullif(st_ls.exact_component_synonym, ''),
				molar_mass = nullif(st_ls.molar_mass, ''),
				grouping_class_type = nullif(st_ls.class_type, '')::smallint,
				formula = nullif(st_ls.formula, ''),
				species = nullif(st_ls.species, ''),
				example_answers = nullif(st_ls.example_answers, ''),
				acs_synonyms = nullif(st_ls.acs_synonyms, ''),
				base_name = nullif(st_ls.base_name, ''),
				final = nullif(st_ls.final, ''),
				naa_ccr_id = nullif(st_ls.naa_ccr_id, ''),
				code_table = nullif(st_ls.code_table, ''),
				is_set_root = nullif(st_ls.is_set_root, '')::boolean,
				panel_elements = nullif(st_ls.panel_elements, ''),
				survey_question_text = nullif(st_ls.survey_question_text, ''),
				survey_question_source = nullif(st_ls.survey_question_source, ''),
				units_required = nullif(st_ls.units_required, ''),
				submitted_units = nullif(st_ls.submitted_units, ''),
				related_names_2 = nullif(st_ls.related_names_2, ''),
				short_name = nullif(st_ls.short_name, ''),
				order_obs = nullif(st_ls.order_obs, ''),
				cdisc_common_tests = nullif(st_ls.cdisc_common_tests, ''),
				hl7_field_subfield_id = nullif(st_ls.hl7_field_subfield_id, ''),
				external_copyright_notice = nullif(st_ls.external_copyright_notice, ''),
				example_units = nullif(st_ls.example_units, ''),
				inpc_percentage = nullif(st_ls.inpc_percentage, ''),
				long_common_name = nullif(st_ls.long_common_name, '')
			FROM
				staging.loinc_staging st_ls
			WHERE
				fk_data_source = %(src_pk)s
					AND
				code = nullif(st_ls.loinc_num, '')
					AND
				term = coalesce (
					nullif(st_ls.long_common_name, ''),
					(
						coalesce(nullif(st_ls.component, '') || ':', '') ||
						coalesce(nullif(st_ls.property, '') || ':', '') ||
						coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
						coalesce(nullif(st_ls.system, '') || ':', '') ||
						coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
						coalesce(nullif(st_ls.method_type, '') || ':', '')
					)
				)
		"""
	})
	curs = conn.cursor()
	gmPG2.run_rw_queries(link_obj = curs, queries = queries)
	curs.close()
	conn.commit()
	_log.debug('transfer from staging table to real table done')

	# clean out staging area
	curs = conn.cursor()
	cmd = """DELETE FROM staging.loinc_staging"""
	gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
	curs.close()
	conn.commit()
	_log.debug('staging table emptied')

	return True
Example #7
0
def create_sql(filename):

    csv_file = open(filename, mode='rt', encoding='utf-8-sig')

    csv_lines = gmTools.unicode_csv_reader(csv_file,
                                           fieldnames=field_names,
                                           delimiter=';',
                                           quotechar='"',
                                           dict=True)

    print SQL_start

    line_idx = 0
    skip_line = False
    for line in csv_lines:
        line_idx += 1
        print "-- line #%s" % line_idx
        # normalize field content
        for field in field_names:
            try:
                line[field] = line[field].strip().strip(';,').strip().replace(
                    "'", "''")
            except AttributeError:  # trailing fields are a list
                pass
        # verify required fields
        for field in non_empty_fields:
            if line[field] == '':
                print "-- ignoring line: empty field [%s]" % field
                print "--", line
                print ""
                skip_line = True
                break
        if skip_line:
            skip_line = False
            continue
        # verify numeric fields
        for field in numeric_fields:
            if line[field] == '':
                continue
            success, num_val = gmTools.input2decimal(initial=line[field])
            if not success:
                print "-- ignoring line: field [%s] not numeric: >>>%s<<<" % (
                    field, line[field])
                print "--", line
                print ""
                skip_line = True
                break
            line[field] = num_val
        if skip_line:
            skip_line = False
            continue

        # actually create SQL
        # loop over strengths
        for field in numeric_fields:
            if line[field] == '':
                continue
            line['brand_name'] = ('%%(product)s %%(%s)s (%%(company)s)' %
                                  field) % line
            line['strength'] = line[field]
            print SQL_stage_drug % line

    print SQL_end
Example #8
0
def create_sql(filename):

	csv_file = io.open(filename, mode = 'rt', encoding = u'utf8')

	csv_lines = gmTools.unicode_csv_reader (
		csv_file,
		fieldnames = field_names,
		delimiter = ';',
		quotechar = '"',
		dict = True
	)

	print SQL_start

	line_idx = 0
	skip_line = False
	for line in csv_lines:
		line_idx += 1
		print "-- line #%s" % line_idx
		# normalize field content
		for field in field_names:
			try:
				line[field] = line[field].strip().strip(u';,').strip().replace(u"'", u"''")
			except AttributeError:		# trailing fields are a list
				pass
		# verify required fields
		for field in non_empty_fields:
			if line[field] == u'':
				print "-- ignoring line: empty field [%s]" % field
				print "--", line
				print ""
				skip_line = True
				break
		if skip_line:
			skip_line = False
			continue
		# verify numeric fields
		for field in numeric_fields:
			if line[field] == u'':
				continue
			success, num_val = gmTools.input2decimal(initial = line[field])
			if not success:
				print "-- ignoring line: field [%s] not numeric: >>>%s<<<" % (field, line[field])
				print "--", line
				print ""
				skip_line = True
				break
			line[field] = num_val
		if skip_line:
			skip_line = False
			continue

		# actually create SQL
		# loop over strengths
		for field in numeric_fields:
			if line[field] == u'':
				continue
			line['brand_name'] = (u'%%(brand)s %%(%s)s (%%(company)s)' % field) % line
			line['strength'] = line[field]
			print SQL_stage_drug % line

	print SQL_end
Example #9
0
def atc_import(cfg_fname=None, conn=None):

    # read meta data
    _cfg.add_file_source(source='atc', file=cfg_fname, encoding='utf8')

    data_fname = os.path.join(os.path.dirname(cfg_fname),
                              _cfg.get(group='atc',
                                       option='data file',
                                       source_order=[
                                           ('atc', 'return')
                                       ]))  # must be in same dir as conf file
    version = _cfg.get(group='atc',
                       option='version',
                       source_order=[('atc', 'return')])
    lang = _cfg.get(group='atc',
                    option='language',
                    source_order=[('atc', 'return')])
    desc = _cfg.get(group='atc',
                    option='description',
                    source_order=[('atc', 'return')])
    url = _cfg.get(group='atc', option='url', source_order=[('atc', 'return')])
    name_long = _cfg.get(group='atc',
                         option='long name',
                         source_order=[('atc', 'return')])
    name_short = _cfg.get(group='atc',
                          option='short name',
                          source_order=[('atc', 'return')])

    _cfg.remove_source(source='atc')

    _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang,
               data_fname)

    args = {
        'ver': version,
        'desc': desc,
        'url': url,
        'name_long': name_long,
        'name_short': name_short,
        'lang': lang
    }

    # create data source record
    queries = [{
        'cmd':
        u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
    }, {
        'cmd': u"""
insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
	%(name_long)s,
	%(name_short)s,
	%(ver)s,
	%(desc)s,
	%(lang)s,
	%(url)s
)""",
        'args': args
    }, {
        'cmd':
        u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
    }]
    rows, idx = gmPG2.run_rw_queries(queries=queries, return_data=True)
    data_src_pk = rows[0][0]
    _log.debug('ATC data source record created, pk is #%s', data_src_pk)

    # import data
    csv_file = io.open(data_fname,
                       mode='rt',
                       encoding='utf8',
                       errors='replace')
    atc_reader = gmTools.unicode_csv_reader(csv_file,
                                            delimiter=",",
                                            quotechar='"')

    # clean out staging area
    curs = conn.cursor()
    cmd = u"""delete from ref.atc_staging"""
    gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd}])
    curs.close()
    conn.commit()
    _log.debug('ATC staging table emptied')

    # from file into staging table
    curs = conn.cursor()
    cmd = u"""insert into ref.atc_staging values (%s, %s, %s, %s, %s)"""
    first = False
    for atc_line in atc_reader:
        # skip first
        if not first:
            first = True
            continue

        # skip blanks
        if atc_line[0] + atc_line[1] + atc_line[2] + atc_line[3] + atc_line[
                4] == u'':
            continue

        comment = u''
        unit = u''
        adro = u''

        # "1,1 mg O,P,R,..."
        if regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
            tmp, unit, adro = regex.split('\s', atc_line[4])
        # "1,1 mg O,P,R bezogen auf ..."
        elif regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
            tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
        # "20 mg O"
        elif regex.match('\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
            tmp, unit, adro = regex.split('\s', atc_line[4])
        # "20 mg O bezogen auf ..."
        elif regex.match('\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
            tmp, unit, adro, comment = regex.split('\s', atc_line[4], 3)
        # "Standarddosis: 1 Tablette oder 30 ml Mixtur"
        else:
            comment = atc_line[4]

        args = [atc_line[0].strip(), atc_line[2], unit, adro, comment]

        gmPG2.run_rw_queries(link_obj=curs,
                             queries=[{
                                 'cmd': cmd,
                                 'args': args
                             }])

    curs.close()
    conn.commit()
    csv_file.close()
    _log.debug('ATC staging table loaded')

    # from staging table to real table
    curs = conn.cursor()
    args = {'src_pk': data_src_pk}
    cmd = u"""
insert into ref.atc (
	fk_data_source,
	code,
	term,
	comment,
	unit,
	administration_route
) select
	%(src_pk)s,
	atc,
	name,
	nullif(comment, ''),
	nullif(unit, ''),
	nullif(adro, '')

from
	ref.atc_staging
"""

    gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd, 'args': args}])

    curs.close()
    conn.commit()
    _log.debug('transfer from ATC staging table to real ATC table done')

    # clean out staging area
    curs = conn.cursor()
    cmd = u"""delete from ref.atc_staging"""
    gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd}])
    curs.close()
    conn.commit()
    _log.debug('ATC staging table emptied')

    return True