Exemple #1
0
 def enrich_from_sql(self):
     '''enrich using a database as source'''
     cursor = util.sql_connect().cursor()
     cursor.execute(
         '''SELECT substr(icpc,1,3) FROM journalen WHERE soepcode = 'E' ''')
     idx = 0  # we query only ICPC codes
     return self.icpc_enrichment(cursor, idx, from_sql=True)
def execute(in_dir, delim, out_file, age_gender=False, counts_med=False, 
				counts_med_enrich=False, counts_consult=False, counts_consult_enrich=False,
				counts_referral=False, counts_lab=False, tmprl=False, 
				enriched_tmprl=False, knowledge_driven=False, counts_no_knowledge=False, tmprl_no_knowledge=False,
				separate=False, HISes=[]):
	'''merge the in files to produce the out file'''
	merged = defaultdict(list)
	headers = ['ID']

	# we may not need this.
	ID2HIS = {}
	merged_test = defaultdict(list)

	# if we wish to separate, get dictionary of patient HIS sources using SQL.
	if separate:
		c = util.sql_connect().cursor()
		HISes_str = "','".join(HISes)
		q = '''SELECT patientnummer 
				FROM patienten
				WHERE PRAKTIJKCODE IN ('{}')'''.format(HISes_str)
		c.execute(q)
		
		ID2HIS = {row[0] : row[0] for row in c}

	if age_gender:
		headers = merge_file(in_dir+'/AG.csv', merged, headers, delim, separate, ID2HIS, merged_test)
		
	if counts_med:
		headers = merge_file(in_dir+'/C_M.csv', merged, headers, delim, separate, ID2HIS, merged_test)

	if counts_med_enrich:
		headers = merge_file(in_dir+'/C_M_enrich.csv', merged, headers, delim, separate, ID2HIS, merged_test)

	if counts_consult:
		headers = merge_file(in_dir+'/C_C.csv', merged, headers, delim, separate, ID2HIS, merged_test)
	
	if counts_consult_enrich:
		headers = merge_file(in_dir+'/C_C_enrich.csv', merged, headers, delim, separate, ID2HIS, merged_test)

	if counts_referral:
		headers = merge_file(in_dir+'/C_R.csv', merged, headers, delim, separate, ID2HIS, merged_test)
	
	if counts_lab:
		headers = merge_file(in_dir+'/C_L.csv', merged, headers, delim, separate, ID2HIS, merged_test)

	if tmprl:
		headers = merge_file(in_dir+'/T.csv', merged, headers, delim, separate, ID2HIS, merged_test)

	if enriched_tmprl:
		headers = merge_file(in_dir+'/T_enrich.csv', merged, headers, delim, separate, ID2HIS, merged_test)

	if knowledge_driven:
		headers = merge_file(in_dir+'/K.csv', merged, headers, delim, separate, ID2HIS, merged_test)

	if counts_no_knowledge:
		headers = merge_file(in_dir+'/C_NK.csv', merged, headers, delim, separate, ID2HIS, merged_test)

	if tmprl_no_knowledge:
		headers = merge_file(in_dir+'/T_NK.csv', merged, headers, delim, separate, ID2HIS, merged_test)

	headers = merge_file(in_dir+'/CRC.csv', merged, headers, delim, separate, ID2HIS, merged_test)
	
	# now write to new file (also check whether all results have same length)
	make_dir(out_file)
	out = io.write_csv(out_file)

	out.writerow(headers)
	skip=0
	for key in merged:
		if len(headers) != 1+len(merged[key]):
			print 'unequal to header amount ({} vs {})! watch out.'.format(len(headers),len(merged[key]))
			# skip+=1
			# continue
		out.writerow([key] + merged[key])

	if separate:
		out_file_test = out_file[:out_file.rfind('/')+1] + 'test' + out_file[out_file.rfind('/'):]
		make_dir(out_file_test)

		out = io.write_csv(out_file_test)
		
		out.writerow(headers)
		for key in merged_test:
			if len(headers) != 1+len(merged_test[key]):
				print 'unequal to header amount ({} vs {})! watch out.'.format(len(headers),len(merged_test[key]))
				# skip+=1
				# continue
			out.writerow([key] + merged_test[key])

	print '## Done Merging ##'
Exemple #3
0
def execute(in_dir,
            delim,
            out_file,
            age_gender=False,
            counts_med=False,
            counts_med_enrich=False,
            counts_consult=False,
            counts_consult_enrich=False,
            counts_referral=False,
            counts_lab=False,
            all_counts=False,
            tmprl=False,
            enriched_tmprl=False,
            knowledge_driven=False,
            counts_no_knowledge=False,
            tmprl_no_knowledge=False,
            separate=False,
            HISes=[]):
    '''merge the in files to produce the out file'''
    merged = defaultdict(list)
    headers = ['ID']

    # we may not need this.
    ID2HIS = {}
    merged_test = defaultdict(list)

    # if we wish to separate, get dictionary of patient HIS sources using SQL.
    if separate:
        c = util.sql_connect().cursor()
        HISes_str = "','".join(HISes)
        q = '''SELECT patientnummer 
				FROM patienten
				WHERE PRAKTIJKCODE IN ('{}')'''.format(HISes_str)
        c.execute(q)

        ID2HIS = {row[0]: row[0] for row in c}

    if age_gender:
        headers = merge_file(in_dir + '/AG.csv', merged, headers, delim,
                             separate, ID2HIS, merged_test)

    if counts_med:
        headers = merge_file(in_dir + '/C_M.csv', merged, headers, delim,
                             separate, ID2HIS, merged_test)

    if counts_med_enrich:
        headers = merge_file(in_dir + '/C_M_enrich.csv', merged, headers,
                             delim, separate, ID2HIS, merged_test)

    if counts_consult:
        headers = merge_file(in_dir + '/C_C.csv', merged, headers, delim,
                             separate, ID2HIS, merged_test)

    if counts_consult_enrich:
        headers = merge_file(in_dir + '/C_C_enrich.csv', merged, headers,
                             delim, separate, ID2HIS, merged_test)

    if counts_referral:
        headers = merge_file(in_dir + '/C_R.csv', merged, headers, delim,
                             separate, ID2HIS, merged_test)

    if counts_lab:
        headers = merge_file(in_dir + '/C_L.csv', merged, headers, delim,
                             separate, ID2HIS, merged_test)

    if tmprl:
        headers = merge_file(in_dir + '/T.csv', merged, headers, delim,
                             separate, ID2HIS, merged_test)

    if enriched_tmprl:
        headers = merge_file(in_dir + '/T_enrich.csv', merged, headers, delim,
                             separate, ID2HIS, merged_test)

    if knowledge_driven:
        headers = merge_file(in_dir + '/K.csv', merged, headers, delim,
                             separate, ID2HIS, merged_test)

    if counts_no_knowledge:
        headers = merge_file(in_dir + '/C_NK.csv', merged, headers, delim,
                             separate, ID2HIS, merged_test)

    if tmprl_no_knowledge:
        headers = merge_file(in_dir + '/T_NK.csv', merged, headers, delim,
                             separate, ID2HIS, merged_test)

    if all_counts:
        print('ja')
        headers = merge_file(in_dir + '/counts.csv', merged, headers, delim,
                             separate, ID2HIS, merged_test)

    headers = merge_file(in_dir + '/stroke.csv', merged, headers, delim,
                         separate, ID2HIS, merged_test)

    # now write to new file (also check whether all results have same length)
    make_dir(out_file)
    out = io.write_csv(out_file)

    out.writerow(headers)
    skip = 0

    for key in merged:
        if len(headers) != 1 + len(merged[key]):
            print('unequal to header amount ({} vs {})! watch out.'.format(
                len(headers), len(merged[key])))
            # skip+=1
            # continue
        out.writerow([key] + merged[key])

    if separate:
        out_file_test = out_file[:out_file.rfind('/') +
                                 1] + 'test' + out_file[out_file.rfind('/'):]
        make_dir(out_file_test)

        out = io.write_csv(out_file_test)

        out.writerow(headers)
        for key in merged_test:
            if len(headers) != 1 + len(merged_test[key]):
                print('unequal to header amount ({} vs {})! watch out.'.format(
                    len(headers), len(merged_test[key])))
                # skip+=1
                # continue
            out.writerow([key] + merged_test[key])

    print('## Done Merging ##')
 def enrich_from_sql(self):
     '''enrich using a database as source'''
     cursor = util.sql_connect().cursor()
     cursor.execute('''SELECT atc_code FROM medicaties''')
     idx = 0  # we query only ATC codes
     return self.atc_enrichment(cursor, idx)
Exemple #5
0
    def process_sql(self, needs_processing):
        '''converts the specified sql tables to usable data'''

        # connect to database and get a cursor handle
        cursor = util.sql_connect().cursor()

        # put the IDs of the 'main' file in a dict
        headers = self.get_IDs_sql(cursor)

        # add CRC value to each patient
        self.get_CRC_occurrences_sql(cursor)

        # randomize dates
        self.insert_data_intervals()

        # gather data from medication csv
        if 'medication' in needs_processing and needs_processing['medication']:
            print '...processing medication'
            fields = ['patientnummer', 'atc_code',
                      'voorschrijfdatum']  # make headers
            fields_str = ','.join(fields)  # fields to comma-separated string

            # build query, execute
            query = 'SELECT {} FROM medicaties'.format(fields_str)
            self.execute(cursor, query)

            med_headers, self.num_med, self.num_med_pos = self.insert_data(
                cursor,
                fields,
                'atc_code', ['voorschrijfdatum', 'voorschrijfdatum'],
                '[A-Z][0-9][0-9]',
                3,
                suffix='atc')
            headers = headers + med_headers

        # gather data from consult csv
        if 'consults' in needs_processing and needs_processing['consults']:
            print '...processing consults'
            fields = ['patientnummer', 'soepcode', 'icpc',
                      'datum']  # make headers
            fields_str = ','.join(fields)  # fields to comma-separated string

            # build query, execute
            query = 'SELECT {} FROM journalen'.format(fields_str)
            self.execute(cursor, query)

            consult_headers, self.num_cons, self.num_cons_pos = self.insert_data(
                cursor,
                fields,
                'icpc', ['datum', 'datum'],
                '[A-Z][0-9][0-9]',
                3,
                incorporate_SOEP='soepcode')
            headers = headers + consult_headers

        # gather data from referral csv
        if 'referrals' in needs_processing and needs_processing['referrals']:
            print '...processing referrals'
            fields = ['patientnummer', 'specialisme', 'datum']  # make headers
            fields_str = ','.join(fields)  # fields to comma-separated string

            # build query, execute
            query = 'SELECT {} FROM doorverwijzingen'.format(fields_str)
            self.execute(cursor, query)

            ref_headers, _, _ = self.insert_data(cursor, fields, 'specialisme',
                                                 ['datum', 'datum'], '.*',
                                                 None)
            headers = headers + ref_headers

        # gather data from comorbidity csv
        if 'comorbidity' in needs_processing and needs_processing[
                'comorbidity']:
            print '...processing comorbidity'
            fields = [
                'patientnummer', 'omschrijving', 'begindatum', 'einddatum'
            ]  # make headers
            fields_str = ','.join(fields)  # fields to comma-separated string

            # build query, execute
            query = 'SELECT {} FROM comorbiditeit'.format(fields_str)
            self.execute(cursor, query)

            comor_headers, _, _ = self.insert_data(cursor,
                                                   fields,
                                                   'omschrijving',
                                                   ['begindatum', 'einddatum'],
                                                   '.+',
                                                   None,
                                                   suffix='comorbiditeit')
            headers = headers + comor_headers

        # gather data from lab results csv
        if 'lab_results' in needs_processing and needs_processing[
                'lab_results']:
            print '...processing lab results'
            fields = [
                'patientnummer', 'codenorm', 'datum', 'waarde',
                'referentie_minimum', 'referentie_maximum'
            ]  # make headers
            fields_str = ','.join(fields)  # fields to comma-separated string

            # build query, execute
            query = 'SELECT {} FROM labuitslagen'.format(fields_str)
            self.execute(cursor, query)

            lab_headers, self.num_lab, self.num_lab_pos = self.insert_data(
                cursor,
                fields,
                'codenorm', ['datum', 'datum'],
                '.+',
                None,
                suffix='lab_results')
            headers = headers + lab_headers

        # move CRC indicator to end of each instance data list
        self.move_target_to_end_of_list()

        # append target element to headers, add to class var
        headers.append('target')
        self.headers = headers
	def enrich_from_sql(self):
		'''enrich using a database as source'''
		cursor = util.sql_connect().cursor()
		cursor.execute('''SELECT substr(icpc,1,3) FROM journalen WHERE soepcode = 'E' ''')
		idx = 0 # we query only ICPC codes
		return self.icpc_enrichment(cursor, idx, from_sql=True)
	def enrich_from_sql(self):
		'''enrich using a database as source'''
		cursor = util.sql_connect().cursor()
		cursor.execute('''SELECT atc_code FROM medicaties''')
		idx = 0 # we query only ATC codes
		return self.atc_enrichment(cursor, idx)