def enrich_from_sql(self): '''enrich using a database as source''' cursor = util.sql_connect().cursor() cursor.execute( '''SELECT substr(icpc,1,3) FROM journalen WHERE soepcode = 'E' ''') idx = 0 # we query only ICPC codes return self.icpc_enrichment(cursor, idx, from_sql=True)
def execute(in_dir, delim, out_file, age_gender=False, counts_med=False, counts_med_enrich=False, counts_consult=False, counts_consult_enrich=False, counts_referral=False, counts_lab=False, tmprl=False, enriched_tmprl=False, knowledge_driven=False, counts_no_knowledge=False, tmprl_no_knowledge=False, separate=False, HISes=[]): '''merge the in files to produce the out file''' merged = defaultdict(list) headers = ['ID'] # we may not need this. ID2HIS = {} merged_test = defaultdict(list) # if we wish to separate, get dictionary of patient HIS sources using SQL. if separate: c = util.sql_connect().cursor() HISes_str = "','".join(HISes) q = '''SELECT patientnummer FROM patienten WHERE PRAKTIJKCODE IN ('{}')'''.format(HISes_str) c.execute(q) ID2HIS = {row[0] : row[0] for row in c} if age_gender: headers = merge_file(in_dir+'/AG.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_med: headers = merge_file(in_dir+'/C_M.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_med_enrich: headers = merge_file(in_dir+'/C_M_enrich.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_consult: headers = merge_file(in_dir+'/C_C.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_consult_enrich: headers = merge_file(in_dir+'/C_C_enrich.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_referral: headers = merge_file(in_dir+'/C_R.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_lab: headers = merge_file(in_dir+'/C_L.csv', merged, headers, delim, separate, ID2HIS, merged_test) if tmprl: headers = merge_file(in_dir+'/T.csv', merged, headers, delim, separate, ID2HIS, merged_test) if enriched_tmprl: headers = merge_file(in_dir+'/T_enrich.csv', merged, headers, delim, separate, ID2HIS, merged_test) if knowledge_driven: headers = merge_file(in_dir+'/K.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_no_knowledge: headers = merge_file(in_dir+'/C_NK.csv', merged, headers, delim, separate, ID2HIS, merged_test) if tmprl_no_knowledge: headers = merge_file(in_dir+'/T_NK.csv', merged, headers, delim, separate, ID2HIS, merged_test) headers = merge_file(in_dir+'/CRC.csv', merged, headers, delim, separate, ID2HIS, merged_test) # now write to new file (also check whether all results have same length) make_dir(out_file) out = io.write_csv(out_file) out.writerow(headers) skip=0 for key in merged: if len(headers) != 1+len(merged[key]): print 'unequal to header amount ({} vs {})! watch out.'.format(len(headers),len(merged[key])) # skip+=1 # continue out.writerow([key] + merged[key]) if separate: out_file_test = out_file[:out_file.rfind('/')+1] + 'test' + out_file[out_file.rfind('/'):] make_dir(out_file_test) out = io.write_csv(out_file_test) out.writerow(headers) for key in merged_test: if len(headers) != 1+len(merged_test[key]): print 'unequal to header amount ({} vs {})! watch out.'.format(len(headers),len(merged_test[key])) # skip+=1 # continue out.writerow([key] + merged_test[key]) print '## Done Merging ##'
def execute(in_dir, delim, out_file, age_gender=False, counts_med=False, counts_med_enrich=False, counts_consult=False, counts_consult_enrich=False, counts_referral=False, counts_lab=False, all_counts=False, tmprl=False, enriched_tmprl=False, knowledge_driven=False, counts_no_knowledge=False, tmprl_no_knowledge=False, separate=False, HISes=[]): '''merge the in files to produce the out file''' merged = defaultdict(list) headers = ['ID'] # we may not need this. ID2HIS = {} merged_test = defaultdict(list) # if we wish to separate, get dictionary of patient HIS sources using SQL. if separate: c = util.sql_connect().cursor() HISes_str = "','".join(HISes) q = '''SELECT patientnummer FROM patienten WHERE PRAKTIJKCODE IN ('{}')'''.format(HISes_str) c.execute(q) ID2HIS = {row[0]: row[0] for row in c} if age_gender: headers = merge_file(in_dir + '/AG.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_med: headers = merge_file(in_dir + '/C_M.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_med_enrich: headers = merge_file(in_dir + '/C_M_enrich.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_consult: headers = merge_file(in_dir + '/C_C.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_consult_enrich: headers = merge_file(in_dir + '/C_C_enrich.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_referral: headers = merge_file(in_dir + '/C_R.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_lab: headers = merge_file(in_dir + '/C_L.csv', merged, headers, delim, separate, ID2HIS, merged_test) if tmprl: headers = merge_file(in_dir + '/T.csv', merged, headers, delim, separate, ID2HIS, merged_test) if enriched_tmprl: headers = merge_file(in_dir + '/T_enrich.csv', merged, headers, delim, separate, ID2HIS, merged_test) if knowledge_driven: headers = merge_file(in_dir + '/K.csv', merged, headers, delim, separate, ID2HIS, merged_test) if counts_no_knowledge: headers = merge_file(in_dir + '/C_NK.csv', merged, headers, delim, separate, ID2HIS, merged_test) if tmprl_no_knowledge: headers = merge_file(in_dir + '/T_NK.csv', merged, headers, delim, separate, ID2HIS, merged_test) if all_counts: print('ja') headers = merge_file(in_dir + '/counts.csv', merged, headers, delim, separate, ID2HIS, merged_test) headers = merge_file(in_dir + '/stroke.csv', merged, headers, delim, separate, ID2HIS, merged_test) # now write to new file (also check whether all results have same length) make_dir(out_file) out = io.write_csv(out_file) out.writerow(headers) skip = 0 for key in merged: if len(headers) != 1 + len(merged[key]): print('unequal to header amount ({} vs {})! watch out.'.format( len(headers), len(merged[key]))) # skip+=1 # continue out.writerow([key] + merged[key]) if separate: out_file_test = out_file[:out_file.rfind('/') + 1] + 'test' + out_file[out_file.rfind('/'):] make_dir(out_file_test) out = io.write_csv(out_file_test) out.writerow(headers) for key in merged_test: if len(headers) != 1 + len(merged_test[key]): print('unequal to header amount ({} vs {})! watch out.'.format( len(headers), len(merged_test[key]))) # skip+=1 # continue out.writerow([key] + merged_test[key]) print('## Done Merging ##')
def enrich_from_sql(self): '''enrich using a database as source''' cursor = util.sql_connect().cursor() cursor.execute('''SELECT atc_code FROM medicaties''') idx = 0 # we query only ATC codes return self.atc_enrichment(cursor, idx)
def process_sql(self, needs_processing): '''converts the specified sql tables to usable data''' # connect to database and get a cursor handle cursor = util.sql_connect().cursor() # put the IDs of the 'main' file in a dict headers = self.get_IDs_sql(cursor) # add CRC value to each patient self.get_CRC_occurrences_sql(cursor) # randomize dates self.insert_data_intervals() # gather data from medication csv if 'medication' in needs_processing and needs_processing['medication']: print '...processing medication' fields = ['patientnummer', 'atc_code', 'voorschrijfdatum'] # make headers fields_str = ','.join(fields) # fields to comma-separated string # build query, execute query = 'SELECT {} FROM medicaties'.format(fields_str) self.execute(cursor, query) med_headers, self.num_med, self.num_med_pos = self.insert_data( cursor, fields, 'atc_code', ['voorschrijfdatum', 'voorschrijfdatum'], '[A-Z][0-9][0-9]', 3, suffix='atc') headers = headers + med_headers # gather data from consult csv if 'consults' in needs_processing and needs_processing['consults']: print '...processing consults' fields = ['patientnummer', 'soepcode', 'icpc', 'datum'] # make headers fields_str = ','.join(fields) # fields to comma-separated string # build query, execute query = 'SELECT {} FROM journalen'.format(fields_str) self.execute(cursor, query) consult_headers, self.num_cons, self.num_cons_pos = self.insert_data( cursor, fields, 'icpc', ['datum', 'datum'], '[A-Z][0-9][0-9]', 3, incorporate_SOEP='soepcode') headers = headers + consult_headers # gather data from referral csv if 'referrals' in needs_processing and needs_processing['referrals']: print '...processing referrals' fields = ['patientnummer', 'specialisme', 'datum'] # make headers fields_str = ','.join(fields) # fields to comma-separated string # build query, execute query = 'SELECT {} FROM doorverwijzingen'.format(fields_str) self.execute(cursor, query) ref_headers, _, _ = self.insert_data(cursor, fields, 'specialisme', ['datum', 'datum'], '.*', None) headers = headers + ref_headers # gather data from comorbidity csv if 'comorbidity' in needs_processing and needs_processing[ 'comorbidity']: print '...processing comorbidity' fields = [ 'patientnummer', 'omschrijving', 'begindatum', 'einddatum' ] # make headers fields_str = ','.join(fields) # fields to comma-separated string # build query, execute query = 'SELECT {} FROM comorbiditeit'.format(fields_str) self.execute(cursor, query) comor_headers, _, _ = self.insert_data(cursor, fields, 'omschrijving', ['begindatum', 'einddatum'], '.+', None, suffix='comorbiditeit') headers = headers + comor_headers # gather data from lab results csv if 'lab_results' in needs_processing and needs_processing[ 'lab_results']: print '...processing lab results' fields = [ 'patientnummer', 'codenorm', 'datum', 'waarde', 'referentie_minimum', 'referentie_maximum' ] # make headers fields_str = ','.join(fields) # fields to comma-separated string # build query, execute query = 'SELECT {} FROM labuitslagen'.format(fields_str) self.execute(cursor, query) lab_headers, self.num_lab, self.num_lab_pos = self.insert_data( cursor, fields, 'codenorm', ['datum', 'datum'], '.+', None, suffix='lab_results') headers = headers + lab_headers # move CRC indicator to end of each instance data list self.move_target_to_end_of_list() # append target element to headers, add to class var headers.append('target') self.headers = headers
def enrich_from_sql(self): '''enrich using a database as source''' cursor = util.sql_connect().cursor() cursor.execute('''SELECT substr(icpc,1,3) FROM journalen WHERE soepcode = 'E' ''') idx = 0 # we query only ICPC codes return self.icpc_enrichment(cursor, idx, from_sql=True)