def querySourceItems(self, convOptions, progress=None, conn=None):
    """Query the database for a list of all source clinical items
    (culture results, etc.) and yield the results one at a time.
    If startDate provided, only return items whose occurrence date is
    on or after that date (date filtering is currently disabled; see
    the TODO below).
    """
    extConn = conn is not None;
    if not extConn:
        conn = self.connFactory.connection();

    # Column headers to query for that map to respective fields in analysis table
    headers = ["order_proc_anon_id","pat_anon_id","pat_enc_csn_anon_id","proc_code","organism_name","antibiotic_name","suseptibility","shifted_result_time"];

    query = SQLQuery();
    for header in headers:
        query.addSelect( header );
    query.addFrom("stride_culture_micro");

    # TODO: FIGURE OUT WHY CAN'T DO >= OPERATION HERE
    # if convOptions.startDate is not None:
    #     query.addWhereOp("shifted_result_time",">=", convOptions.startDate);
    # if convOptions.endDate is not None:
    #     query.addWhereOp("shifted_result_time","<", convOptions.endDate);  # Still use begin date as common filter value

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

    cursor = conn.cursor();
    # Do one massive query, but yield data for one item at a time.
    cursor.execute( str(query), tuple(query.params) );

    row = cursor.fetchone();
    while row is not None:
        rowModel = RowItemModel( row, headers );

        if rowModel['shifted_result_time'] is None:  # Don't add if no result time given
            row = cursor.fetchone();
            continue

        if rowModel['organism_name'] is not None:
            # Positive culture, but if the results are uninterpretable, don't add the feature
            if rowModel['suseptibility'] is None or rowModel['antibiotic_name'] == 'Method' or rowModel['antibiotic_name'] is None:
                row = cursor.fetchone();
                continue

        # Sanitize the antibiotic name so that we don't run into directory issues
        # later when writing temp files. Guard against None explicitly rather
        # than swallowing everything with a bare except.
        if rowModel['antibiotic_name'] is not None:
            rowModel['antibiotic_name'] = rowModel['antibiotic_name'].replace('/', '-');

        yield rowModel;  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
        row = cursor.fetchone();

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close();
    if not extConn:
        conn.close();

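# Hedged usage sketch (not part of the original module): how a caller might
# stream items from the generator above while owning the DB connection itself,
# so several conversion passes can share one connection. "converter" and
# "convOptions" are assumed stand-ins for the owning converter instance and
# its ConversionOptions-style argument; ProgressDots comes from
# medinfo.common.Util as imported elsewhere in this codebase.
def exampleStreamCultureItems(converter, convOptions):
    from medinfo.common.Util import ProgressDots
    progress = ProgressDots()
    conn = converter.connFactory.connection()  # external connection, reused for the whole stream
    try:
        for rowModel in converter.querySourceItems(convOptions, progress=progress, conn=conn):
            pass  # e.g., convert and persist one item at a time
    finally:
        conn.close()  # the caller opened the connection, so the caller closes it
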
def querySourceItems(self, startDate=None, endDate=None, progress=None, conn=None):
    """Query the database for a list of all source clinical items
    (lab results in this case) and yield the results one at a time.
    If startDate provided, only return items whose result_time is on
    or after that date.
    Only include results records where the result_flag is set to an
    informative value, to focus only on abnormal lab results (including
    normal results would add a ton of relatively uninformative data that
    would greatly expand data space and subsequent computation time).
    """
    extConn = conn is not None;
    if not extConn:
        conn = self.connFactory.connection();

    # Column headers to query for that map to respective fields in analysis table
    headers = ["sor.order_proc_id", "pat_id", "pat_enc_csn_id", "order_type", "proc_id", "proc_code", "base_name", "component_name", "common_name", "ord_num_value", "result_flag", "result_in_range_yn", "sor.result_time"];

    query = SQLQuery();
    for header in headers:
        query.addSelect( header );
    query.addFrom("stride_order_proc as sop");
    query.addFrom("%s as sor" % SOURCE_TABLE);
    query.addWhere("sop.order_proc_id = sor.order_proc_id");

    #query.addWhere("result_flag <> '*'");  # Will exclude nulls and the uninformative '*' values for text-based microbiology results

    if startDate is not None:
        query.addWhereOp("sor.result_time",">=", startDate);
    if endDate is not None:
        query.addWhereOp("sor.result_time","<", endDate);

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

    cursor = conn.cursor();
    # Do one massive query, but yield data for one item at a time.
    cursor.execute( str(query), tuple(query.params) );

    row = cursor.fetchone();
    while row is not None:
        rowModel = RowItemModel( row, headers );
        # Normalize qualified labels
        rowModel["order_proc_id"] = rowModel["sor.order_proc_id"];
        rowModel["result_time"] = rowModel["sor.result_time"];

        if rowModel['base_name'] is None:
            row = cursor.fetchone()
            continue

        self.populateResultFlag(rowModel, conn=conn);

        yield rowModel;  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
        row = cursor.fetchone();

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close();
    if not extConn:
        conn.close();

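# Hedged sketch (hypothetical driver, not in the original source): processing
# the lab results in month-sized windows via the startDate/endDate parameters,
# which keeps any single generator pass bounded in time and memory.
def exampleConvertLabsByMonth(converter, firstDate, lastDate):
    from datetime import datetime
    current = firstDate
    while current < lastDate:
        # First day of the next month serves as the exclusive end bound
        if current.month == 12:
            nextDate = datetime(current.year + 1, 1, 1)
        else:
            nextDate = datetime(current.year, current.month + 1, 1)
        for rowModel in converter.querySourceItems(startDate=current, endDate=min(nextDate, lastDate)):
            pass  # convert one lab result at a time
        current = nextDate
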
def querySourceItems(self, userSIDs, limit=None, offset=None, progress=None, conn=None):
    """Query the database for a list of all AccessLogs and yield the
    results one at a time. If userSIDs provided, only return items
    matching those IDs.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    # Column headers to query for that map to respective fields in analysis table
    headers = [
        "user_id", "user_name", "de_pat_id", "access_datetime", "metric_id",
        "metric_name", "line_count", "description", "metric_group_num",
        "metric_group_name"
    ]

    query = SQLQuery()
    for header in headers:
        query.addSelect(header)
    query.addFrom(self.sourceTableName)
    if userSIDs is not None:
        query.addWhereIn("user_id", userSIDs)
    query.setLimit(limit)
    query.setOffset(offset)

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

    cursor = conn.cursor()
    # Do one massive query, but yield data for one item at a time.
    cursor.execute(str(query), tuple(query.params))

    row = cursor.fetchone()
    while row is not None:
        rowModel = RowItemModel(row, headers)
        yield rowModel
        row = cursor.fetchone()

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close()
    if not extConn:
        conn.close()

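# Hedged sketch (hypothetical driver): paging through the access logs with the
# limit/offset parameters above, so a long-running process can checkpoint
# between chunks rather than holding one enormous cursor.
def exampleChunkAccessLogs(converter, userSIDs, chunkSize=10000):
    offset = 0
    while True:
        count = 0
        for rowModel in converter.querySourceItems(userSIDs, limit=chunkSize, offset=offset):
            count += 1  # process one access-log row here
        if count < chunkSize:
            break  # a partial (or empty) chunk means the table is exhausted
        offset += chunkSize
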
def querySourceItems(self, startDate=None, endDate=None, progress=None, conn=None):
    """Query the database for a list of all source clinical items
    (orders, etc.) and yield the results one at a time. If startDate
    provided, only return items whose order_time is on or after that date.
    Ignore entries with instantiated_time not null, as those represent
    child orders spawned from an original order, whereas we are more
    interested in the decision making to enter the original order.
    """
    extConn = conn is not None;
    if not extConn:
        conn = self.connFactory.connection();

    # Column headers to query for that map to respective fields in analysis table
    queryHeaders = ["op.order_proc_id", "pat_id", "pat_enc_csn_id", "op.order_type", "op.proc_id", "op.proc_code", "description", "order_time", "protocol_id", "protocol_name", "section_name", "smart_group"];
    headers = ["order_proc_id", "pat_id", "pat_enc_csn_id", "order_type", "proc_id", "proc_code", "description", "order_time", "protocol_id", "protocol_name", "section_name", "smart_group"];

    query = SQLQuery();
    for header in queryHeaders:
        query.addSelect( header );
    query.addFrom("stride_order_proc as op left outer join stride_orderset_order_proc as os on op.order_proc_id = os.order_proc_id");
    query.addWhere("order_time is not null");  # Rare cases of "comment" orders with no date/time associated
    query.addWhere("instantiated_time is null");
    query.addWhere("(stand_interval is null or stand_interval not like '%%PRN')");  # Ignore PRN orders to simplify somewhat ('%%' escapes the literal '%' wildcard for the DB API's parameter substitution)
    if startDate is not None:
        query.addWhereOp("order_time",">=", startDate);
    if endDate is not None:
        query.addWhereOp("order_time","<", endDate);

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

    cursor = conn.cursor();
    # Do one massive query, but yield data for one item at a time.
    cursor.execute( str(query), tuple(query.params) );

    row = cursor.fetchone();
    while row is not None:
        rowModel = RowItemModel( row, headers );
        yield rowModel;  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
        row = cursor.fetchone();

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close();
    if not extConn:
        conn.close();

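# Hedged sketch (assumed consumer, not in the original source): because of the
# left outer join above, orders placed outside any order set come back with a
# null protocol_id, so a consumer can split ad hoc orders from order-set
# orders in a single streaming pass.
def exampleSplitByOrderSet(converter, startDate=None, endDate=None):
    adHocCount = 0
    orderSetCount = 0
    for rowModel in converter.querySourceItems(startDate=startDate, endDate=endDate):
        if rowModel["protocol_id"] is None:
            adHocCount += 1  # no order set link found by the outer join
        else:
            orderSetCount += 1
    return (adHocCount, orderSetCount)
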
def querySourceItems(self, convOptions, progress=None, conn=None):
    """Query the database for a list of all source clinical items
    (treatment team assignments in this case) and yield the results one
    at a time. If startDate provided, only return items whose occurrence
    date is on or after that date.
    """
    extConn = conn is not None;
    if not extConn:
        conn = self.connFactory.connection();

    # Column headers to query for that map to respective fields in analysis table
    headers = ["stride_treatment_team_id","pat_id","pat_enc_csn_id","trtmnt_tm_begin_date","trtmnt_tm_end_date","treatment_team","prov_name"];

    query = SQLQuery();
    for header in headers:
        query.addSelect( header );
    query.addFrom("stride_treatment_team");
    if convOptions.startDate is not None:
        query.addWhereOp("trtmnt_tm_begin_date",">=", convOptions.startDate);
    if convOptions.endDate is not None:
        query.addWhereOp("trtmnt_tm_begin_date","<", convOptions.endDate);  # Still use begin date as common filter value

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

    cursor = conn.cursor();
    # Do one massive query, but yield data for one item at a time.
    cursor.execute( str(query), tuple(query.params) );

    row = cursor.fetchone();
    while row is not None:
        rowModel = RowItemModel( row, headers );
        for normalizedModel in self.normalizeRowModel(rowModel, convOptions, conn=conn):
            yield normalizedModel;  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
        row = cursor.fetchone();

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close();
    if not extConn:
        conn.close();

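# Hedged illustration of the one-to-many normalization pattern used above.
# This is NOT the real normalizeRowModel; the field handling here (splitting a
# comma-separated treatment_team string into one item per team) is purely
# hypothetical, and it assumes the RowItemModel behaves like a dict, as its
# bracket-access usage above suggests. The point is that a normalizer can
# itself be a generator, so the outer query loop simply re-yields whatever it
# produces and everything stays streaming.
def exampleNormalizeRowModel(rowModel):
    teamField = rowModel["treatment_team"]
    if teamField is None:
        return
    for team in teamField.split(","):
        normalized = dict(rowModel)  # shallow copy so each yielded item is independent
        normalized["treatment_team"] = team.strip()
        yield normalized
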
def queryMixSourceItems(self, rxcuiDataByMedId, convOptions, progress=None, conn=None):
    """Query the database for a list of source clinical items
    (medications from mixes, etc.) and yield the results one at a time.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    # Column headers to query for that map to respective fields in analysis table
    queryHeaders = [
        "med.order_med_id", "med.pat_id", "med.pat_enc_csn_id",
        "mix.medication_id", "mix.medication_name", "mix.ingredient_type",
        "med.ordering_date", "med.med_route", "med.number_of_doses",
        "protocol_id", "protocol_name", "section_name", "smart_group"
    ]
    headers = [
        "order_med_id", "pat_id", "pat_enc_csn_id", "medication_id",
        "description", "ingredient_type", "ordering_date", "med_route",
        "number_of_doses", "protocol_id", "protocol_name", "section_name",
        "smart_group"
    ]

    query = SQLQuery()
    for header in queryHeaders:
        query.addSelect(header)
    query.addFrom("stride_order_med as med left outer join stride_orderset_order_med as os on med.order_med_id = os.order_med_id")  # Grab order set links if they exist
    query.addFrom("stride_order_medmixinfo as mix")
    query.addWhere("med.order_med_id = mix.order_med_id")
    #query.addWhereEqual("med.medication_id", TEMPLATE_MEDICATION_ID);
    #query.addWhere("mix.line = 1");  # Just take the first item from a mix
    query.addWhere("freq_name not like '%%PRN'")  # Ignore PRN orders
    if convOptions.startDate is not None:
        query.addWhereOp("ordering_date", ">=", convOptions.startDate)
    if convOptions.endDate is not None:
        query.addWhereOp("ordering_date", "<", convOptions.endDate)
    query.addOrderBy("med.ordering_date, med.order_med_id, mix.line")

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

    cursor = conn.cursor()
    # Do one massive query, but yield data for one item at a time.
    cursor.execute(str(query), tuple(query.params))

    # Accumulate mixture components one item at a time
    mixByOrderMedId = dict()
    row = cursor.fetchone()
    while row is not None:
        rowModel = RowItemModel(row, headers)
        orderMedId = rowModel["order_med_id"]
        if orderMedId not in mixByOrderMedId:
            # New medication mix encountered. Process any prior ones before moving on
            for normalizedModel in self.normalizeMixData(rxcuiDataByMedId, mixByOrderMedId, convOptions):
                yield normalizedModel  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
            mixByOrderMedId.clear()  # Discard previously processed mixes so don't have a ton left in memory
            mixByOrderMedId[orderMedId] = list()  # Prep for next mix
        mixByOrderMedId[orderMedId].append(rowModel)
        row = cursor.fetchone()

    # One more pass for remaining items
    for normalizedModel in self.normalizeMixData(rxcuiDataByMedId, mixByOrderMedId, convOptions):
        yield normalizedModel  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close()
    if not extConn:
        conn.close()

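# Hedged sketch (generic, hypothetical names): the mix accumulation above is
# an instance of grouping adjacent rows that share a key, which only works
# because the query orders by order_med_id. The same pattern in isolation,
# over any iterator of dict-like rows sorted by the key:
def exampleGroupAdjacent(rowIter, keyName):
    currentKey = None
    group = []
    for row in rowIter:
        if row[keyName] != currentKey and group:
            yield group  # key changed, so emit the completed group
            group = []
        currentKey = row[keyName]
        group.append(row)
    if group:
        yield group  # final group after the iterator is exhausted
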
def querySourceItems(self, rxcuiDataByMedId, convOptions, progress=None, conn=None):
    """Query the database for a list of all source clinical items
    (medications, etc.) and yield the results one at a time. If
    startDate provided, only return items whose ordering_date is on or
    after that date.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    # Column headers to query for that map to respective fields in analysis table
    queryHeaders = [
        "med.order_med_id", "pat_id", "pat_enc_csn_id", "med.medication_id",
        "med.description", "ordering_date", "med_route", "number_of_doses",
        "protocol_id", "protocol_name", "section_name", "smart_group"
    ]
    headers = [
        "order_med_id", "pat_id", "pat_enc_csn_id", "medication_id",
        "description", "ordering_date", "med_route", "number_of_doses",
        "protocol_id", "protocol_name", "section_name", "smart_group"
    ]

    query = SQLQuery()
    for header in queryHeaders:
        query.addSelect(header)
    query.addFrom("stride_order_med as med left outer join stride_orderset_order_med as os on med.order_med_id = os.order_med_id")  # Grab order set links if they exist
    query.addWhere("med.medication_id <> %s" % TEMPLATE_MEDICATION_ID)
    query.addWhere("freq_name not like '%%PRN'")  # Ignore PRN orders
    if convOptions.startDate is not None:
        query.addWhereOp("ordering_date", ">=", convOptions.startDate)
    if convOptions.endDate is not None:
        query.addWhereOp("ordering_date", "<", convOptions.endDate)

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

    cursor = conn.cursor()
    # Do one massive query, but yield data for one item at a time.
    cursor.execute(str(query), tuple(query.params))

    row = cursor.fetchone()
    while row is not None:
        rowModel = RowItemModel(row, headers)
        for normalizedModel in self.normalizeMedData(rxcuiDataByMedId, rowModel, convOptions):
            yield normalizedModel  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
        row = cursor.fetchone()

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close()
    if not extConn:
        conn.close()

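# Hedged aside (sketch, not original code): the TEMPLATE_MEDICATION_ID filter
# above is interpolated directly into the SQL string. That is fine for a
# trusted integer constant, but the bound-parameter form via addWhereOp
# (already used elsewhere in this function) avoids string interpolation
# entirely:
def exampleBuildMedQueryFilter(query):
    query.addWhereOp("med.medication_id", "<>", TEMPLATE_MEDICATION_ID)
    return query
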
def querySourceItems(self, patientIds=None, progress=None, conn=None):
    """Query the database for a list of all patient demographics and
    yield the results one at a time. If patientIds provided, only return
    items matching those IDs.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    # Column headers to query for that map to respective fields in analysis table
    headers = ["pat_id", "birth_year", "gender", "death_date", "race", "ethnicity"]

    query = SQLQuery()
    for header in headers:
        query.addSelect(header)
    query.addFrom("stride_patient as sp")
    if patientIds is not None:
        query.addWhereIn("sp.pat_id", patientIds)

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

    cursor = conn.cursor()
    # Do one massive query, but yield data for one item at a time.
    cursor.execute(str(query), tuple(query.params))

    row = cursor.fetchone()
    while row is not None:
        rowModel = RowItemModel(row, headers)

        if rowModel["birth_year"] is None:
            # Blank values, doesn't make sense. Skip it
            log.warning(rowModel)
        else:
            # Record birth at resolution of year
            rowModel["itemDate"] = datetime(rowModel["birth_year"], 1, 1)
            rowModel["name"] = "Birth"
            rowModel["description"] = "Birth Year"
            yield rowModel

            # Record another at resolution of decade
            decade = (rowModel["birth_year"] / 10) * 10  # Integer division truncates to the decade (Python 2)
            rowModel["itemDate"] = datetime(rowModel["birth_year"], 1, 1)
            rowModel["name"] = "Birth%ds" % decade
            rowModel["description"] = "Birth Decade %ds" % decade
            yield rowModel

            # Summarize race and ethnicity information into single field of interest
            raceEthnicity = self.summarizeRaceEthnicity(rowModel)
            rowModel["itemDate"] = datetime(rowModel["birth_year"], 1, 1)
            rowModel["name"] = "Race" + raceEthnicity.translate(None, " ()-/")  # Strip off punctuation (Python 2 str.translate)
            rowModel["description"] = "Race/Ethnicity: %s" % raceEthnicity
            yield rowModel

            gender = rowModel["gender"].title()
            rowModel["name"] = gender
            rowModel["description"] = "%s Gender" % gender
            yield rowModel

            if rowModel["death_date"] is not None:
                rowModel["name"] = "Death"
                rowModel["description"] = "Death Date"
                rowModel["itemDate"] = rowModel["death_date"]
                yield rowModel

        row = cursor.fetchone()
        if progress is not None:
            progress.update()  # Guard against a None progress; ProgressDots exposes a lowercase update(), as used elsewhere in this codebase

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close()
    if not extConn:
        conn.close()

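# Hedged sketch: the decade bucketing above relies on Python 2 integer
# division. An equivalent helper that is explicit about the truncation (and
# therefore also behaves the same under Python 3):
def exampleBirthDecadeName(birthYear):
    decade = (birthYear // 10) * 10  # e.g., 1987 -> 1980
    return "Birth%ds" % decade       # -> "Birth1980s"
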
def queryPatientItemsPerPatient(self, analysisOptions, progress=None, conn=None):
    """Query the database for an ordered list of patient clinical items,
    in the order in which they occurred. This could be a large amount of
    data, so there is the option to provide a list of specific patientIds
    or date ranges to query for. In either case, results will be returned
    as an iterator over individual lists for each patient. Lists will
    contain RowItemModels, each with data:
    * patient_id
    * encounter_id
    * clinical_item_id
    * item_date
    * analyze_date
    """
    extConn = conn is not None;
    if not extConn:
        conn = self.connFactory.connection();

    # Reset for actual data selects
    query = SQLQuery();
    query.addSelect("pi.patient_item_id");
    query.addSelect("pi.patient_id");
    query.addSelect("pi.encounter_id");
    query.addSelect("pi.clinical_item_id");
    query.addSelect("pi.item_date");
    query.addSelect("pi.analyze_date");
    query.addFrom("patient_item as pi");
    query.addFrom("clinical_item as ci");
    query.addWhere("pi.clinical_item_id = ci.clinical_item_id");
    query.addWhere("ci.analysis_status <> 0");  # Skip items designated to be ignored
    if analysisOptions.patientIds is not None:
        query.addWhereIn("patient_id", analysisOptions.patientIds );
    if analysisOptions.startDate is not None:
        query.addWhereOp("pi.item_date",">=", analysisOptions.startDate);
    if analysisOptions.endDate is not None:
        query.addWhereOp("pi.item_date","<", analysisOptions.endDate);
    query.addOrderBy("pi.patient_id");
    query.addOrderBy("pi.item_date");
    query.addOrderBy("pi.clinical_item_id");

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

    cursor = conn.cursor();

    # Do one massive query, but yield data for one patient at a time.
    # This should minimize the number of DB queries and the amount of
    # data that must be kept in memory at any one time.
    cursor.execute( str(query), tuple(query.params) );

    currentPatientId = None;
    currentPatientData = list();
    headers = ["patient_item_id","patient_id","encounter_id","clinical_item_id","item_date","analyze_date"];

    row = cursor.fetchone();
    while row is not None:
        (patient_item_id, patientId, encounter_id, clinicalItemId, itemDate, analyzeDate) = row;
        if currentPatientId is None:
            currentPatientId = patientId;

        if patientId != currentPatientId:
            # Changed patient, so yield the accumulated data for the previous patient
            yield currentPatientData;

            # Update our data tracking for the current patient
            currentPatientId = patientId;
            currentPatientData = list();

        rowModel = RowItemModel( row, headers );
        currentPatientData.append( rowModel );

        row = cursor.fetchone();

    # Yield the final patient's data
    yield currentPatientData;

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close();
    if not extConn:
        conn.close();

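# Hedged usage sketch (hypothetical consumer): iterate one patient at a time,
# relying on the ORDER BY above so each patient's items arrive sorted by
# item_date. Note the empty-list guard: the generator yields its accumulator
# once more at the end, which is an empty list if no rows matched at all.
def exampleIteratePatients(dataManager, analysisOptions):
    for patientItemList in dataManager.queryPatientItemsPerPatient(analysisOptions):
        if not patientItemList:
            continue  # guard against the degenerate final yield
        patientId = patientItemList[0]["patient_id"]  # all rows in the list share one patient
        firstDate = patientItemList[0]["item_date"]
        lastDate = patientItemList[-1]["item_date"]
        yield (patientId, firstDate, lastDate)  # e.g., per-patient sequence statistics
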
def querySourceItems(self, startDate=None, endDate=None, progress=None, conn=None):
    """Query the database for a list of all source clinical items
    (diagnosed problems in this case) and yield the results one at a
    time. If startDate provided, only return items whose noted_date is
    on or after that date.
    """
    extConn = conn is not None;
    if not extConn:
        conn = self.connFactory.connection();

    # Facilitate rapid lookup of ICD9/ICD10 codes
    if self.icd9_str_by_code is None:
        self.prepare_icd9_lookup(conn=conn)
    if self.icd10_str_by_code is None:
        self.prepare_icd10_lookup(conn=conn)

    # Column headers to query for that map to respective fields in analysis table
    headers = ["pat_id","pat_enc_csn_id","noted_date","resolved_date","dx_icd9_code","dx_icd9_code_list","dx_icd10_code_list","data_source"];

    query = SQLQuery();
    for header in headers:
        query.addSelect( header );
    query.addFrom("stride_dx_list as dx");
    query.addWhere("noted_date is not null");  # Only work with elements that have dates assigned for now
    if startDate is not None:
        query.addWhereOp("noted_date",">=", startDate);
    if endDate is not None:
        query.addWhereOp("noted_date","<", endDate);

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

    cursor = conn.cursor();
    # Do one massive query, but yield data for one item at a time.
    cursor.execute( str(query), tuple(query.params) );

    row = cursor.fetchone();
    while row is not None:
        row_model = RowItemModel( row, headers );

        # 2014-2017 data does not have dx_icd9_code. Instead, it has both
        # dx_icd9_code_list and dx_icd10_code_list. For these items, there
        # is a one:many mapping of source item to converted item.

        # Collect ICD10 codes.
        icd10_codes = set()
        if row_model['dx_icd10_code_list'] not in ['', None]:
            codes = row_model['dx_icd10_code_list'].split(',')
            for code in codes:
                icd10_codes.add(code)

        # Collect ICD9 codes.
        icd9_codes = set()
        if row_model['dx_icd9_code'] not in ['', None]:
            icd9_codes.add(row_model['dx_icd9_code'])
        elif row_model['dx_icd9_code_list'] not in ['', None]:
            codes = row_model['dx_icd9_code_list'].split(',')
            for code in codes:
                icd9_codes.add(code)

        # If there are no ICD codes, skip to next row.
        if len(icd9_codes) == 0 and len(icd10_codes) == 0:
            row = cursor.fetchone()
            continue

        # Process ICD codes.
        # Build a temporary dictionary so that a single loop can take care
        # of both ICD9 and ICD10 without mixing the data.
        icd_versions = {
            'ICD9': {'codes': icd9_codes, 'lookup': self.icd9_str_by_code},
            'ICD10': {'codes': icd10_codes, 'lookup': self.icd10_str_by_code}
        }
        for version, info in icd_versions.iteritems():
            icd_codes = info['codes']
            icd_lookup = info['lookup']
            for icd_code in icd_codes:
                # Look up description string. Otherwise default to the ICD code itself.
                row_model['icd_str'] = icd_code
                if icd_code in icd_lookup:
                    row_model['icd_str'] = icd_lookup[icd_code]
                row_model['dx_icd_code'] = version + '.' + icd_code

                # Yield one row worth of data at a time to avoid having to keep
                # the whole result set in memory.
                yield row_model

                orig_code = icd_code
                if SUBCODE_DELIM in orig_code:
                    # Insert copies of item for parent node codes to aggregate
                    # component diagnoses into general categories.
                    while icd_code[-1] != SUBCODE_DELIM:
                        icd_code = icd_code[:-1]  # Truncate trailing digit
                        if icd_code in icd_lookup:
                            # Found a matching parent code, so yield this version.
                            row_model['icd_str'] = icd_lookup[icd_code]
                            row_model['dx_icd_code'] = version + '.' + icd_code
                            yield row_model

                    # One more cycle to get parent node with no subcode delimiter at all.
                    icd_code = icd_code[:-1]  # Truncate off SUBCODE_DELIM
                    if icd_code in icd_lookup:
                        row_model['icd_str'] = icd_lookup[icd_code]
                        row_model['dx_icd_code'] = version + '.' + icd_code
                        yield row_model

        row = cursor.fetchone();
        if progress is not None:
            progress.update();  # Guard against a None progress; ProgressDots exposes a lowercase update()

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close();
    if not extConn:
        conn.close();

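# Hedged sketch isolating the parent-code rollup logic above (assuming
# SUBCODE_DELIM is the '.' separating an ICD category from its subcode):
# yield each ancestor of an ICD code found in the description lookup, from
# most specific to least specific.
def exampleParentCodes(icd_code, icd_lookup, subcode_delim='.'):
    if subcode_delim not in icd_code:
        return
    while icd_code[-1] != subcode_delim:
        icd_code = icd_code[:-1]  # truncate one trailing character
        if icd_code in icd_lookup:
            yield icd_code  # e.g., '745.69' -> '745.6'
    icd_code = icd_code[:-1]  # drop the delimiter itself
    if icd_code in icd_lookup:
        yield icd_code  # e.g., '745'
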
from medinfo.db import DBUtil;
from medinfo.db.Model import SQLQuery;
from medinfo.common.Util import ProgressDots;
from medinfo.common.Util import log;

conn = DBUtil.connection();
try:
    results = DBUtil.execute("select clinical_item_id from clinical_item where clinical_item_category_id = 161", conn=conn);
    clinicalItemIds = tuple([row[0] for row in results]);
    log.info("Deleting for %s Clinical Items" % len(clinicalItemIds) );

    query = SQLQuery();
    query.addSelect("patient_item_id");
    query.addFrom("patient_item");
    query.addWhereIn("clinical_item_id", clinicalItemIds );

    prog = ProgressDots();
    prog.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

    # Go ahead and load full result set into memory, so don't have potential
    # concurrency issues with deleting items as traversing them
    results = DBUtil.execute(query, conn=conn);
    for row in results:
        patientItemId = row[0];
        DBUtil.execute("delete from patient_item where patient_item_id = %s", (patientItemId,), conn=conn);
        prog.update();
    prog.printStatus();
finally:
    conn.close();

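# Hedged alternative (sketch, not the original approach): where table sizes
# allow, the per-row loop above could collapse into one parameterized delete.
# This assumes the underlying driver is psycopg2-style, as the '%s'
# placeholders elsewhere in this script suggest; psycopg2 adapts a Python
# tuple to a parenthesized value list for the "in" clause.
def exampleBulkDelete(conn, clinicalItemIds):
    DBUtil.execute("delete from patient_item where clinical_item_id in %s", (clinicalItemIds,), conn=conn);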