def queryLabResults(outputFile, patientById):
    """Query lab results for the given patients and send them to outputFile.

    Joins stride_order_results to stride_order_proc, restricted to a fixed
    list of lab base names and to the patient IDs that are the keys of
    patientById.  Rows are ordered by patient and then result time, and the
    query is executed with column names included and a TextResultsFormatter
    built around outputFile.
    """
    log.info("Query out lab results, takes a while")

    # Lab base names of interest (iron studies, blood counts, hemolysis
    # markers, creatinine, inflammatory markers)
    labBaseNames = (
        'ferritin', 'fe', 'trfrn', 'trfsat', 'ystfrr',
        'wbc', 'hgb', 'hct', 'mcv', 'rdw', 'plt',
        'retic', 'reticab', 'ldh', 'hapto', 'tbil', 'ibil', 'dbil',
        'cr', 'esr', 'crp',
    )
    resultColumns = [
        "pat_id",
        "base_name",
        "common_name",
        "ord_num_value",
        "reference_unit",
        "result_flag",
        "sor.result_time",
    ]

    resultsFormatter = TextResultsFormatter(outputFile)

    # Query rapid when filter by lab result type, limited to X records.
    # Filtering by patient ID drags down substantially until preloaded table
    # by doing a count on the SOR table?
    labQuery = SQLQuery()
    for columnName in resultColumns:
        labQuery.addSelect(columnName)
    labQuery.addFrom("stride_order_results as sor, stride_order_proc as sop")
    labQuery.addWhere("sor.order_proc_id = sop.order_proc_id")
    labQuery.addWhereIn("base_name", labBaseNames)
    labQuery.addWhereIn("pat_id", patientById.viewkeys())
    for sortColumn in ("pat_id", "sor.result_time"):
        labQuery.addOrderBy(sortColumn)

    DBUtil.execute(labQuery, includeColumnNames=True, formatter=resultsFormatter)
def _get_average_orders_per_patient(self):
    """Return the median number of matching lab orders per patient.

    Despite the method name, the value returned is numpy.median of the
    per-patient order counts, not the mean.

    The query depends on LocalEnv.DATASET_SOURCE_NAME:
      - 'STRIDE': counts stride_order_proc rows joined to
        stride_order_results, filtered on proc_code == self._lab_panel.
      - 'UMich': counts rows of the labs table, filtered on
        self._varTypeInTable == self._lab_var.
    In both cases results are further restricted to the base names returned
    by self._get_components_in_lab_panel() and grouped by pat_id.

    Raises:
        Exception: if the query returns no rows.
    """
    # NOTE: the original opened self._connection.cursor() here but never used
    # or closed it; DBUtil.execute manages its own access, so it was dropped.
    query = SQLQuery()
    # NOTE(review): any other DATASET_SOURCE_NAME leaves the query empty -
    # confirm whether that should be an explicit error.
    if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':  # TODO: add STRIDE component routine
        query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
        query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
        query.addFrom('stride_order_proc AS sop')
        query.addFrom('stride_order_results AS sor')
        query.addWhere('sop.order_proc_id = sor.order_proc_id')
        query.addWhereIn("proc_code", [self._lab_panel])
        components = self._get_components_in_lab_panel()
        query.addWhereIn("base_name", components)
        query.addGroupBy('pat_id')
    elif LocalEnv.DATASET_SOURCE_NAME == 'UMich':
        query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
        query.addSelect('COUNT(order_proc_id) AS num_orders')
        query.addFrom('labs')
        query.addWhereIn(self._varTypeInTable, [self._lab_var])
        components = self._get_components_in_lab_panel()
        query.addWhereIn("base_name", components)
        query.addGroupBy('pat_id')

    log.debug('Querying median orders per patient...')
    results = DBUtil.execute(query)

    order_counts = [row[1] for row in results]
    if len(order_counts) == 0:
        error_msg = '0 orders for lab "%s."' % self._lab_var
        log.critical(error_msg)
        # Raise rather than sys.exit so callers can catch the failure
        # (SystemExit is not a subclass of Exception).
        raise Exception(error_msg)
    return numpy.median(order_counts)
def queryItems(self, options, outputFile):
    """Query clinical items matching the options and write them to outputFile.

    Reads options.pauseSeconds (delay after each emitted row),
    options.itemPrefix (optional prefix match on the item description) and
    options.categoryNames (optional comma-separated category descriptions).
    Each result row, including a leading column-name row, is passed to a
    TextResultsFormatter wrapped around outputFile.
    """
    pauseSeconds = float(options.pauseSeconds)

    itemQuery = SQLQuery()
    itemQuery.addSelect(
        "cic.description, ci.clinical_item_id, ci.name, ci.description")
    for tableSpec in ("clinical_item_category as cic", "clinical_item as ci"):
        itemQuery.addFrom(tableSpec)
    itemQuery.addWhere(
        "cic.clinical_item_category_id = ci.clinical_item_category_id")

    if options.itemPrefix:
        # Append wildcard to enable prefix search
        prefixPattern = options.itemPrefix + "%%"
        itemQuery.addWhereOp("ci.description", "like", prefixPattern)
    if options.categoryNames:
        categoryNameList = options.categoryNames.split(",")
        itemQuery.addWhereIn("cic.description", categoryNameList)

    itemQuery.addOrderBy(
        "cic.description, ci.name, ci.description, ci.clinical_item_id")

    rowFormatter = TextResultsFormatter(outputFile)
    progressDots = ProgressDots()
    resultRows = DBUtil.execute(
        itemQuery, includeColumnNames=True, connFactory=self.connFactory)
    for resultRow in resultRows:
        rowFormatter.formatTuple(resultRow)
        time.sleep(pauseSeconds)  # Throttle output between rows
        progressDots.update()
    progressDots.printStatus()
def tearDown(self):
    """Purge the records created under TEST_SOURCE_TABLE, then run base tearDown.

    Deletes in the order patient_item, clinical_item, clinical_item_category,
    then removes stride_patient rows with negative pat_id, and finally
    delegates to DBTestCase.tearDown.
    """
    log.info("Purge test records from the database")

    # Statement order matters: each delete's subquery reads rows that the
    # later statements remove.
    purgeTemplates = (
        """delete from patient_item
        where clinical_item_id in
        (   select clinical_item_id
            from clinical_item as ci, clinical_item_category as cic
            where ci.clinical_item_category_id = cic.clinical_item_category_id
            and cic.source_table = '%s'
        );
        """,
        """delete from clinical_item
        where clinical_item_category_id in
        (   select clinical_item_category_id
            from clinical_item_category
            where source_table = '%s'
        );
        """,
        "delete from clinical_item_category where source_table = '%s';",
    )
    for purgeTemplate in purgeTemplates:
        DBUtil.execute(purgeTemplate % TEST_SOURCE_TABLE)

    # Test patients are identified by negative IDs
    patientPurge = SQLQuery()
    patientPurge.delete = True
    patientPurge.addFrom("stride_patient")
    patientPurge.addWhere("pat_id < 0")
    DBUtil.execute(patientPurge)

    DBTestCase.tearDown(self)
def querySourceItems(self, startDate=None, endDate=None, progress=None, conn=None):
    """Query the source lab result rows and yield them one RowItemModel at a time.

    Joins stride_order_proc to SOURCE_TABLE on order_proc_id.

    Args:
        startDate: if not None, only rows with sor.result_time >= startDate.
        endDate: if not None, only rows with sor.result_time < endDate.
        progress: if not None, its ``total`` attribute is set from the
            query's totalQuery() before iteration starts.
        conn: optional externally managed connection.  If None, a connection
            is obtained from self.connFactory and closed when the generator
            finishes or is closed.

    Yields:
        RowItemModel keyed by the selected headers, with additional
        unqualified "order_proc_id" and "result_time" keys, after
        self.populateResultFlag has been applied.  Rows whose base_name is
        None are skipped.

    Note: the result_flag filter that would restrict output to abnormal
    results is currently commented out, so all joined rows are considered.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    cursor = None
    # try/finally is legal around yield in generators (Python 2.5+), so the
    # cursor and any internally created connection are released even if the
    # consumer abandons the iteration early or an error occurs.
    try:
        # Column headers to query for that map to respective fields in analysis table
        headers = [
            "sor.order_proc_id", "pat_id", "pat_enc_csn_id", "order_type",
            "proc_id", "proc_code", "base_name", "component_name",
            "common_name", "ord_num_value", "result_flag",
            "result_in_range_yn", "sor.result_time",
        ]

        query = SQLQuery()
        for header in headers:
            query.addSelect(header)
        query.addFrom("stride_order_proc as sop")
        query.addFrom("%s as sor" % SOURCE_TABLE)
        query.addWhere("sop.order_proc_id = sor.order_proc_id")
        #query.addWhere("result_flag <> '*'");   # Will exclude nulls and the uninformative '*' values for text-based microbiology results
        if startDate is not None:
            query.addWhereOp("sor.result_time", ">=", startDate)
        if endDate is not None:
            query.addWhereOp("sor.result_time", "<", endDate)

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

        cursor = conn.cursor()
        # Do one massive query, but yield data for one item at a time.
        cursor.execute(str(query), tuple(query.params))

        row = cursor.fetchone()
        while row is not None:
            rowModel = RowItemModel(row, headers)

            # Normalize qualified labels
            rowModel["order_proc_id"] = rowModel["sor.order_proc_id"]
            rowModel["result_time"] = rowModel["sor.result_time"]

            if rowModel['base_name'] is not None:
                self.populateResultFlag(rowModel, conn=conn)
                # Yield one row at a time to avoid holding the whole result set in memory
                yield rowModel
            row = cursor.fetchone()
    finally:
        if cursor is not None:
            cursor.close()
        if not extConn:
            conn.close()
def updateClinicalItemCounts(self, acceptCache=False, conn=None):
    """Refresh clinical_item summary counts from clinical_item_association.

    Resets item_count, patient_count and encounter_count to zero, then copies
    count_0 / patient_count_0 / encounter_count_0 from the association rows
    where clinical_item_id equals subsequent_item_id.  When
    self.maxClinicalItemId is set, both steps are limited to IDs below it.

    If acceptCache is True and the data cache already holds a
    "clinicalItemCountsUpdated" entry, nothing is recalculated.  The cache
    entry is written after a successful update.

    A connection is created from self.connFactory (and closed on exit) only
    when conn is None.
    """
    ownsConnection = conn is None
    if ownsConnection:
        conn = self.connFactory.connection()

    try:
        if acceptCache:
            cachedMarker = self.getCacheData("clinicalItemCountsUpdated", conn=conn)
            if cachedMarker is not None:
                # Item count caches already updated, no need to recalculate them
                return

        # Step 1: zero out all existing counts
        resetSql = "update clinical_item set item_count = 0, patient_count = 0, encounter_count = 0 "
        resetParams = []
        if self.maxClinicalItemId is not None:
            # Restrict to (test) data
            resetSql += "where clinical_item_id < %s" % DBUtil.SQL_PLACEHOLDER
            resetParams.append(self.maxClinicalItemId)
        DBUtil.execute(resetSql, resetParams, conn=conn)

        # Step 2: read the primary summary stats off the "diagonal" of the
        # association matrix
        diagonalQuery = SQLQuery()
        for selectExpr in (
            "clinical_item_id",
            "count_0 as item_count",
            "patient_count_0 as patient_count",
            "encounter_count_0 as encounter_count",
        ):
            diagonalQuery.addSelect(selectExpr)
        diagonalQuery.addFrom("clinical_item_association as ci")
        diagonalQuery.addWhere("clinical_item_id = subsequent_item_id")
        if self.maxClinicalItemId is not None:
            # Restrict to (test) data
            diagonalQuery.addWhereOp("clinical_item_id", "<", self.maxClinicalItemId)

        countTable = DBUtil.execute(diagonalQuery, includeColumnNames=True, conn=conn)
        for countModel in modelListFromTable(countTable):
            DBUtil.updateRow(
                "clinical_item", countModel, countModel["clinical_item_id"], conn=conn)

        # Make a note that this cache data has been updated
        self.setCacheData("clinicalItemCountsUpdated", "True", conn=conn)
    finally:
        if ownsConnection:
            conn.close()
def test_buildFeatureMatrix_prePostFeatures(self):
    """
    Test the features parameter of addClinicalItemFeatures, which lets the
    client request only the .pre* or only the .post* columns in the
    feature matrix.
    """
    # processPatientEpisodeInput must raise ValueError while no
    # patientEpisodeInput has been set.
    with self.assertRaises(ValueError):
        self.factory.processPatientEpisodeInput()

    episodeCursor = self.connection.cursor()

    # SQL query producing the list of patient episodes.
    episodeQuery = SQLQuery()
    for selectExpr in (
        "CAST(pat_id AS bigint)",
        "sop.order_proc_id AS order_proc_id",
        "proc_code",
        "order_time",
        "COUNT(CASE result_in_range_yn WHEN 'Y' THEN 1 ELSE null END) AS normal_results",
    ):
        episodeQuery.addSelect(selectExpr)
    for tableSpec in ("stride_order_proc AS sop", "stride_order_results AS sor"):
        episodeQuery.addFrom(tableSpec)
    episodeQuery.addWhere("sop.order_proc_id = sor.order_proc_id")
    episodeQuery.addWhereEqual("proc_code", "LABMETB")
    episodeKeyColumns = "pat_id, sop.order_proc_id, proc_code, order_time"
    episodeQuery.addGroupBy(episodeKeyColumns)
    episodeQuery.addOrderBy(episodeKeyColumns)
    episodeCursor.execute(str(episodeQuery), episodeQuery.params)

    # Hand the open cursor to the factory as its episode input and process it.
    self.factory.setPatientEpisodeInput(episodeCursor, "pat_id", "order_time")
    self.factory.processPatientEpisodeInput()

    # Walk the episode iterator, normalising the ID and time values.
    resultPatientEpisodes = []
    for episode in self.factory.getPatientEpisodeIterator():
        episode["pat_id"] = int(episode["pat_id"])
        episode["order_time"] = DBUtil.parseDateValue(episode["order_time"])
        resultPatientEpisodes.append(episode)

    # Add TestItem100 (pre only) and TestItem200 (post only) clinical item data.
    self.factory.addClinicalItemFeatures(["TestItem100"], features="pre")
    self.factory.addClinicalItemFeatures(["TestItem200"], features="post")
    self.factory.buildFeatureMatrix()

    # The first two rows of the matrix file are skipped before comparison.
    actualMatrix = self.factory.readFeatureMatrixFile()
    expectedMatrix = FM_TEST_OUTPUT["test_buildFeatureMatrix_prePostFeatures"]
    self.assertEqualList(actualMatrix[2:], expectedMatrix)
def queryPatientClinicalItemData(self, analysisQuery, conn):
    """Yield each patient's clinical item IDs, one patient at a time.

    Queries patient_item joined to clinical_item and clinical_item_category
    for every patient in analysisQuery.patientIds, ordered by patient and
    then item date, and groups consecutive rows by patient.

    Args:
        analysisQuery: object whose ``patientIds`` attribute lists the
            patients to query.
        conn: open database connection; it is not closed here.

    Yields:
        2-tuples (patientId, clinicalItemIdList), where the list holds that
        patient's clinical_item_id values in item_date order.  Nothing is
        yielded when the query returns no rows.

    Note: no "default_recommend" / exclusion filter is applied by this query.
    """
    sqlQuery = SQLQuery()
    sqlQuery.addSelect("pi.patient_id")
    sqlQuery.addSelect("pi.clinical_item_id")
    #sqlQuery.addSelect("pi.item_date");
    sqlQuery.addFrom("clinical_item_category as cic")
    sqlQuery.addFrom("clinical_item as ci")
    sqlQuery.addFrom("patient_item as pi")
    sqlQuery.addWhere("cic.clinical_item_category_id = ci.clinical_item_category_id")
    sqlQuery.addWhere("ci.clinical_item_id = pi.clinical_item_id")
    sqlQuery.addWhereIn("pi.patient_id", analysisQuery.patientIds)
    sqlQuery.addOrderBy("pi.patient_id")
    sqlQuery.addOrderBy("pi.item_date")

    # Execute the actual query for patient order / item data
    cursor = conn.cursor()
    # try/finally so the cursor is closed even if the consumer stops
    # iterating early or the query raises.
    try:
        cursor.execute(str(sqlQuery), tuple(sqlQuery.params))

        sawAnyRow = False
        currentPatientId = None
        clinicalItemIdList = list()

        row = cursor.fetchone()
        while row is not None:
            (patientId, clinicalItemId) = row
            if not sawAnyRow:
                sawAnyRow = True
                currentPatientId = patientId
            if patientId != currentPatientId:
                # Changed patient, yield the existing data for the previous patient
                yield (currentPatientId, clinicalItemIdList)
                # Start tracking the new patient
                currentPatientId = patientId
                clinicalItemIdList = list()
            clinicalItemIdList.append(clinicalItemId)
            row = cursor.fetchone()

        # Yield the last patient's data, but only if there was any data;
        # previously an empty result set produced a bogus (None, []) item.
        if sawAnyRow:
            yield (currentPatientId, clinicalItemIdList)
    finally:
        cursor.close()
def clinicalItemSearch(self, itemQuery, conn=None):
    """Return clinical items matching the criteria carried by itemQuery.

    Criteria read from itemQuery: searchStr (whitespace-separated words, each
    matched case-insensitively as a word prefix in ci.description),
    sourceTables, analysisStatus (when set, items with item_count 0 are also
    excluded), sortField and resultCount (used as the query limit).

    Returns the list produced by modelListFromTable for the result table.
    A connection is created from self.connFactory (and closed on exit) only
    when conn is None.
    """
    ownsConnection = conn is None
    if ownsConnection:
        conn = self.connFactory.connection()

    try:
        searchQuery = SQLQuery()
        for selectExpr in (
            "ci.clinical_item_id",
            "ci.name",
            "ci.description",
            "cic.source_table",
            "cic.description as category_description",
        ):
            searchQuery.addSelect(selectExpr)
        searchQuery.addFrom("clinical_item as ci")
        searchQuery.addFrom("clinical_item_category as cic")
        searchQuery.addWhere(
            "ci.clinical_item_category_id = cic.clinical_item_category_id")

        if itemQuery.searchStr is not None:
            #searchQuery.openWhereOrClause()
            for searchField in ("ci.description", ):
                for searchWord in itemQuery.searchStr.split():
                    # Prefix search by regular expression: word at the start
                    # of the text or right after a non-letter
                    prefixRegex = "^%(searchWord)s|[^a-z]%(searchWord)s" % {
                        "searchWord": searchWord}
                    searchQuery.addWhereOp(searchField, "~*", prefixRegex)
            #searchQuery.closeWhereOrClause()

        if itemQuery.sourceTables:
            searchQuery.addWhereIn("cic.source_table", itemQuery.sourceTables)

        if itemQuery.analysisStatus is not None:
            searchQuery.addWhereEqual("ci.analysis_status", itemQuery.analysisStatus)
            # Also ignore items with no occurrence in the analyzed data
            # (occurs if item was accepted for analysis from multi-year
            # dataset, but never used in a sub-time frame's analysis)
            searchQuery.addWhere("ci.item_count <> 0")

        # Caller's sort field takes priority; the fixed columns break ties
        if itemQuery.sortField:
            searchQuery.addOrderBy(itemQuery.sortField)
        for tieBreaker in ("cic.description", "ci.name", "ci.description"):
            searchQuery.addOrderBy(tieBreaker)

        if itemQuery.resultCount is not None:
            searchQuery.limit = itemQuery.resultCount

        resultTable = DBUtil.execute(searchQuery, includeColumnNames=True, conn=conn)
        return modelListFromTable(resultTable)
    finally:
        if ownsConnection:
            conn.close()
def loadRespiratoryViralPanelItemIds(extractor):
    """Return the set of clinical_item_id values for respiratory panel labs.

    Looks up the category whose description is like 'Lab', then selects
    clinical items in that category with analysis_status 1 whose description
    matches 'Respiratory.*Panel' case-insensitively.  The extractor argument
    is not used.
    """
    # labCategoryId = 6;
    categoryRows = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like 'Lab'")
    labCategoryId = categoryRows[0][0]

    panelQuery = SQLQuery()
    panelQuery.addSelect("clinical_item_id")
    panelQuery.addFrom("clinical_item")
    panelQuery.addWhereEqual("analysis_status", 1)
    panelQuery.addWhereIn("clinical_item_category_id", (labCategoryId,))
    panelQuery.addWhere("description ~* '%s'" % 'Respiratory.*Panel')

    # First column of every row is the item ID
    return set(itemRow[0] for itemRow in DBUtil.execute(panelQuery))
def queryPatients(period, locations, rxCount):
    """Return a DataFrame of patients with more than two mapped med orders.

    Counts stride_order_med rows (joined to stride_mapped_meds and
    stride_patient) per patient, limited to analysis_status = 1,
    possible_oncology = 0, the given patient locations, and ordering times
    strictly between period[0] and period[-1].

    The returned frame has columns "patientId", "nOpioidRx" and
    "periodStart" (set to period[0] for every row).

    NOTE(review): rxCount is not used; the threshold is hard-coded in the
    HAVING clause - confirm whether it was meant to be parameterised.
    """
    log.info(
        "Select patients fitting criteria in designated time period: (%s,%s)" % period)

    periodStart = period[0]
    periodEnd = period[-1]

    rxQuery = SQLQuery()
    rxQuery.addSelect("med.pat_id")
    rxQuery.addSelect("count(order_med_id)")
    for tableSpec in (
        "stride_mapped_meds as map",
        "stride_order_med as med",
        "stride_patient as pat",
    ):
        rxQuery.addFrom(tableSpec)
    for condition in (
        "analysis_status = 1",
        "map.medication_id = med.medication_id",
        "med.pat_id = pat.pat_id",
        "possible_oncology = 0",
    ):
        rxQuery.addWhere(condition)
    rxQuery.addWhereIn("patient_location", locations)
    rxQuery.addWhereOp("ordering_datetime", ">", periodStart)
    rxQuery.addWhereOp("ordering_datetime", "<", periodEnd)
    rxQuery.addGroupBy("med.pat_id")
    rxQuery.addHaving("count(order_med_id) >2")

    rxRows = DBUtil.execute(rxQuery)
    patientDF = pd.DataFrame(rxRows, columns=["patientId", "nOpioidRx"])
    #patientDF.set_index("patientId",drop=False,inplace=True);

    # Tag every record with the period it belongs to
    patientDF["periodStart"] = periodStart
    return patientDF
def loadBloodCultureItemIds(extractor):
    """Return the set of clinical_item_id values for blood culture items.

    Looks up the category whose description is like 'Microbiology', then
    selects clinical items in that category with analysis_status 1 whose
    description matches 'Blood Culture' case-insensitively.  The extractor
    argument is not used.
    """
    # microCategoryId = 15;
    categoryRows = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like 'Microbiology'")
    microCategoryId = categoryRows[0][0]

    # Look for diagnostic tests indicating suspected infection / sepsis
    cultureQuery = SQLQuery()
    cultureQuery.addSelect("clinical_item_id")
    cultureQuery.addFrom("clinical_item")
    cultureQuery.addWhereEqual("analysis_status", 1)
    cultureQuery.addWhereIn("clinical_item_category_id", (microCategoryId,))
    cultureQuery.addWhere("description ~* '%s'" % 'Blood Culture')

    # First column of every row is the item ID
    return set(itemRow[0] for itemRow in DBUtil.execute(cultureQuery))
def get_cnt(lab, lab_type, columns):
    """Return stride_order_proc rows for one lab panel ordered in 2014-2016.

    Args:
        lab: proc_code of the panel to look up.
        lab_type: must be 'panel'; no other lab type is implemented.
        columns: iterable of column expressions to select.

    Returns:
        Whatever DBUtil.execute returns for the query (the selected columns
        for each matching order).

    Raises:
        ValueError: if lab_type is not 'panel'.  Previously this case fell
            through to an unbound local variable error.
    """
    if lab_type != 'panel':
        raise ValueError("Unsupported lab_type: %r" % (lab_type,))

    query = SQLQuery()
    for column in columns:
        query.addSelect(column)
    query.addFrom('stride_order_proc')
    # Bind values as query parameters rather than formatting them into the
    # SQL text, so a proc_code containing a quote cannot break the statement.
    query.addWhereEqual("proc_code", lab)
    query.addWhereOp("order_time", ">=", "2014-01-01")
    # NOTE(review): if order_time carries a time of day, this upper bound
    # excludes orders placed after midnight on 2016-12-31 - confirm intent.
    query.addWhereOp("order_time", "<=", "2016-12-31")
    # query.addWhere("order_status = 'Completed'") # TODO: what about ""

    return DBUtil.execute(query)
def _getNonNullComponents(self):
    """Write base names with a non-null max_result_flag to base_names.csv.

    Selects the BASE_NAME column from order_result_stat and saves the rows,
    without an index column, to DATA_FOLDER + 'base_names.csv'.
    """
    statQuery = SQLQuery()
    # SELECT
    statQuery.addSelect(BASE_NAME)
    # statQuery.addSelect('max_result_flag')
    # FROM
    statQuery.addFrom('order_result_stat')
    # WHERE
    statQuery.addWhere('max_result_flag is not null')

    statRows = DBUtil.execute(statQuery)

    # The CSV header comes from the query's select list
    outputPath = DATA_FOLDER + 'base_names.csv'
    baseNameFrame = pd.DataFrame(statRows, columns=statQuery.select)
    baseNameFrame.to_csv(outputPath, index=False)
def _getNonNullBaseNames(self):
    """Write base names and their max_result_flag to base_names.csv.

    Runs self.SCRIPT_FILE through DBUtil.runDBScript first, then selects
    BASE_NAME and max_result_flag from order_result_stat where the flag is
    not null, and saves the rows (no index column) to 'base_names.csv' in
    the current working directory.  The query and its parameters are
    printed before execution.
    """
    statQuery = SQLQuery()
    # SELECT
    for selectColumn in (BASE_NAME, 'max_result_flag'):
        statQuery.addSelect(selectColumn)
    # FROM
    statQuery.addFrom('order_result_stat')
    # WHERE
    statQuery.addWhere('max_result_flag is not null')

    # Single-argument parenthesised form prints the same text as the bare
    # print statement.
    print(statQuery)
    print(statQuery.getParams())

    # presumably the script creates or refreshes order_result_stat - verify
    DBUtil.runDBScript(self.SCRIPT_FILE, False)

    statRows = DBUtil.execute(str(statQuery), statQuery.getParams())
    baseNameFrame = pd.DataFrame(statRows, columns=statQuery.select)
    baseNameFrame.to_csv('base_names.csv', index=False)
def _getComponentItemIds(self):
    """Write clinical item IDs, names and derived base names to result_ids.csv.

    Selects CLINICAL_ITEM_ID and name from clinical_item rows in category 58,
    ordered by name.  A 'base_name' column is derived from 'name' by
    removing any parenthesised run of letters (case-insensitive) and
    stripping surrounding whitespace.  The frame is saved without an index
    column to DATA_FOLDER + 'result_ids.csv'.
    """
    query = SQLQuery()
    # SELECT
    query.addSelect(CLINICAL_ITEM_ID)
    query.addSelect('name')
    # FROM
    query.addFrom('clinical_item')
    # WHERE
    query.addWhere('clinical_item_category_id = 58')
    query.addOrderBy('name')

    results = DBUtil.execute(query)
    df = pd.DataFrame(results, columns=query.select)

    # Raw string: "\(" is not a valid escape in a normal string literal and
    # triggers warnings on newer Python versions.  The pattern is unchanged.
    # NOTE(review): newer pandas releases default str.replace to regex=False;
    # confirm the installed version still treats this pattern as a regex.
    df['base_name'] = df['name'].str.replace(
        r'\([a-z]*\)', '', case=False).str.strip()
    df.to_csv(DATA_FOLDER + 'result_ids.csv', index=False)
def _getNonNullLabs(self):
    """Write the distinct LAB% proc codes with a non-null abnormal_yn to CSV.

    Selects proc_code from stride_order_proc where proc_code is like 'LAB%'
    and abnormal_yn is not null, grouped and ordered by proc_code, and saves
    the rows (no index column) to DATA_FOLDER + 'proc_codes.csv'.
    """
    labQuery = SQLQuery()
    # SELECT
    labQuery.addSelect('proc_code')
    # FROM
    labQuery.addFrom('stride_order_proc')
    # WHERE
    labQuery.addWhereLike('proc_code', 'LAB%')
    labQuery.addWhere('abnormal_yn is not null')
    # GROUP BY collapses repeated codes; ORDER BY keeps the output sorted
    labQuery.addGroupBy('proc_code')
    labQuery.addOrderBy('proc_code')

    labRows = DBUtil.execute(labQuery)
    outputPath = DATA_FOLDER + 'proc_codes.csv'
    procCodeFrame = pd.DataFrame(labRows, columns=labQuery.select)
    procCodeFrame.to_csv(outputPath, index=False)
def test_addTimeCycleFeatures(self):
    """
    Test .addTimeCycleFeatures() for the "month" and "hour" cycles of
    order_time.
    """
    episodeCursor = self.connection.cursor()

    # SQL query producing the list of patient episodes.
    episodeQuery = SQLQuery()
    for selectExpr in (
        "CAST(pat_id AS bigint)",
        "sop.order_proc_id AS order_proc_id",
        "proc_code",
        "order_time",
        "COUNT(CASE result_in_range_yn WHEN 'Y' THEN 1 ELSE null END) AS normal_results",
    ):
        episodeQuery.addSelect(selectExpr)
    for tableSpec in ("stride_order_proc AS sop", "stride_order_results AS sor"):
        episodeQuery.addFrom(tableSpec)
    episodeQuery.addWhere("sop.order_proc_id = sor.order_proc_id")
    episodeQuery.addWhereEqual("proc_code", "LABMETB")
    episodeKeyColumns = "pat_id, sop.order_proc_id, proc_code, order_time"
    episodeQuery.addGroupBy(episodeKeyColumns)
    episodeQuery.addOrderBy(episodeKeyColumns)
    episodeCursor.execute(str(episodeQuery), episodeQuery.params)

    # Hand the open cursor to the factory as its episode input and process it.
    self.factory.setPatientEpisodeInput(episodeCursor, "pat_id", "order_time")
    self.factory.processPatientEpisodeInput()

    # Request one time-cycle feature per granularity.
    for cycleUnit in ("month", "hour"):
        self.factory.addTimeCycleFeatures("order_time", cycleUnit)

    # Build the matrix and compare it (minus its first two rows) to the
    # expected table.
    self.factory.buildFeatureMatrix()
    actualMatrix = self.factory.readFeatureMatrixFile()
    expectedMatrix = FM_TEST_OUTPUT["test_addTimeCycleFeatures"]["expectedMatrix"]
    self.assertEqualTable(expectedMatrix, actualMatrix[2:], precision=5)

    # Best-effort removal of the generated matrix file.
    try:
        os.remove(self.factory.getMatrixFileName())
    except OSError:
        pass
def _getPatientsComponentsHistories(self, item_ids):
    """Return (patient_id, name, item_date) rows for the given clinical items.

    Joins clinical_item to patient_item, keeps only the items whose
    clinical_item_id is in item_ids, and orders the rows by patient and then
    item date.  The query is run through customDBUtil.execute and its result
    is returned unchanged.
    """
    historyQuery = SQLQuery()
    # SELECT
    for selectColumn in ('patient_id', 'name', 'item_date'):
        historyQuery.addSelect(selectColumn)
    # FROM
    for tableSpec in ('clinical_item as ci', 'patient_item as pi'):
        historyQuery.addFrom(tableSpec)
    # WHERE
    historyQuery.addWhere('ci.clinical_item_id = pi.clinical_item_id')
    historyQuery.addWhereIn('ci.clinical_item_id', item_ids)
    # ORDER BY
    for sortColumn in ('patient_id', 'item_date'):
        historyQuery.addOrderBy(sortColumn)
    # print historyQuery
    # print historyQuery.getParams()
    return customDBUtil.execute(historyQuery)
def loadIVAntibioticItemIds(extractor):
    """Return the set of clinical_item_id values for IV antibiotic items.

    Looks up the category whose description ends in '(Intravenous)', then
    selects clinical items in that category with analysis_status 1 whose
    description matches, case-insensitively, any of the name patterns in the
    extractor's "IVAntibiotics.Names" map data.

    Args:
        extractor: object providing loadMapData(name), which yields rows
            with a "name" entry.
    """
    # ivMedCategoryId = 72;
    ivMedCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like '%%(Intravenous)'")[0][0]

    # Look for any IV antibiotics based on expected names
    query = SQLQuery()
    query.addSelect("clinical_item_id")
    query.addFrom("clinical_item")
    query.addWhereEqual("analysis_status", 1)
    query.addWhereEqual("clinical_item_category_id", ivMedCategoryId)
    query.openWhereOrClause()
    for row in extractor.loadMapData("IVAntibiotics.Names"):
        # Bind each pattern as a query parameter instead of formatting it
        # into the SQL text, so names containing quotes or percent signs
        # cannot corrupt the statement.
        query.addWhereOp("description", "~*", row["name"])
    query.closeWhereOrClause()

    ivAntibioticItemIds = set()
    for row in DBUtil.execute(query):
        ivAntibioticItemIds.add(row[0])
    return ivAntibioticItemIds
def loadNotes(self, patientId, currentTime, conn=None):
    """Load the notes of a simulated patient released by the given time.

    Joins sim_note to sim_patient_state and keeps the notes of patientId
    whose relative time (state start plus the note's offset within the
    state) is at or before currentTime, ordered by that relative time.

    Returns the list produced by modelListFromTable for the result table.
    A connection is created from self.connFactory (and closed on exit) only
    when conn is None.
    """
    ownsConnection = conn is None
    if ownsConnection:
        conn = self.connFactory.connection()

    try:
        # Absolute note time = start of the patient state + offset in state
        noteTimeExpr = "(sps.relative_time_start + sn.relative_state_time)"

        noteQuery = SQLQuery()
        noteQuery.addSelect("sn.sim_note_id")
        noteQuery.addSelect("sps.sim_patient_id")  # Link
        for selectColumn in (
            "sn.sim_state_id",
            "sn.note_type_id",
            "sn.author_type_id",
            "sn.service_type_id",
        ):
            noteQuery.addSelect(selectColumn)
        noteQuery.addSelect(
            "(sps.relative_time_start + sn.relative_state_time) as relative_time")
        noteQuery.addSelect("sn.content")
        noteQuery.addFrom("sim_note as sn")
        noteQuery.addFrom("sim_patient_state as sps")
        noteQuery.addWhere("sn.sim_state_id = sps.sim_state_id")
        noteQuery.addWhereEqual("sps.sim_patient_id", patientId)
        # Only unlock notes once traverse expected time
        noteQuery.addWhereOp(noteTimeExpr, "<=", currentTime)
        noteQuery.addOrderBy(noteTimeExpr)

        noteTable = DBUtil.execute(noteQuery, includeColumnNames=True, conn=conn)
        return modelListFromTable(noteTable)
    finally:
        if ownsConnection:
            conn.close()
def _get_random_patient_list(self):
    """Return a random sample of patient IDs with orders for self._component.

    The sampling is done client-side with numpy to avoid RANDOM() on the
    database.  The number of patients is derived from
    self._num_requested_episodes divided by the median number of orders per
    patient (at least 1), capped at the number of patients found.

    Side effects:
        Sets self._num_patients.  Calls sys.exit if the query returns no
        rows.

    Returns:
        List of pat_id values, sampled without replacement.
    """
    # NOTE: the original opened self._connection.cursor() here but never used
    # or closed it; DBUtil.execute manages its own access, so it was dropped.

    # Count orders per patient.
    query = SQLQuery()
    query.addSelect('pat_id')
    query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
    query.addFrom('stride_order_proc AS sop')
    query.addFrom('stride_order_results AS sor')
    query.addWhere('sop.order_proc_id = sor.order_proc_id')
    # NOTE(review): the component filter below is assumed to be active (the
    # error message refers to the component) - confirm against the original
    # formatting of this method.
    query.addWhereIn("base_name", [self._component])
    query.addGroupBy('pat_id')

    log.debug('Querying median orders per patient...')
    results = DBUtil.execute(query)

    order_counts = [row[1] for row in results]
    if len(results) == 0:
        error_msg = '0 orders for component "%s."' % self._component
        log.critical(error_msg)
        sys.exit('[ERROR] %s' % error_msg)

    # Despite the variable name, this is the median.
    avg_orders_per_patient = numpy.median(order_counts)
    log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)

    # Based on average # of results, figure out how many patients we'd
    # need to get for a feature matrix of requested size.
    self._num_patients = int(numpy.max(
        [self._num_requested_episodes / avg_orders_per_patient, 1]))

    # Some components may have fewer associated patients than the required
    # sample size.
    patient_number_chosen = min([len(results), self._num_patients])
    inds_random_patients = numpy.random.choice(
        len(results), size=patient_number_chosen, replace=False)

    return [results[ind][0] for ind in inds_random_patients]
def queryDrugScreens(patientDF, period, locations):
    """Add an integer "nDrugScreens" column to patientDF.

    Counts distinct outpatient drug screen orders per patient in
    stride_order_proc_drug_screen, limited to the given patient locations,
    ordering dates strictly between period[0] and period[-1], and the
    patients listed in patientDF["patientId"].

    Args:
        patientDF: DataFrame with a "patientId" column.
        period: sequence whose first and last items bound the ordering date.
        locations: patient_location values to include.

    Returns:
        A new DataFrame: patientDF left-merged with the counts, with missing
        counts set to 0.
    """
    log.info("Populate drug screens by primary locations")

    query = SQLQuery()
    query.addSelect("pat_id")
    query.addSelect("count(distinct order_proc_id)")
    query.addFrom("stride_order_proc_drug_screen")
    query.addWhere("ordering_mode = 'Outpatient'")
    query.addWhereIn("patient_location", locations)
    query.addWhereOp("ordering_date", ">", period[0])
    query.addWhereOp("ordering_date", "<", period[-1])
    query.addWhereIn("pat_id", patientDF["patientId"])
    query.addGroupBy("pat_id")

    results = DBUtil.execute(query)
    cols = ["patientId", "nDrugScreens"]
    newDF = pd.DataFrame(results, columns=cols)

    # Left merge keeps every patient; those without drug screens get NaN.
    patientDF = patientDF.merge(newDF, how="left")

    # Populate default values if no data, then undo the float conversion the
    # NaNs forced.  fillna replaces the earlier chained-indexing assignment
    # (df[col][mask] = 0), which pandas may apply to a temporary copy.
    patientDF["nDrugScreens"] = patientDF["nDrugScreens"].fillna(0).astype("int")
    return patientDF
def action_default(self):
    """Build option value/text lists for the clinical items of some categories.

    Reads from self.requestData:
        "clinical_item_category_id": comma-separated category IDs.
        "orderBy": comma-separated sort terms, each a selected column
            (clinical_item_id, name, description, item_count; an optional
            "ci." prefix is accepted) optionally followed by asc/desc.

    Writes to self.requestData:
        "optionValuesJSON": str() of the list of item IDs (as strings).
        "optionTextsJSON": str() of the list of display texts, each being
            the first sort field's value followed by the remaining display
            fields, joined with " - ".

    Raises:
        ValueError: if "orderBy" is not made up of the sort terms above.
    """
    # Convert query category ID(s) into a list, even of size 1
    categoryIds = self.requestData["clinical_item_category_id"].split(",")

    # orderBy comes from the request and is placed verbatim in the ORDER BY
    # clause, where it cannot be bound as a parameter.  Restrict it to the
    # selected columns so it cannot be used to inject SQL.
    orderBy = self.requestData["orderBy"]
    sortableFields = ("clinical_item_id", "name", "description", "item_count")
    for orderTerm in orderBy.split(","):
        tokens = orderTerm.split()
        fieldName = tokens[0].lower() if tokens else ""
        if fieldName.startswith("ci."):
            fieldName = fieldName[len("ci."):]
        directionOk = len(tokens) == 1 or (
            len(tokens) == 2 and tokens[1].lower() in ("asc", "desc"))
        if fieldName not in sortableFields or not directionOk:
            raise ValueError("Unsupported orderBy value: %r" % (orderBy,))

    query = SQLQuery()
    query.addSelect("ci.clinical_item_id")
    query.addSelect("ci.name")
    query.addSelect("ci.description")
    query.addSelect("ci.item_count")
    query.addFrom("clinical_item as ci")
    query.addWhere("analysis_status = 1")  # Ignore specified items
    query.addWhereIn("ci.clinical_item_category_id", categoryIds)
    query.addOrderBy(orderBy)

    resultTable = DBUtil.execute(query, includeColumnNames=True)
    resultModels = modelListFromTable(resultTable)

    # The first sort field leads each display text; it is loop-invariant.
    orderField = orderBy.split()[0]
    displayFields = ("name", "description", "item_count")

    optionValues = []
    optionTexts = []
    for resultModel in resultModels:
        optionValues.append(str(resultModel["clinical_item_id"]))

        textValueList = [str(resultModel[orderField])]
        for field in displayFields:
            if field != orderField:
                textValueList.append(str(resultModel[field]))
        optionTexts.append(" - ".join(textValueList))

    # Conveniently, Python string representation coincides with JavaScript
    # NOTE(review): str() of a list is not strict JSON (single quotes) -
    # confirm the consumer evaluates it as a JavaScript literal.
    self.requestData["optionValuesJSON"] = str(optionValues)
    self.requestData["optionTextsJSON"] = str(optionTexts)
def loadPatientOrders(self, patientId, currentTime, loadActive=True, conn=None):
    """Load the orders of a simulated patient that exist at currentTime.

    Only orders with relative_time_start <= currentTime are returned.

    loadActive - When true, keep only orders that are still active (no end
        time, or an end time after currentTime) and sort them by category,
        item description, then start time.  Otherwise no activity filter is
        applied and orders are sorted chronologically first.

    Returns the list produced by modelListFromTable for the result table.
    A connection is created from self.connFactory (and closed on exit) only
    when conn is None.
    """
    ownsConnection = conn is None
    if ownsConnection:
        conn = self.connFactory.connection()

    try:
        orderQuery = SQLQuery()
        for selectExpr in (
            "po.sim_patient_order_id",
            "po.sim_user_id",
            "po.sim_patient_id",
            "po.sim_state_id",
            "po.clinical_item_id",
            "po.relative_time_start",
            "po.relative_time_end",
            "ci.name",
            "ci.description",
            "cic.source_table",
            "cic.description as category_description",
        ):
            orderQuery.addSelect(selectExpr)
        for tableSpec in (
            "sim_patient_order as po",
            "clinical_item as ci",
            "clinical_item_category as cic",
        ):
            orderQuery.addFrom(tableSpec)
        orderQuery.addWhere("po.clinical_item_id = ci.clinical_item_id")
        orderQuery.addWhere(
            "ci.clinical_item_category_id = cic.clinical_item_category_id")
        orderQuery.addWhereEqual("sim_patient_id", patientId)
        orderQuery.addWhereOp("relative_time_start", "<=", currentTime)

        if loadActive:
            # Filter out inactive orders here.
            orderQuery.openWhereOrClause()
            orderQuery.addWhere("relative_time_end is null")
            orderQuery.addWhereOp("relative_time_end", ">", currentTime)
            orderQuery.closeWhereOrClause()
            # Organize currently active orders by category
            sortColumns = ("cic.description", "ci.description", "relative_time_start")
        else:
            #elif loadActive is not None: # Filter out active orders here.
            #    orderQuery.addWhereOp("relative_time_end","<=", currentTime)
            # Otherwise chronologic order
            sortColumns = ("relative_time_start", "cic.description", "ci.description")
        for sortColumn in sortColumns:
            orderQuery.addOrderBy(sortColumn)

        orderTable = DBUtil.execute(orderQuery, includeColumnNames=True, conn=conn)
        return modelListFromTable(orderTable)
    finally:
        if ownsConnection:
            conn.close()
def get_orders_per_year(lab_name, lab_col, table_name, years):
    """Write one tab-separated line of yearly result counts for a lab.

    Counts stride_order_results rows for base_name == lab_name with
    result_date between 2010-01-01 and 2017-12-31, bucketed by calendar
    year, and writes "<lab_name>\\t<count>\\t...<count>\\t\\n" to the
    module-level file object ``f``.

    Args:
        lab_name: base_name of the lab to count.
        lab_col: unused.
        table_name: unused.
        years: ascending, contiguous list of years; bucket i holds the count
            for years[0] + i.  Results dated outside this range are ignored.
    """
    query = SQLQuery()
    query.addSelect('base_name')
    query.addSelect('result_date')
    query.addFrom('stride_order_results')
    # Bind the lab name as a query parameter rather than formatting it into
    # the SQL text, so a name containing a quote cannot break the statement.
    query.addWhereEqual('base_name', lab_name)
    query.addWhere("result_date >= '2010-01-01'")
    query.addWhere("result_date <= '2017-12-31'")
    # query.setLimit(100)
    all_recs = DBUtil.execute(query)

    cnts = [0] * len(years)
    for one_rec in all_recs:
        ind = one_rec[1].year - years[0]
        # Skip years outside the requested range; a negative index would
        # otherwise wrap around and inflate the wrong bucket.
        if 0 <= ind < len(cnts):
            cnts[ind] += 1

    # `f` is expected to be an open, writable file defined at module level.
    f.write(lab_name + '\t')
    for cnt in cnts:
        f.write(str(cnt) + '\t')
    f.write('\n')
def _get_average_orders_per_patient(self):
    """Return the median number of orders per patient for self._lab_panel.

    Despite the method name, the value returned is numpy.median of the
    per-patient order counts, not the mean.

    Counts stride_order_proc rows joined to stride_order_results per
    patient, filtered on proc_code == self._lab_panel and on the base names
    returned by self._get_components_in_lab_panel().

    Calls sys.exit if the query returns no rows.
    """
    # NOTE: the original opened self._connection.cursor() here but never used
    # or closed it; DBUtil.execute manages its own access, so it was dropped.

    # Count orders per patient for this lab panel.
    query = SQLQuery()
    query.addSelect('pat_id')
    query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
    query.addFrom('stride_order_proc AS sop')
    query.addFrom('stride_order_results AS sor')
    query.addWhere('sop.order_proc_id = sor.order_proc_id')
    query.addWhereIn("proc_code", [self._lab_panel])
    components = self._get_components_in_lab_panel()
    query.addWhereIn("base_name", components)
    query.addGroupBy('pat_id')

    log.debug('Querying median orders per patient...')
    results = DBUtil.execute(query)

    order_counts = [row[1] for row in results]
    if len(order_counts) == 0:
        error_msg = '0 orders for lab panel "%s."' % self._lab_panel
        log.critical(error_msg)
        sys.exit('[ERROR] %s' % error_msg)
    return numpy.median(order_counts)
def loadPendingResultOrders(self, patientId, relativeTime, conn=None):
    """Load a patient's orders whose results are still pending at relativeTime.

    Joins sim_patient_order to clinical_item and sim_order_result_map and
    keeps orders that started at or before relativeTime, have no end time or
    end after relativeTime, and whose start time plus turnaround time is
    still in the future.  Each row includes time_until_result, the estimated
    time left until the result is available, and rows are sorted by it.

    Returns the list produced by modelListFromTable for the result table.
    A connection is created from self.connFactory (and closed on exit) only
    when conn is None.
    """
    ownsConnection = conn is None
    if ownsConnection:
        conn = self.connFactory.connection()

    try:
        pendingQuery = SQLQuery()
        # Distinct so don't report multiple times for panel orders
        pendingQuery.addSelect("distinct po.clinical_item_id")
        for selectColumn in (
            "po.relative_time_start",
            "po.relative_time_end",
            "ci.name",
            "ci.description",
        ):
            pendingQuery.addSelect(selectColumn)
        # Could have different turnaround times for single order if
        # different sub results. Just report each.
        pendingQuery.addSelect("sorm.turnaround_time")
        # Calculate time until expect result
        timeUntilResultExpr = (
            "sorm.turnaround_time - (%d - po.relative_time_start) as time_until_result"
            % relativeTime)
        pendingQuery.addSelect(timeUntilResultExpr)

        for tableSpec in (
            "sim_patient_order as po",
            "clinical_item as ci",
            "sim_order_result_map as sorm",
        ):
            pendingQuery.addFrom(tableSpec)
        pendingQuery.addWhere("po.clinical_item_id = ci.clinical_item_id")
        pendingQuery.addWhere("po.clinical_item_id = sorm.clinical_item_id")
        pendingQuery.addWhereEqual("sim_patient_id", patientId)

        # Only catch orders up to the given relativeTime and not cancelled
        pendingQuery.addWhereOp("relative_time_start", "<=", relativeTime)
        pendingQuery.openWhereOrClause()
        pendingQuery.addWhere("relative_time_end is null")
        pendingQuery.addWhereOp("relative_time_end", ">", relativeTime)
        pendingQuery.closeWhereOrClause()

        # Only PENDING orders, so don't report orders whose results should
        # already be available
        pendingQuery.addWhereOp(
            "sorm.turnaround_time + po.relative_time_start", ">", relativeTime)

        for sortColumn in ("time_until_result", "relative_time_start", "ci.name"):
            pendingQuery.addOrderBy(sortColumn)

        pendingTable = DBUtil.execute(pendingQuery, includeColumnNames=True, conn=conn)
        return modelListFromTable(pendingTable)
    finally:
        if ownsConnection:
            conn.close()
def querySourceItems(self, startDate=None, endDate=None, progress=None, conn=None):
    """Query the database for all source clinical items (orders, etc.)
    and yield the results one at a time as RowItemModel objects.

    Entries with instantiated_time not null are ignored, as those represent
    child orders spawned from an original order, whereas we are more
    interested in the decision making to enter the original order.
    Orders with no order_time and PRN orders are skipped as well.

    startDate: if provided, only return items whose order_time is on or
        after that date.
    endDate: if provided, only return items whose order_time is strictly
        before that date.
    progress: if provided, its `total` attribute is set to the row count
        reported by the query's totalQuery() before iteration starts.
    conn: optional connection to reuse. When omitted, one is obtained from
        self.connFactory and closed once the generator finishes, is closed,
        or fails.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    try:
        # Column expressions to query for, and the plain field names they
        # map to in each yielded RowItemModel (same order in both lists).
        queryHeaders = [
            "op.order_proc_id", "pat_id", "pat_enc_csn_id", "op.order_type",
            "op.proc_id", "op.proc_code", "description", "order_time",
            "protocol_id", "protocol_name", "section_name", "smart_group",
        ]
        headers = [
            "order_proc_id", "pat_id", "pat_enc_csn_id", "order_type",
            "proc_id", "proc_code", "description", "order_time",
            "protocol_id", "protocol_name", "section_name", "smart_group",
        ]

        query = SQLQuery()
        for header in queryHeaders:
            query.addSelect(header)
        query.addFrom("stride_order_proc as op left outer join stride_orderset_order_proc as os on op.order_proc_id = os.order_proc_id")
        query.addWhere("order_time is not null")  # Rare cases of "comment" orders with no date/time associated
        query.addWhere("instantiated_time is null")
        query.addWhere("(stand_interval is null or stand_interval not like '%%PRN')")  # Ignore PRN orders to simplify somewhat
        if startDate is not None:
            query.addWhereOp("order_time", ">=", startDate)
        if endDate is not None:
            query.addWhereOp("order_time", "<", endDate)

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

        cursor = conn.cursor()
        try:
            # Do one massive query, but yield data for one item at a time
            # to avoid having to keep the whole result set in memory.
            cursor.execute(str(query), tuple(query.params))
            row = cursor.fetchone()
            while row is not None:
                yield RowItemModel(row, headers)
                row = cursor.fetchone()
        finally:
            # Runs on exhaustion, on errors, and when the consumer stops
            # early (generator close), so the cursor is never leaked.
            cursor.close()
    finally:
        if not extConn:
            conn.close()
def orderSetSearch(self, itemQuery, conn=None):
    """Look for order sets (item collections) matching the query criteria
    and yield them one at a time.

    itemQuery: object whose searchStr (whitespace-separated words, each
        applied as a case-insensitive word-prefix match on the collection
        name) and analysisStatus attributes are used as optional filters;
        either may be None to skip that filter.
    conn: optional connection to reuse. When omitted, one is obtained from
        self.connFactory and closed when the generator finishes.

    Yields dictionaries with keys "external_id", "name" and "itemList",
    where itemList holds the result rows sharing that external_id.
    Nothing is yielded when the query matches no rows.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    try:
        query = SQLQuery()
        query.addSelect("ic.item_collection_id")
        query.addSelect("ic.external_id")
        query.addSelect("ic.name as collection_name")
        query.addSelect("ic.section")
        query.addSelect("ic.subgroup")
        query.addSelect("ci.clinical_item_category_id")
        query.addSelect("ci.clinical_item_id")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        query.addFrom("item_collection as ic")
        query.addFrom("item_collection_item as ici")
        query.addFrom("clinical_item as ci")
        query.addWhere("ic.item_collection_id = ici.item_collection_id")
        query.addWhere("ici.clinical_item_id = ci.clinical_item_id")
        query.addWhereNotEqual("ic.section", AD_HOC_SECTION)

        if itemQuery.searchStr is not None:
            for searchWord in itemQuery.searchStr.split():
                # Prefix search by regular expression: the word must start
                # the name or follow a non-letter character.
                # NOTE(review): searchWord is interpolated into the pattern
                # unescaped, so regex metacharacters in the search string
                # are interpreted as regex syntax.
                query.addWhereOp(
                    "ic.name", "~*",
                    "^%(searchWord)s|[^a-z]%(searchWord)s" % {"searchWord": searchWord})
        if itemQuery.analysisStatus is not None:
            query.addWhereEqual("ci.analysis_status", itemQuery.analysisStatus)

        # Ordering keeps rows of the same order set adjacent, which the
        # aggregation loop below relies on.
        query.addOrderBy("lower(ic.name)")
        query.addOrderBy("ic.external_id")
        query.addOrderBy("lower(ic.section)")
        query.addOrderBy("lower(ic.subgroup)")
        query.addOrderBy("ci.clinical_item_id")
        query.addOrderBy("ci.name")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)

        # Aggregate up into order sets
        orderSetModel = None
        for row in dataModels:
            if orderSetModel is None or row["external_id"] != orderSetModel["external_id"]:
                if orderSetModel is not None:
                    # Prior order set exists, yield it before preparing the next one
                    yield orderSetModel
                orderSetModel = {
                    "external_id": row["external_id"],
                    "name": row["collection_name"],
                    "itemList": list(),
                }
            orderSetModel["itemList"].append(row)

        # Yield the last processed model, but only if any row was seen;
        # an empty result set must not produce a spurious None entry.
        if orderSetModel is not None:
            yield orderSetModel
    finally:
        if not extConn:
            conn.close()