def queryLabResults(outputFile, patientById):
    """Stream lab results for the given patients to outputFile as tab-delimited text.

    outputFile: writable file-like object receiving the formatted rows.
    patientById: dict keyed by patient ID; only results for these patients are queried.
    """
    log.info("Query out lab results, takes a while")

    # Lab result base names of interest (anemia / inflammation workup labs).
    labBaseNames = (
        'ferritin', 'fe', 'trfrn', 'trfsat', 'ystfrr',
        'wbc', 'hgb', 'hct', 'mcv', 'rdw', 'plt',
        'retic', 'reticab', 'ldh', 'hapto', 'tbil', 'ibil', 'dbil',
        'cr', 'esr', 'crp',
    )
    formatter = TextResultsFormatter(outputFile)

    # Query rapid when filter by lab result type, limited to X records.
    # Filtering by patient ID drags down substantially until preloaded table
    # by doing a count on the SOR table?
    selectColumns = (
        "pat_id", "base_name", "common_name", "ord_num_value",
        "reference_unit", "result_flag", "sor.result_time",
    )
    query = SQLQuery()
    for column in selectColumns:
        query.addSelect(column)
    query.addFrom("stride_order_results as sor, stride_order_proc as sop")
    query.addWhere("sor.order_proc_id = sop.order_proc_id")
    query.addWhereIn("base_name", labBaseNames)
    query.addWhereIn("pat_id", patientById.viewkeys())
    query.addOrderBy("pat_id")
    query.addOrderBy("sor.result_time")

    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
def _getAdmitDateRange(self):
    """Return (firstAdmitDate, lastAdmitDate) dates spanning all admit-diagnosis events.

    Equivalent SQL:
        SELECT MIN(item_date) AS first_admit_date,
               MAX(item_date) AS last_admit_date
        FROM patient_item
        WHERE clinical_item_id IN (admit dx clinical item IDs)
    """
    # Fetch the matching clinical item IDs in advance so the main query runs faster.
    admitDxClinicalItemIds = self._getAdmitDxClinicalItemIds()

    query = SQLQuery()
    query.addSelect("MIN(item_date) AS first_admit_date")
    query.addSelect("MAX(item_date) AS last_admit_date")
    query.addFrom("patient_item")
    query.addWhereIn("clinical_item_id", admitDxClinicalItemIds)

    # Execute (with caching) and unpack the single aggregate row.
    results = self._executeCachedDBQuery(query)
    aggregateRow = results[0]
    firstAdmitDate = DBUtil.parseDateValue(aggregateRow[0]).date()
    lastAdmitDate = DBUtil.parseDateValue(aggregateRow[1]).date()
    return firstAdmitDate, lastAdmitDate
def queryPatients(period, locations, rxCount):
    """Select patients with more than rxCount med orders in the designated period.

    period: (startDate, endDate) 2-ple bounding ordering_datetime (exclusive bounds).
    locations: patient_location values to include.
    rxCount: exclusive minimum number of med orders required per patient.

    Returns a DataFrame with columns patientId, nOpioidRx, periodStart.
    """
    log.info(
        "Select patients fitting criteria in designated time period: (%s,%s)" %
        period)
    query = SQLQuery()
    query.addSelect("med.pat_id")
    query.addSelect("count(order_med_id)")
    query.addFrom("stride_mapped_meds as map")
    query.addFrom("stride_order_med as med")
    query.addFrom("stride_patient as pat")
    query.addWhere("analysis_status = 1")
    query.addWhere("map.medication_id = med.medication_id")
    query.addWhere("med.pat_id = pat.pat_id")
    query.addWhere("possible_oncology = 0")
    query.addWhereIn("patient_location", locations)
    query.addWhereOp("ordering_datetime", ">", period[0])
    query.addWhereOp("ordering_datetime", "<", period[-1])
    query.addGroupBy("med.pat_id")
    # BUG FIX: threshold was hard-coded as "> 2", silently ignoring the rxCount
    # parameter. Use the parameter (rxCount is a count threshold, not user input).
    query.addHaving("count(order_med_id) > %s" % rxCount)
    results = DBUtil.execute(query)
    cols = ["patientId", "nOpioidRx"]
    patientDF = pd.DataFrame(results, columns=cols)
    #patientDF.set_index("patientId",drop=False,inplace=True);
    patientDF["periodStart"] = period[0]  # Identify this group of patient records
    return patientDF
def queryItems(self, options, outputFile):
    """Query for all clinical item records that fulfill the options criteria
    and then send the results as tab-delimited output to the outputFile.
    """
    pauseSeconds = float(options.pauseSeconds)

    query = SQLQuery()
    query.addSelect(
        "cic.description, ci.clinical_item_id, ci.name, ci.description")
    query.addFrom("clinical_item_category as cic")
    query.addFrom("clinical_item as ci")
    query.addWhere(
        "cic.clinical_item_category_id = ci.clinical_item_category_id")
    if options.itemPrefix:
        # Append wildcard to enable prefix search
        query.addWhereOp("ci.description", "like", options.itemPrefix + "%%")
    if options.categoryNames:
        query.addWhereIn("cic.description", options.categoryNames.split(","))
    query.addOrderBy(
        "cic.description, ci.name, ci.description, ci.clinical_item_id")

    resultsFormatter = TextResultsFormatter(outputFile)
    progress = ProgressDots()
    # Stream rows straight to the formatter, pausing between rows to throttle load.
    for resultRow in DBUtil.execute(query, includeColumnNames=True,
                                    connFactory=self.connFactory):
        resultsFormatter.formatTuple(resultRow)
        time.sleep(pauseSeconds)
        progress.update()
    progress.printStatus()
def queryItems(self, options, outputFile):
    """Query for all order_med aggregate records that fulfill the options
    criteria and then send the results as tab-delimited output to outputFile.
    """
    pauseSeconds = float(options.pauseSeconds)

    query = SQLQuery()
    query.addSelect("count(order_med_id_coded) as nOrders")
    query.addSelect("om.med_route, om.medication_id, om.med_description")
    query.addFrom("starr_datalake2018.order_med as om")
    if options.descriptionPrefix:
        # Append wildcard to enable prefix search
        query.addWhereOp("om.med_description", "like",
                         options.descriptionPrefix + "%%")
    if options.medRoutes:
        query.addWhereIn("om.med_route", options.medRoutes.split(","))
    query.addGroupBy("om.medication_id, om.med_description, om.med_route")
    query.addOrderBy("nOrders desc, om.med_description")

    resultsFormatter = TextResultsFormatter(outputFile)
    progress = ProgressDots()
    # Stream rows straight to the formatter, pausing between rows to throttle load.
    for resultRow in DBUtil.execute(query, includeColumnNames=True,
                                    connFactory=self.connFactory):
        resultsFormatter.formatTuple(resultRow)
        time.sleep(pauseSeconds)
        progress.update()
    progress.printStatus()
def _get_components_in_lab_panel(self):
    """Return the list of result base_names observed in orders of self._lab_panel.

    Doing a single joined query results in a sequential scan through
    stride_order_results. To avoid this, break up the query in two.
    (An unused DB cursor previously opened here has been removed.)
    """
    # First, get all the order_proc_ids for proc_code.
    query = SQLQuery()
    query.addSelect('order_proc_id')
    query.addFrom('stride_order_proc')
    query.addWhereIn('proc_code', [self._lab_panel])
    query.addGroupBy('order_proc_id')
    log.debug('Querying order_proc_ids for %s...' % self._lab_panel)
    results = DBUtil.execute(query)
    lab_order_ids = [row[0] for row in results]

    # Second, get all base_names from those orders.
    query = SQLQuery()
    query.addSelect('base_name')
    query.addFrom('stride_order_results')
    query.addWhereIn('order_proc_id', lab_order_ids)
    query.addGroupBy('base_name')
    log.debug('Querying base_names for order_proc_ids...')
    results = DBUtil.execute(query)
    components = [row[0] for row in results]

    return components
def queryOutpatientIronRx(outputFile, patientById):
    """Stream outpatient enteral iron prescription events (pat_id, ordering_date)
    for the given patients to outputFile as tab-delimited text.
    """
    log.info("Query outpatient Iron prescriptions")

    # Medication IDs derived by mapping through Iron as an ingredient
    poIronIngredientMedicationIds = (
        3065, 3066, 3067, 3071, 3074, 3077, 3986, 7292, 11050, 25006, 26797,
        34528, 39676, 78552, 79674, 83568, 84170, 85151, 96118, 112120,
        112395, 113213, 126035, 198511, 200455, 201994, 201995, 203679,
        207059, 207404, 208037, 208072,
    )
    # Medication IDs directly from prescriptions, formulations that did not map through RxNorm
    poIronDirectMedicationIds = (
        111354, 540526, 205010, 121171, 111320, 82791, 93962, 201795, 206722,
        201068, 116045, 208725, 111341, 206637, 112400, 210256, 77529, 20844,
        83798, 205523, 112428, 125474, 111343,
    )
    allEnteralIronMedicationIds = set(poIronIngredientMedicationIds)
    allEnteralIronMedicationIds.update(poIronDirectMedicationIds)

    formatter = TextResultsFormatter(outputFile)
    query = SQLQuery()
    for columnName in ("pat_id", "ordering_date"):
        query.addSelect(columnName)
    query.addFrom("stride_order_med")
    query.addWhereIn("medication_id", allEnteralIronMedicationIds)
    query.addWhereIn("pat_id", patientById.viewkeys())
    query.addOrderBy("pat_id")
    query.addOrderBy("ordering_date")
    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
def _get_random_patient_list(self):
    """Return a random list of pat_ids who have had self._lab_panel ordered.

    The sample size is derived from the average number of orders per patient
    so that roughly self._num_requested_episodes episodes are captured.
    Side effect: sets self._num_patients.
    (An unused DB cursor previously opened here has been removed.)
    """
    # Get average number of results for this lab test per patient.
    avg_orders_per_patient = self._get_average_orders_per_patient()
    log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)

    # Based on average # of results, figure out how many patients we'd
    # need to get for a feature matrix of requested size (at least 1 patient).
    self._num_patients = int(numpy.max([self._num_requested_episodes /
                                        avg_orders_per_patient, 1]))

    # Get numPatientsToQuery random patients who have gotten test.
    # TODO(sbala): Have option to feed in a seed for the randomness.
    query = SQLQuery()
    query.addSelect('pat_id')
    query.addFrom('stride_order_proc AS sop')
    query.addWhereIn('proc_code', [self._lab_panel])
    query.addOrderBy('RANDOM()')
    query.setLimit(self._num_patients)
    log.debug('Querying random patient list...')
    results = DBUtil.execute(query)

    # Get patient list.
    random_patient_list = [row[0] for row in results]
    return random_patient_list
def generatePatientItemsForCompositeId(self, clinicalItemIds, compositeId, conn=None):
    """Create patient_item records for the composite to match the given
    clinical item ID patient items.

    clinicalItemIds: component clinical item IDs to link under the composite.
    compositeId: clinical_item_id of the composite item to populate.
    conn: optional externally-owned DB connection; if None, one is created
        here and closed in the finally block.
    """
    # Track whether the connection is externally owned so we only close our own.
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # Record linking information
        for componentId in clinicalItemIds:
            linkModel = RowItemModel()
            linkModel["clinical_item_id"] = compositeId
            linkModel["linked_item_id"] = componentId

            insertQuery = DBUtil.buildInsertQuery("clinical_item_link", linkModel.keys())
            insertParams = linkModel.values()
            DBUtil.execute(insertQuery, insertParams, conn=conn)

        # Extract back link information, which will also flatten out any potential inherited links
        linkedItemIdsByBaseId = self.loadLinkedItemIdsByBaseId(conn=conn)
        linkedItemIds = linkedItemIdsByBaseId[compositeId]

        # Create patient item records for the composite clinical item to overlap existing component ones.
        # First query for the existing component records.
        query = SQLQuery()
        query.addSelect("*")
        query.addFrom("patient_item")
        query.addWhereIn("clinical_item_id", linkedItemIds)
        results = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        patientItems = modelListFromTable(results)

        # Patch component records to instead become composite item records then insert back into database.
        # Dropping patient_item_id lets the database assign a fresh primary key;
        # clearing analyze_date marks the new rows as not-yet-analyzed.
        progress = ProgressDots(total=len(patientItems))
        for patientItem in patientItems:
            del patientItem["patient_item_id"]
            patientItem["clinical_item_id"] = compositeId
            patientItem["analyze_date"] = None

            insertQuery = DBUtil.buildInsertQuery("patient_item", patientItem.keys())
            insertParams = patientItem.values()
            try:
                # Optimistic insert of a new unique item
                DBUtil.execute(insertQuery, insertParams, conn=conn)
            except conn.IntegrityError, err:
                # If turns out to be a duplicate, okay, just note it and continue to insert whatever else is possible
                log.info(err)
            progress.Update()

        # progress.PrintStatus();
    finally:
        if not extConn:
            conn.close()
def queryPatientClinicalItemData(self, analysisQuery, conn):
    """Query for all of the order / item data for each patient noted in the analysisQuery
    and yield them one list of clinicalItemIds at a time.
    Generated iterator over 2-ples (patientId, clinicalItemIdList)
    - Patient ID: ID of the patient for which the currently yielded item intended for
    - Clinical Item ID List: List of all of the clinical items / orders for this patient
        ordered by item date (currently excluding those that are off the "default_recommend" /
        on the "default exclusion" list).
    """
    # Join category / item / patient_item, restricted to the patients named in
    # analysisQuery, ordered so rows for one patient arrive contiguously by date.
    sqlQuery = SQLQuery();
    sqlQuery.addSelect("pi.patient_id");
    sqlQuery.addSelect("pi.clinical_item_id");
    #sqlQuery.addSelect("pi.item_date");
    sqlQuery.addFrom("clinical_item_category as cic");
    sqlQuery.addFrom("clinical_item as ci");
    sqlQuery.addFrom("patient_item as pi");
    sqlQuery.addWhere("cic.clinical_item_category_id = ci.clinical_item_category_id");
    sqlQuery.addWhere("ci.clinical_item_id = pi.clinical_item_id");
    sqlQuery.addWhereIn("pi.patient_id", analysisQuery.patientIds );
    sqlQuery.addOrderBy("pi.patient_id");
    sqlQuery.addOrderBy("pi.item_date");

    # Execute the actual query for patient order / item data
    cursor = conn.cursor();
    cursor.execute( str(sqlQuery), tuple(sqlQuery.params) );

    # Accumulate item IDs per patient; flush the accumulated list each time the
    # patient_id changes (rows are sorted by patient_id, so groups are contiguous).
    currentPatientId = None;
    clinicalItemIdList = list();
    row = cursor.fetchone();
    while row is not None:
        (patientId, clinicalItemId) = row;
        if currentPatientId is None:
            currentPatientId = patientId;

        if patientId != currentPatientId:
            # Changed patient, yield the existing data for the previous patient
            yield (currentPatientId, clinicalItemIdList);
            # Update our data tracking for the current patient
            currentPatientId = patientId;
            clinicalItemIdList = list();

        clinicalItemIdList.append(clinicalItemId);

        row = cursor.fetchone();

    # Yield / return the last patient data
    # NOTE(review): if the query returns zero rows this still yields (None, []) —
    # confirm callers tolerate that.
    yield (currentPatientId, clinicalItemIdList);

    cursor.close();
def clinicalItemSearch(self, itemQuery, conn=None):
    """Look for clinical items based on specified query criteria"""
    extConn = conn is not None
    if conn is None:
        conn = self.connFactory.connection()
    try:
        query = SQLQuery()
        for selectField in (
                "ci.clinical_item_id",
                "ci.name",
                "ci.description",
                "cic.source_table",
                "cic.description as category_description"):
            query.addSelect(selectField)
        query.addFrom("clinical_item as ci")
        query.addFrom("clinical_item_category as cic")
        query.addWhere(
            "ci.clinical_item_category_id = cic.clinical_item_category_id")

        if itemQuery.searchStr is not None:
            # Prefix search by regular expression: each whitespace-separated word
            # must appear at a word start within the item description.
            #query.openWhereOrClause()
            for searchField in ("ci.description", ):
                for searchWord in itemQuery.searchStr.split():
                    pattern = "^%(searchWord)s|[^a-z]%(searchWord)s" % {"searchWord": searchWord}
                    query.addWhereOp(searchField, "~*", pattern)
            #query.closeWhereOrClause()

        if itemQuery.sourceTables:
            query.addWhereIn("cic.source_table", itemQuery.sourceTables)
        if itemQuery.analysisStatus is not None:
            query.addWhereEqual("ci.analysis_status", itemQuery.analysisStatus)
        # Also ignore items with no occurence in the analyzed data (occurs if item
        # was accepted for analysis from multi-year dataset, but never used in a
        # sub-time frame's analysis)
        query.addWhere("ci.item_count <> 0")

        if itemQuery.sortField:
            query.addOrderBy(itemQuery.sortField)
        query.addOrderBy("cic.description")
        query.addOrderBy("ci.name")
        query.addOrderBy("ci.description")

        if itemQuery.resultCount is not None:
            query.limit = itemQuery.resultCount

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        return modelListFromTable(dataTable)
    finally:
        if not extConn:
            conn.close()
def querySourceItems(self, userSIDs, limit=None, offset=None, progress=None, conn=None):
    """Query the database for list of all AccessLogs
    and yield the results one at a time.  If userSIDs provided, only return items matching those IDs.

    userSIDs: optional collection of user_id values to filter on; None means no filter.
    limit / offset: optional paging bounds passed to the query.
    progress: optional progress tracker; if given, its .total is set from a count query.
    conn: optional externally-owned DB connection; if None, one is opened here and
        closed when the generator is exhausted.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    # Column headers to query for that map to respective fields in analysis table
    headers = [
        "user_id", "user_name", "de_pat_id", "access_datetime", "metric_id",
        "metric_name", "line_count", "description", "metric_group_num",
        "metric_group_name"
    ]

    query = SQLQuery()
    for header in headers:
        query.addSelect(header)
    query.addFrom(self.sourceTableName)
    if userSIDs is not None:
        query.addWhereIn("user_id", userSIDs)
    query.setLimit(limit)
    query.setOffset(offset)

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

    cursor = conn.cursor()
    # Do one massive query, but yield data for one item at a time.
    cursor.execute(str(query), tuple(query.params))

    row = cursor.fetchone()
    while row is not None:
        rowModel = RowItemModel(row, headers)
        yield rowModel
        row = cursor.fetchone()

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close()
    if not extConn:
        conn.close()
def loadRespiratoryViralPanelItemIds(extractor):
    """Return the set of active clinical_item_ids for Respiratory Viral Panel labs."""
    # Look up the 'Lab' category ID dynamically rather than hard-coding (e.g., 6).
    labCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like 'Lab'")[0][0];

    query = SQLQuery()
    query.addSelect("clinical_item_id")
    query.addFrom("clinical_item")
    query.addWhereEqual("analysis_status", 1)
    query.addWhereIn("clinical_item_category_id", (labCategoryId,))
    query.addWhere("description ~* '%s'" % 'Respiratory.*Panel')
    return set(row[0] for row in DBUtil.execute(query))
def _get_average_orders_per_patient(self):
    """Return the median number of orders per patient for self._lab_var.

    Despite the name, the median (not the mean) is returned — robust to
    heavy-tailed per-patient order counts.

    Raises:
        Exception: if no orders exist for the lab variable. (An explicit raise
            is used instead of sys.exit so callers can catch it.)

    (An unused DB cursor previously opened here has been removed.)
    """
    query = SQLQuery()
    if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':  # TODO: add STRIDE component routine
        query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
        query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
        query.addFrom('stride_order_proc AS sop')
        query.addFrom('stride_order_results AS sor')
        query.addWhere('sop.order_proc_id = sor.order_proc_id')
        query.addWhereIn("proc_code", [self._lab_panel])
        components = self._get_components_in_lab_panel()
        query.addWhereIn("base_name", components)
        query.addGroupBy('pat_id')
    elif LocalEnv.DATASET_SOURCE_NAME == 'UMich':
        query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
        query.addSelect('COUNT(order_proc_id) AS num_orders')
        query.addFrom('labs')
        query.addWhereIn(self._varTypeInTable, [self._lab_var])
        components = self._get_components_in_lab_panel()
        query.addWhereIn("base_name", components)
        query.addGroupBy('pat_id')
    log.debug('Querying median orders per patient...')
    results = DBUtil.execute(query)

    order_counts = [row[1] for row in results]
    if not order_counts:
        error_msg = '0 orders for lab "%s."' % self._lab_var
        log.critical(error_msg)
        raise Exception(error_msg)
    return numpy.median(order_counts)
def deactivateAnalysisByCount(self, thresholdInstanceCount, categoryIds=None, conn=None):
    """Deactivate clinical items whose instance (patient_item) counts are too
    low to be interesting (item_count <= thresholdInstanceCount).
    Can restrict to applying to only items under certain categories.

    Use data/analysis/queryItemCounts.py to help guide threshold selections
    with queries like:
        select count(clinical_item_id), sum(item_count)
        from clinical_item
        where item_count > %s and clinical_item_category_id in (%s)
        (and analysis_status = 1)?
    Seems like good filter, but the process itself will change this count.

    Direct search option as below, but that's usually for pre-processing before
    activations even start. Former meant to count records that have already
    gone through analysis:
        select clinical_item_id, count(distinct patient_id),
            count(distinct encounter_id), count(patient_item_id)
        from patient_item
        group by clinical_item_id
    """
    extConn = conn is not None
    if conn is None:
        conn = self.connFactory.connection()
    try:
        # Make sure clinical item instance (patient item) counts are up to date
        self.updateClinicalItemCounts(conn=conn)

        query = SQLQuery()
        query.addSelect("clinical_item_id")
        query.addFrom("clinical_item")
        if self.maxClinicalItemId is not None:
            # Restrict to limited / test data
            query.addWhereOp("clinical_item_id", "<", self.maxClinicalItemId)
        if categoryIds is not None:
            query.addWhereIn("clinical_item_category_id", categoryIds)
        query.addWhereOp("item_count", "<=", thresholdInstanceCount)
        results = DBUtil.execute(query, conn=conn)

        clinicalItemIds = set(row[0] for row in results)
        self.deactivateAnalysis(clinicalItemIds, conn=conn)
    finally:
        if not extConn:
            conn.close()
def queryDemographics(patientDF, baseDate):
    """Left-merge age / gender / race demographics onto patientDF by patientId.

    Age is computed as baseDate.year - birth_year.
    """
    log.info("Populate demographics background for %d patients" % len(patientDF))

    query = SQLQuery()
    query.addSelect("pat_id")
    query.addSelect("%d-birth_year as age" % baseDate.year)
    query.addSelect("gender")
    query.addSelect("primary_race")
    query.addFrom("stride_patient")
    query.addWhereIn("pat_id", patientDF["patientId"])
    results = DBUtil.execute(query)

    demographicsDF = pd.DataFrame(results,
                                  columns=["patientId", "age", "gender", "race"])
    return patientDF.merge(demographicsDF, how="left")
def loadBloodCultureItemIds(extractor):
    """Return the set of active clinical_item_ids for Blood Culture orders
    (diagnostic tests indicating suspected infection / sepsis)."""
    # Look up the 'Microbiology' category ID dynamically rather than hard-coding (e.g., 15).
    microCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like 'Microbiology'")[0][0];

    query = SQLQuery()
    query.addSelect("clinical_item_id")
    query.addFrom("clinical_item")
    query.addWhereEqual("analysis_status", 1)
    query.addWhereIn("clinical_item_category_id", (microCategoryId,))
    query.addWhere("description ~* '%s'" % 'Blood Culture')
    return set(row[0] for row in DBUtil.execute(query))
def queryClinicalItems(outputFile, clinicalItemIds, patientById):
    """Stream (patient_id, item_date) events for the given clinical items and
    patients to outputFile as tab-delimited text."""
    log.info("Query Clinical Items: %s" % str(clinicalItemIds))
    formatter = TextResultsFormatter(outputFile)

    query = SQLQuery()
    for columnName in ("patient_id", "item_date"):
        query.addSelect(columnName)
    query.addFrom("patient_item")
    query.addWhereIn("clinical_item_id", clinicalItemIds)
    query.addWhereIn("patient_id", patientById.viewkeys())
    query.addOrderBy("patient_id")
    query.addOrderBy("item_date")
    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
def prepareItemAssociations(self, itemIdPairs, linkedItemIdsByBaseId, conn):
    """Make sure all pair-wise item association records are ready / initialized
    so that subsequent queries don't have to pause to check for their existence.
    Should help greatly to reduce number of queries and execution time.

    itemIdPairs: list of strings, each the textual repr of a 2-ple of clinical
        item IDs; NOTE it is mutated in place into actual tuples below.
    linkedItemIdsByBaseId: passed through to acceptableClinicalItemIdPair to
        decide which pairs warrant a baseline record.
    conn: open DB connection used for all queries / inserts.
    """
    clinicalItemIdSet = set()
    # Do the below to convert the list of strings into a list of pairs, which is needed for the rest of this function
    # NOTE(review): eval on these strings is unsafe if they can come from
    # untrusted input — ast.literal_eval would be the safer drop-in; confirm source.
    for index, pair in enumerate(itemIdPairs):
        itemIdPairs[index] = eval(pair)

    for (itemId1, itemId2) in itemIdPairs:
        clinicalItemIdSet.add(itemId1)
        clinicalItemIdSet.add(itemId2)
    nItems = len(clinicalItemIdSet)

    # Now go through all needed item pairs and create default records as needed
    log.debug("Ensure %d baseline records ready" % (nItems * nItems))
    for itemId1 in clinicalItemIdSet:
        # Query to see which ones already exist in the database
        # Do this for each source clinical item instead of all combinations to avoid excessive in memory tracking
        query = SQLQuery()
        query.addSelect("clinical_item_id")
        query.addSelect("subsequent_item_id")
        query.addFrom("clinical_item_association")
        query.addWhereEqual("clinical_item_id", itemId1)
        query.addWhereIn("subsequent_item_id", clinicalItemIdSet)
        associationTable = DBUtil.execute(query, conn=conn)

        # Keep track in memory temporarily for rapid lookup
        existingItemIdPairs = set()
        for row in associationTable:
            existingItemIdPairs.add(tuple(row))

        for itemId2 in clinicalItemIdSet:
            itemIdPair = (itemId1, itemId2)
            if itemIdPair not in existingItemIdPairs and self.acceptableClinicalItemIdPair(
                    itemId1, itemId2, linkedItemIdsByBaseId):
                defaultAssociation = RowItemModel(
                    itemIdPair, ("clinical_item_id", "subsequent_item_id"))
                try:
                    # Optimistic insert of a new item pair, should be safe since just checked above, but parallel processes may collide
                    DBUtil.insertRow("clinical_item_association", defaultAssociation, conn=conn)
                except conn.IntegrityError, err:
                    log.warning(err)
                    pass
def tearDown(self):
    """Restore state from any setUp or test steps"""
    log.info("Purge test records from the database")
    # Delete rows for the test users from each affected table, in order.
    for tableName in ("access_log", "user"):
        purgeQuery = SQLQuery()
        purgeQuery.delete = True
        purgeQuery.addFrom(tableName)
        purgeQuery.addWhereIn("user_id", self.testUserIDs)
        DBUtil.execute(purgeQuery)
    DBUtil.execute("drop table %s;" % TEST_SOURCE_TABLE)
    DBTestCase.tearDown(self)
def _getAdmitDxPatientFrequencyRankByYear(self):
    """Count unique patients per admit diagnosis per year and return the rows.

    Equivalent SQL:
        SELECT
            ci.name AS icd_code,
            ci.description AS admit_dx,
            EXTRACT(YEAR FROM pi.item_date) AS admit_year,
            COUNT(DISTINCT pi.patient_id) AS num_unique_patients
        FROM patient_item AS pi
        JOIN clinical_item AS ci
            ON pi.clinical_item_id = ci.clinical_item_id
        WHERE ci.clinical_item_id IN (admitDxClinicalItemIds)
        GROUP BY icd_code, admit_dx, admit_year, num_unique_patients
        ORDER BY icd_code, admit_year, num_unique_patients DESC
    """
    # Get list of all clinical item IDs matching admit diagnosis.
    # Get this list in advance to make subsequent query run a bit faster.
    admitDxClinicalItemIds = self._getAdmitDxClinicalItemIds()

    query = SQLQuery()
    query.addSelect("ci.name AS icd_code")
    query.addSelect("ci.description AS admit_dx")
    query.addSelect("EXTRACT(YEAR FROM pi.item_date) AS admit_year")
    query.addSelect("COUNT(DISTINCT pi.patient_id) AS num_unique_patients")
    query.addFrom("patient_item AS pi")
    query.addJoin("clinical_item AS ci",
                  "pi.clinical_item_id = ci.clinical_item_id")
    query.addWhereIn("ci.clinical_item_id", admitDxClinicalItemIds)
    query.addGroupBy("icd_code")
    query.addGroupBy("admit_dx")
    query.addGroupBy("admit_year")
    query.addGroupBy("num_unique_patients")
    query.addOrderBy("icd_code")
    query.addOrderBy("admit_year")
    query.addOrderBy("num_unique_patients DESC")

    # Execute query and return the result rows.
    # BUG FIX: results were previously computed but never returned.
    results = DBUtil.execute(query)
    return results
def _getPatientsComponentsHistories(self, item_ids):
    """Yield (patient_id, name, item_date) rows for the given clinical item IDs,
    ordered by patient and then by date."""
    query = SQLQuery()
    # SELECT
    for selectField in ('patient_id', 'name', 'item_date'):
        query.addSelect(selectField)
    # FROM
    query.addFrom('clinical_item as ci')
    query.addFrom('patient_item as pi')
    # WHERE
    query.addWhere('ci.clinical_item_id = pi.clinical_item_id')
    query.addWhereIn('ci.clinical_item_id', item_ids)
    query.addOrderBy('patient_id')
    query.addOrderBy('item_date')
    # print query
    # print query.getParams()
    return customDBUtil.execute(query)
def _getPatientsLabsHistories(self, proc_codes):
    """Yield (pat_id, abnormal_yn, result_time, proc_code) rows with finalized
    results for the given proc_codes, ordered by proc_code, patient, time."""
    query = SQLQuery()
    # SELECT
    for selectField in ('pat_id', 'abnormal_yn', 'result_time', 'proc_code'):
        query.addSelect(selectField)
    # FROM
    query.addFrom('stride_order_proc')
    # WHERE: only finalized lab results
    query.addWhereEqual('lab_status', 'Final result')
    query.addWhereIn('proc_code', proc_codes)
    query.addOrderBy('proc_code')
    query.addOrderBy('pat_id')
    query.addOrderBy('result_time')
    return customDBUtil.execute(query)
def add_resident_column(columns_order, csv, survey_file):
    """Merge resident survey responses into csv, keyed on sim_user_id.

    columns_order: list of column names; 'resident' is inserted at position 1
        (presumably a column carried in by the survey file — confirm its format).
    csv: DataFrame with a 'user' column holding sim_user_ids.
    survey_file: path to a CSV containing a 'Physician User Name' column.
    """
    survey_responses = pd.read_csv(survey_file)

    # Map physician names to sim_user_ids via the sim_user table.
    query = SQLQuery()
    query.addSelect("sim_user_id")
    query.addSelect("name")
    query.addFrom("sim_user")
    query.addWhereIn("name", survey_responses['Physician User Name'])
    user_ids = DBUtil.execute(query)

    idFrame = pd.DataFrame(user_ids, columns=['sim_user_id', 'name'])
    survey_responses = pd.merge(survey_responses, idFrame,
                                left_on='Physician User Name', right_on='name')
    csv = pd.merge(csv, survey_responses, left_on='user', right_on='sim_user_id')
    columns_order.insert(1, 'resident')
    return csv
def _get_random_patient_list(self):
    """Sample a random patient list without using RANDOM() on the database.

    Counts orders per patient in Python, sizes the sample from the median
    orders-per-patient, then samples patient indices locally with numpy.
    Side effect: sets self._num_patients.
    """
    # Get per-patient order counts for this lab component.
    query = SQLQuery()
    query.addSelect('pat_id')
    query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
    query.addFrom('stride_order_proc AS sop')
    query.addFrom('stride_order_results AS sor')
    query.addWhere('sop.order_proc_id = sor.order_proc_id')
    # query.addWhereIn("base_name", [self._component])
    query.addGroupBy('pat_id')
    log.debug('Querying median orders per patient...')
    results = DBUtil.execute(query)

    order_counts = [row[1] for row in results]
    if len(results) == 0:
        error_msg = '0 orders for component "%s."' % self._component
        log.critical(error_msg)
        sys.exit('[ERROR] %s' % error_msg)

    avg_orders_per_patient = numpy.median(order_counts)
    log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)

    # Based on average # of results, figure out how many patients we'd
    # need to get for a feature matrix of requested size (at least 1).
    self._num_patients = int(numpy.max([self._num_requested_episodes /
                                        avg_orders_per_patient, 1]))
    # Some components may have fewer associated patients than the required sample size
    patient_number_chosen = min([len(results), self._num_patients])

    # BUG FIX: this sampling line had been commented out, leaving
    # inds_random_patients undefined (NameError at runtime).
    inds_random_patients = numpy.random.choice(
        len(results), size=patient_number_chosen, replace=False)

    pat_IDs_random_patients = [results[ind][0] for ind in inds_random_patients]
    return pat_IDs_random_patients
def queryDrugScreens(patientDF, period, locations):
    """Left-merge per-patient outpatient drug screen counts onto patientDF.

    patientDF: DataFrame with a 'patientId' column; returned with an added
        integer 'nDrugScreens' column (0 for patients with no screens).
    period: (startDate, endDate) 2-ple bounding ordering_date (exclusive).
    locations: patient_location values to include.
    """
    log.info("Populate drug screens by primary locations")

    query = SQLQuery()
    query.addSelect("pat_id")
    query.addSelect("count(distinct order_proc_id)")
    query.addFrom("stride_order_proc_drug_screen")
    query.addWhere("ordering_mode = 'Outpatient'")
    query.addWhereIn("patient_location", locations)
    query.addWhereOp("ordering_date", ">", period[0])
    query.addWhereOp("ordering_date", "<", period[-1])
    query.addWhereIn("pat_id", patientDF["patientId"])
    query.addGroupBy("pat_id")
    results = DBUtil.execute(query)

    cols = ["patientId", "nDrugScreens"]
    newDF = pd.DataFrame(results, columns=cols)
    patientDF = patientDF.merge(newDF, how="left")
    # BUG FIX: was a chained indexed assignment (df[col][mask] = 0), which is a
    # pandas SettingWithCopy hazard; fillna expresses the same default directly.
    # Beware of float conversion somewhere, hence the explicit int cast.
    patientDF["nDrugScreens"] = patientDF["nDrugScreens"].fillna(0).astype("int")
    return patientDF
def action_default(self):
    """Populate requestData option value/text lists for active clinical items
    in the requested categories, ordered by the requested sort field."""
    # Convert query category ID(s) into a list, even of size 1
    categoryIds = self.requestData["clinical_item_category_id"].split(",")

    query = SQLQuery()
    for selectField in ("ci.clinical_item_id", "ci.name",
                        "ci.description", "ci.item_count"):
        query.addSelect(selectField)
    query.addFrom("clinical_item as ci")
    query.addWhere("analysis_status = 1")  # Ignore deactivated items
    query.addWhereIn("ci.clinical_item_category_id", categoryIds)
    query.addOrderBy(self.requestData["orderBy"])
    resultTable = DBUtil.execute(query, includeColumnNames=True)
    resultModels = modelListFromTable(resultTable)

    optionValues = []
    optionTexts = []
    displayFields = ("name", "description", "item_count")
    # Lead each option's text with the sort field's value, then the remaining display fields.
    orderField = self.requestData["orderBy"].split()[0]
    for resultModel in resultModels:
        optionValues.append(str(resultModel["clinical_item_id"]))
        textValueList = [str(resultModel[orderField])]
        for field in displayFields:
            if field != orderField:
                textValueList.append(str(resultModel[field]))
        optionTexts.append(" - ".join(textValueList))

    # Conveniently, Python string representation coincides with JavaScript
    self.requestData["optionValuesJSON"] = str(optionValues)
    self.requestData["optionTextsJSON"] = str(optionTexts)
def loadUserInfo(self, userIds=None, conn=None):
    """Load basic information about the specified users"""
    extConn = conn is not None
    if conn is None:
        conn = self.connFactory.connection()
    try:
        query = SQLQuery()
        query.addSelect("su.sim_user_id")
        query.addSelect("su.name")
        query.addFrom("sim_user as su")
        if userIds is not None:
            query.addWhereIn("su.sim_user_id", userIds)
        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        return modelListFromTable(dataTable)
    finally:
        # Only close connections we opened ourselves.
        if not extConn:
            conn.close()
def fetch_components_in_panel(lab_panel):
    """Return the base_names of result components observed in orders of lab_panel.

    Doing a single joined query results in a sequential scan through
    stride_order_results. To avoid this, break up the query in two.
    """
    # Step 1: collect the order_proc_ids for the panel's proc_code.
    order_query = SQLQuery()
    order_query.addSelect('order_proc_id')
    order_query.addFrom('stride_order_proc')
    order_query.addWhereIn('proc_code', [lab_panel])
    order_query.addGroupBy('order_proc_id')
    lab_order_ids = [row[0] for row in DBUtil.execute(order_query)]

    # Step 2: collect the distinct base_names from those orders.
    name_query = SQLQuery()
    name_query.addSelect('base_name')
    name_query.addFrom('stride_order_results')
    name_query.addWhereIn('order_proc_id', lab_order_ids)
    name_query.addGroupBy('base_name')
    return [row[0] for row in DBUtil.execute(name_query)]
def loadStateInfo(self, stateIds=None, conn=None):
    """Load basic information about the specified patient states"""
    extConn = conn is not None
    if conn is None:
        conn = self.connFactory.connection()
    try:
        query = SQLQuery()
        for selectField in ("ss.sim_state_id", "ss.name", "ss.description"):
            query.addSelect(selectField)
        query.addFrom("sim_state as ss")
        if stateIds is not None:
            query.addWhereIn("ss.sim_state_id", stateIds)
        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        return modelListFromTable(dataTable)
    finally:
        # Only close connections we opened ourselves.
        if not extConn:
            conn.close()