def loadPatientOrders(self, patientId, currentTime, loadActive=True, conn=None):
    """Load orders for the given patient that exist by the specified current time point.

    loadActive - Specify whether to load active vs. inactive/completed orders.
        Set to None to load both.
    """
    extConn = conn is not None  # Track whether the caller owns the connection
    if not extConn:
        conn = self.connFactory.connection()
    try:
        query = SQLQuery()
        for selectField in (
                "po.sim_patient_order_id", "po.sim_user_id", "po.sim_patient_id",
                "po.sim_state_id", "po.clinical_item_id", "po.relative_time_start",
                "po.relative_time_end", "ci.name", "ci.description",
                "cic.source_table", "cic.description as category_description"):
            query.addSelect(selectField)
        query.addFrom("sim_patient_order as po")
        query.addFrom("clinical_item as ci")
        query.addFrom("clinical_item_category as cic")
        query.addWhere("po.clinical_item_id = ci.clinical_item_id")
        query.addWhere("ci.clinical_item_category_id = cic.clinical_item_category_id")
        query.addWhereEqual("sim_patient_id", patientId)
        query.addWhereOp("relative_time_start", "<=", currentTime)

        if loadActive:
            # Keep only orders still active at currentTime: open-ended, or ending later
            query.openWhereOrClause()
            query.addWhere("relative_time_end is null")
            query.addWhereOp("relative_time_end", ">", currentTime)
            query.closeWhereOrClause()
        #elif loadActive is not None:   # Filter out active orders here.
        #    query.addWhereOp("relative_time_end","<=", currentTime)

        if loadActive:
            # Organize currently active orders by category
            query.addOrderBy("cic.description")
            query.addOrderBy("ci.description")
            query.addOrderBy("relative_time_start")
        else:
            # Otherwise chronologic order
            query.addOrderBy("relative_time_start")
            query.addOrderBy("cic.description")
            query.addOrderBy("ci.description")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        return modelListFromTable(dataTable)
    finally:
        if not extConn:
            conn.close()
def generatePatientItemsForCompositeId(self, clinicalItemIds, compositeId, conn=None):
    """Create patient_item records for the composite to match the given clinical item ID patient items.

    clinicalItemIds - IDs of the component clinical items to link under the composite item.
    compositeId - Clinical item ID of the composite to record links and patient items for.

    Fix: `except conn.IntegrityError, err:` is Python-2-only syntax;
    `except ... as err:` (PEP 3110) is valid on Python 2.6+ and 3.x.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # Record linking information
        for componentId in clinicalItemIds:
            linkModel = RowItemModel()
            linkModel["clinical_item_id"] = compositeId
            linkModel["linked_item_id"] = componentId

            insertQuery = DBUtil.buildInsertQuery("clinical_item_link", linkModel.keys())
            insertParams = linkModel.values()
            DBUtil.execute(insertQuery, insertParams, conn=conn)

        # Extract back link information, which will also flatten out any potential inherited links
        linkedItemIdsByBaseId = self.loadLinkedItemIdsByBaseId(conn=conn)
        linkedItemIds = linkedItemIdsByBaseId[compositeId]

        # Create patient_item records for the composite clinical item to overlap existing component ones
        # First query for the existing component records
        query = SQLQuery()
        query.addSelect("*")
        query.addFrom("patient_item")
        query.addWhereIn("clinical_item_id", linkedItemIds)
        results = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        patientItems = modelListFromTable(results)

        # Patch component records to instead become composite item records, then insert back into database
        progress = ProgressDots(total=len(patientItems))
        for patientItem in patientItems:
            del patientItem["patient_item_id"]  # Let the database assign a fresh primary key
            patientItem["clinical_item_id"] = compositeId
            patientItem["analyze_date"] = None

            insertQuery = DBUtil.buildInsertQuery("patient_item", patientItem.keys())
            insertParams = patientItem.values()
            try:
                # Optimistic insert of a new unique item
                DBUtil.execute(insertQuery, insertParams, conn=conn)
            except conn.IntegrityError as err:
                # If turns out to be a duplicate, okay, just note it and continue to insert whatever else is possible
                log.info(err)
            progress.Update()
        # progress.PrintStatus();
    finally:
        if not extConn:
            conn.close()
def test_copyPatientTemplate(self):
    """Copy a patient template: deep copy of notes, orders, and states, but only up to relative time zero."""
    newPatientData = {"name": "Template Copy"}
    templatePatientId = -1
    self.testPatientId = self.manager.copyPatientTemplate(newPatientData, templatePatientId)
    futureTime = 1000000  # Far future time to test that we still only copied the results up to time zero

    # Verify basic patient information
    patientCols = ["name", "age_years", "gender", "sim_state_id"]
    patientModel = self.manager.loadPatientInfo([self.testPatientId])[0]
    expectedPatientModel = RowItemModel(["Template Copy", 60, "Female", -1], patientCols)
    self.assertEqualDict(expectedPatientModel, patientModel, patientCols)

    # Verify notes
    noteCols = ["sim_patient_id", "content"]
    actualNotes = self.manager.loadNotes(self.testPatientId, futureTime)
    expectedNotes = [
        RowItemModel([self.testPatientId, "Initial Note"], noteCols),
        # Second copy because another state initiation at time zero and negative onset time
        RowItemModel([self.testPatientId, "Initial Note"], noteCols),
    ]
    self.assertEqualDictList(expectedNotes, actualNotes, noteCols)

    # Verify orders
    orderCols = [
        "sim_user_id", "sim_patient_id", "sim_state_id",
        "clinical_item_id", "relative_time_start", "relative_time_end",
    ]
    actualOrders = self.manager.loadPatientOrders(self.testPatientId, futureTime, loadActive=None)
    expectedOrders = [RowItemModel([-1, self.testPatientId, -1, -15, 0, None], orderCols)]
    self.assertEqualDictList(expectedOrders, actualOrders, orderCols)

    # Verify states (queried directly since the manager reports only the current state)
    stateCols = ["sim_patient_id", "sim_state_id", "relative_time_start", "relative_time_end"]
    query = SQLQuery()
    for stateCol in stateCols:
        query.addSelect(stateCol)
    query.addFrom("sim_patient_state")
    query.addWhereEqual("sim_patient_id", self.testPatientId)
    query.addOrderBy("relative_time_start")
    actualStates = modelListFromTable(DBUtil.execute(query, includeColumnNames=True))
    expectedStates = [
        RowItemModel([self.testPatientId, -1, -7200, 0], stateCols),
        RowItemModel([self.testPatientId, -1, 0, None], stateCols),
    ]
    self.assertEqualDictList(expectedStates, actualStates, stateCols)
def updateClinicalItemCounts(self, acceptCache=False, conn=None):
    """Update the summary item_counts for clinical_items based on clinical_item_association summary counts.

    If acceptCache is True, then will first check for existence of an entry
    "clinicalItemCountsUpdated" in the data_cache table. If it exists, assume
    we have done this update already, and no need to force the calculations again.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()
    try:
        if acceptCache and self.getCacheData("clinicalItemCountsUpdated", conn=conn) is not None:
            # Item count caches already updated, no need to recalculate them
            return

        # First reset all counts to zero
        resetQuery = "update clinical_item set item_count = 0, patient_count = 0, encounter_count = 0 "
        resetParams = []
        if self.maxClinicalItemId is not None:  # Restrict to (test) data
            resetQuery += "where clinical_item_id < %s" % DBUtil.SQL_PLACEHOLDER
            resetParams.append(self.maxClinicalItemId)
        DBUtil.execute(resetQuery, resetParams, conn=conn)

        # Look along "diagonal" of the association matrix (item paired with itself) for primary summary stats
        statQuery = SQLQuery()
        statQuery.addSelect("clinical_item_id")
        statQuery.addSelect("count_0 as item_count")
        statQuery.addSelect("patient_count_0 as patient_count")
        statQuery.addSelect("encounter_count_0 as encounter_count")
        statQuery.addFrom("clinical_item_association as ci")
        statQuery.addWhere("clinical_item_id = subsequent_item_id")
        if self.maxClinicalItemId is not None:  # Restrict to (test) data
            statQuery.addWhereOp("clinical_item_id", "<", self.maxClinicalItemId)
        statTable = DBUtil.execute(statQuery, includeColumnNames=True, conn=conn)
        for statModel in modelListFromTable(statTable):
            DBUtil.updateRow("clinical_item", statModel, statModel["clinical_item_id"], conn=conn)

        # Make a note that this cache data has been updated
        self.setCacheData("clinicalItemCountsUpdated", "True", conn=conn)
    finally:
        if not extConn:
            conn.close()
def loadPendingResultOrders(self, patientId, relativeTime, conn=None):
    """Load all patient orders at the given relativeTime that are due to yield results,
    but have not yet. Include an estimate of time until results available.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()
    try:
        query = SQLQuery()
        # Distinct so don't report multiple times for panel orders
        query.addSelect("distinct po.clinical_item_id")
        query.addSelect("po.relative_time_start")
        query.addSelect("po.relative_time_end")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        # Could have different turnaround times for single order if different sub results. Just report each.
        query.addSelect("sorm.turnaround_time")
        # Calculate time until expect result
        query.addSelect(
            "sorm.turnaround_time - (%d - po.relative_time_start) as time_until_result"
            % relativeTime)
        query.addFrom("sim_patient_order as po")
        query.addFrom("clinical_item as ci")
        query.addFrom("sim_order_result_map as sorm")
        query.addWhere("po.clinical_item_id = ci.clinical_item_id")
        query.addWhere("po.clinical_item_id = sorm.clinical_item_id")
        query.addWhereEqual("sim_patient_id", patientId)

        # Only catch orders up to the given relativeTime and not cancelled
        query.addWhereOp("relative_time_start", "<=", relativeTime)
        query.openWhereOrClause()
        query.addWhere("relative_time_end is null")
        query.addWhereOp("relative_time_end", ">", relativeTime)
        query.closeWhereOrClause()

        # Only PENDING orders, so don't report orders whose results should already be available
        query.addWhereOp("sorm.turnaround_time + po.relative_time_start", ">", relativeTime)

        query.addOrderBy("time_until_result")
        query.addOrderBy("relative_time_start")
        query.addOrderBy("ci.name")

        pendingTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        return modelListFromTable(pendingTable)
    finally:
        if not extConn:
            conn.close()
def clinicalItemSearch(self, itemQuery, conn=None):
    """Look for clinical items based on specified query criteria"""
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()
    try:
        query = SQLQuery()
        for selectField in ("ci.clinical_item_id", "ci.name", "ci.description",
                            "cic.source_table", "cic.description as category_description"):
            query.addSelect(selectField)
        query.addFrom("clinical_item as ci")
        query.addFrom("clinical_item_category as cic")
        query.addWhere("ci.clinical_item_category_id = cic.clinical_item_category_id")

        if itemQuery.searchStr is not None:
            # Require each search word as a prefix (start-of-string or non-letter boundary), by regular expression
            for searchWord in itemQuery.searchStr.split():
                query.addWhereOp(
                    "ci.description", "~*",
                    "^%(searchWord)s|[^a-z]%(searchWord)s" % {"searchWord": searchWord})
        if itemQuery.sourceTables:
            query.addWhereIn("cic.source_table", itemQuery.sourceTables)
        if itemQuery.analysisStatus is not None:
            query.addWhereEqual("ci.analysis_status", itemQuery.analysisStatus)
            # Also ignore items with no occurence in the analyzed data
            # (occurs if item was accepted for analysis from multi-year dataset,
            # but never used in a sub-time frame's analysis)
            query.addWhere("ci.item_count <> 0")

        if itemQuery.sortField:
            query.addOrderBy(itemQuery.sortField)
        query.addOrderBy("cic.description")
        query.addOrderBy("ci.name")
        query.addOrderBy("ci.description")

        if itemQuery.resultCount is not None:
            query.limit = itemQuery.resultCount

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        return modelListFromTable(dataTable)
    finally:
        if not extConn:
            conn.close()
def loadNotes(self, patientId, currentTime, conn=None):
    """Load notes committed up to the given simulation time."""
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()
    try:
        # Note release time = state start offset plus note's offset within the state
        noteTimeExpr = "(sps.relative_time_start + sn.relative_state_time)"

        query = SQLQuery()
        query.addSelect("sn.sim_note_id")
        query.addSelect("sps.sim_patient_id")  # Link
        query.addSelect("sn.sim_state_id")
        query.addSelect("sn.note_type_id")
        query.addSelect("sn.author_type_id")
        query.addSelect("sn.service_type_id")
        query.addSelect("%s as relative_time" % noteTimeExpr)
        query.addSelect("sn.content")
        query.addFrom("sim_note as sn")
        query.addFrom("sim_patient_state as sps")
        query.addWhere("sn.sim_state_id = sps.sim_state_id")
        query.addWhereEqual("sps.sim_patient_id", patientId)
        # Only unlock notes once traverse expected time
        query.addWhereOp(noteTimeExpr, "<=", currentTime)
        query.addOrderBy(noteTimeExpr)

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        return modelListFromTable(dataTable)
    finally:
        if not extConn:
            conn.close()
def calculateResultStats(self, baseName, conn):
    """Query the database for lab results by the given baseName and compute summary statistics.

    Returns a single result model with value_count, value_sum, value_sum_squares,
    max_result_flag, max_result_in_range, plus the base_name itself.
    """
    query = SQLQuery()
    query.addSelect("count(ord_num_value) as value_count")
    query.addSelect("sum(ord_num_value) as value_sum")
    query.addSelect("sum(ord_num_value*ord_num_value) as value_sum_squares")
    query.addSelect("max(result_flag) as max_result_flag")
    query.addSelect("max(result_in_range_yn) as max_result_in_range")
    query.addFrom("stride_order_results")
    query.addWhereOp("ord_num_value", "<>", SENTINEL_RESULT_VALUE)
    query.addWhereEqual("base_name", baseName)

    dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
    statModel = modelListFromTable(dataTable)[0]  # Aggregate query: exactly 1 row item will exist
    statModel["base_name"] = baseName
    return statModel
def action_default(self):
    """Build JSON-ready option value/text lists for clinical items in the requested categories."""
    # Convert query category ID(s) into a list, even of size 1
    categoryIds = self.requestData["clinical_item_category_id"].split(",")

    query = SQLQuery()
    query.addSelect("ci.clinical_item_id")
    query.addSelect("ci.name")
    query.addSelect("ci.description")
    query.addSelect("ci.item_count")
    query.addFrom("clinical_item as ci")
    query.addWhere("analysis_status = 1")  # Ignore specified items
    query.addWhereIn("ci.clinical_item_category_id", categoryIds)
    query.addOrderBy(self.requestData["orderBy"])
    resultModels = modelListFromTable(DBUtil.execute(query, includeColumnNames=True))

    displayFields = ("name", "description", "item_count")
    orderField = self.requestData["orderBy"].split()[0]  # Strip any sort-direction qualifier

    optionValues = []
    optionTexts = []
    for resultModel in resultModels:
        optionValues.append(str(resultModel["clinical_item_id"]))
        # Lead with the sort field's value, then the remaining display fields
        textValueList = [str(resultModel[orderField])]
        for field in displayFields:
            if field != orderField:
                textValueList.append(str(resultModel[field]))
        optionTexts.append(" - ".join(textValueList))

    # Conveniently, Python string representation coincides with JavaScript
    self.requestData["optionValuesJSON"] = str(optionValues)
    self.requestData["optionTextsJSON"] = str(optionTexts)
def loadUserInfo(self, userIds=None, conn=None):
    """Load basic information about the specified users"""
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()
    try:
        query = SQLQuery()
        query.addSelect("su.sim_user_id")
        query.addSelect("su.name")
        query.addFrom("sim_user as su")
        if userIds is not None:
            # Restrict to the requested users; otherwise load all
            query.addWhereIn("su.sim_user_id", userIds)
        userTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        return modelListFromTable(userTable)
    finally:
        if not extConn:
            conn.close()
def loadStateInfo(self, stateIds=None, conn=None):
    """Load basic information about the specified patient states"""
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()
    try:
        query = SQLQuery()
        query.addSelect("ss.sim_state_id")
        query.addSelect("ss.name")
        query.addSelect("ss.description")
        query.addFrom("sim_state as ss")
        if stateIds is not None:
            # Restrict to the requested states; otherwise load all
            query.addWhereIn("ss.sim_state_id", stateIds)
        stateTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        return modelListFromTable(stateTable)
    finally:
        if not extConn:
            conn.close()
def copyPatientTemplate(self, patientData, templatePatientId, conn=None):
    """Create a new patient record based on the given template patient ID to copy from.

    Will copy shallow attributes, overridden by any provided in the given patientData,
    as well as any patient states, notes, or physician orders UP TO (and including)
    relative time zero, but not subsequent states, notes, or physician orders
    (the latter is expected to reflect real user interaction records).

    patientData - Dict of sim_patient column values that override the template's values.
    templatePatientId - sim_patient_id of the template record to copy from.
    conn - Optional externally-owned DB connection; one is created (and closed) if omitted.
    Returns the new sim_patient_id.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # Copy the shallow patient row itself, minus its primary key
        templatePatientData = DBUtil.loadRecordModelById("sim_patient", templatePatientId, conn=conn)
        del templatePatientData["sim_patient_id"]  # Remove prior ID to allow for new one
        templatePatientData.update(patientData)  # Override with new content (if exists)
        DBUtil.insertRow("sim_patient", templatePatientData, conn=conn)  # Create new patient record
        # Retrieve the newly assigned ID for the inserted row
        patientId = DBUtil.execute(DBUtil.identityQuery("sim_patient"), conn=conn)[0][0]

        # Copy initial template patient states (those starting at or before relative time zero)
        query = SQLQuery()
        query.addSelect("*")  # Copy all columns
        query.addFrom("sim_patient_state as sps")
        query.addWhereEqual("sps.sim_patient_id", templatePatientId)
        query.addWhereOp("relative_time_start", "<=", 0)
        query.addOrderBy("relative_time_start")
        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)
        nStates = len(dataModels)
        for i, dataModel in enumerate(dataModels):
            del dataModel["sim_patient_state_id"]  # Discard copied ID to allow new one
            if i == nStates - 1:
                # Last state: blank out end time to reflect open-ended state for the simulation
                del dataModel["relative_time_end"]
            dataModel["sim_patient_id"] = patientId  # Point the copy at the new patient
            DBUtil.insertRow("sim_patient_state", dataModel, conn=conn)

        # Copy initial template orders (those placed at or before relative time zero)
        query = SQLQuery()
        query.addSelect("*")
        query.addFrom("sim_patient_order as spo")
        query.addWhereEqual("sim_patient_id", templatePatientId)
        query.addWhereOp("relative_time_start", "<=", 0)
        query.addOrderBy("relative_time_start")
        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)
        for dataModel in dataModels:
            del dataModel["sim_patient_order_id"]  # Discard copied ID to allow new one
            dataModel["sim_patient_id"] = patientId
            DBUtil.insertRow("sim_patient_order", dataModel, conn=conn)

        conn.commit()  # Transactional commit for multi-step process
        return patientId
    finally:
        if not extConn:
            conn.close()
def orderSetSearch(self, itemQuery, conn=None):
    """Look for clinical items based on specified query criteria.

    Generator: yields one order-set model at a time, each of the form
    {"external_id": ..., "name": ..., "itemList": [row, ...]}.

    Fix: the trailing yield previously emitted a spurious None model when the
    query returned no rows; now nothing is yielded for an empty result set.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("ic.item_collection_id")
        query.addSelect("ic.external_id")
        query.addSelect("ic.name as collection_name")
        query.addSelect("ic.section")
        query.addSelect("ic.subgroup")
        query.addSelect("ci.clinical_item_category_id")
        query.addSelect("ci.clinical_item_id")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        query.addFrom("item_collection as ic")
        query.addFrom("item_collection_item as ici")
        query.addFrom("clinical_item as ci")
        query.addWhere("ic.item_collection_id = ici.item_collection_id")
        query.addWhere("ici.clinical_item_id = ci.clinical_item_id")
        query.addWhereNotEqual("ic.section", AD_HOC_SECTION)

        if itemQuery.searchStr is not None:
            # Prefix search by regular expression, one clause per search word
            searchWords = itemQuery.searchStr.split()
            for searchWord in searchWords:
                query.addWhereOp(
                    "ic.name", "~*",
                    "^%(searchWord)s|[^a-z]%(searchWord)s" % {"searchWord": searchWord})
        if itemQuery.analysisStatus is not None:
            query.addWhereEqual("ci.analysis_status", itemQuery.analysisStatus)

        query.addOrderBy("lower(ic.name)")
        query.addOrderBy("ic.external_id")
        query.addOrderBy("lower(ic.section)")
        query.addOrderBy("lower(ic.subgroup)")
        query.addOrderBy("ci.clinical_item_id")
        query.addOrderBy("ci.name")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)

        # Aggregate rows up into order sets, grouping consecutive rows by external_id
        # (rows are ordered so that all items of a collection are adjacent)
        orderSetModel = None
        for row in dataModels:
            if orderSetModel is None or row["external_id"] != orderSetModel["external_id"]:
                if orderSetModel is not None:
                    # Prior order set exists, yield/return it before preparing next one
                    yield orderSetModel
                orderSetModel = \
                    {
                        "external_id": row["external_id"],
                        "name": row["collection_name"],
                        "itemList": list(),
                    }
            orderSetModel["itemList"].append(row)
        if orderSetModel is not None:  # Guard against empty result set (previously yielded None)
            yield orderSetModel  # Yield the last processed model
    finally:
        if not extConn:
            conn.close()
def loadResults(self, patientId, relativeTime, conn=None):
    """Load all results active by the given relativeTime.

    Will look for sim_patient_state times and sim_patient_order for diagnostic orders,
    to extrapolate all state-specific results for each order, or using default values
    if no state specific ones available.

    patientId - sim_patient_id whose results to load.
    relativeTime - Simulation time; only results whose order time plus turnaround
        time has passed are returned.
    conn - Optional externally-owned DB connection; one is created (and closed) if omitted.
    Returns a list of result models, each joined with its state-specific (or default-state) value.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # First query for all expected result labels and states, without state-specific values as
        # may want outer join behavior against default state values
        query = SQLQuery()
        query.addSelect("distinct spo.sim_state_id")
        query.addSelect("sr.sim_result_id")
        query.addSelect("sr.name")
        query.addSelect("sr.description")
        query.addSelect("sr.priority")
        query.addSelect("sr.group_string")
        query.addSelect("spo.relative_time_start")
        query.addSelect("sorm.turnaround_time")
        query.addSelect(
            "(spo.relative_time_start + sorm.turnaround_time) as result_relative_time"
        )
        query.addFrom("sim_patient_order as spo")
        query.addFrom("sim_order_result_map as sorm")
        query.addFrom("sim_result as sr")
        query.addWhere("spo.clinical_item_id = sorm.clinical_item_id")
        query.addWhere("sorm.sim_result_id = sr.sim_result_id")
        query.addWhereEqual("spo.sim_patient_id", patientId)
        # Only unlock results if appropriate prereq orders were placed in the past (and longer than the turnaround time)
        query.addWhereOp("spo.relative_time_start + sorm.turnaround_time",
                         "<=", relativeTime)
        query.addOrderBy("result_relative_time")
        query.addOrderBy("sr.priority")
        resultTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        resultModels = modelListFromTable(resultTable)

        # Pass through results to get set of states to search for
        stateIds = set([DEFAULT_STATE_ID])  # Include default state to fall back on
        for resultModel in resultModels:
            stateIds.add(resultModel["sim_state_id"])

        # Second query for state-specific values
        valueQuery = SQLQuery()
        valueQuery.addSelect("ssr.sim_state_id")
        valueQuery.addSelect("ssr.sim_result_id")
        valueQuery.addSelect("ssr.num_value")
        valueQuery.addSelect("ssr.num_value_noise")
        valueQuery.addSelect("ssr.text_value")
        valueQuery.addSelect("ssr.result_flag")
        valueQuery.addSelect("ssr.clinical_item_id")  # Output clinical item if result flag means something
        valueQuery.addFrom("sim_state_result as ssr")
        valueQuery.addWhereIn("ssr.sim_state_id", stateIds)
        valueTable = DBUtil.execute(valueQuery, includeColumnNames=True, conn=conn)
        valueModels = modelListFromTable(valueTable)

        # Store in-memory dictionary for rapid cross-referencing "join" to result table
        # (keyed first by result ID, then by state ID)
        valueModelByStateIdByResultId = dict()
        for valueModel in valueModels:
            resultId = valueModel["sim_result_id"]
            stateId = valueModel["sim_state_id"]
            if resultId not in valueModelByStateIdByResultId:
                valueModelByStateIdByResultId[resultId] = dict()
            valueModelByStateIdByResultId[resultId][stateId] = valueModel

        # Now go back through original results and join up state-specific values, or use default values if needed
        resultValueModels = list()
        for resultModel in resultModels:
            resultId = resultModel["sim_result_id"]
            stateId = resultModel["sim_state_id"]
            if resultId in valueModelByStateIdByResultId:
                valueModelByStateId = valueModelByStateIdByResultId[
                    resultId]
                if stateId in valueModelByStateId:
                    # Have a state-specific value, populate that
                    valueModel = valueModelByStateId[stateId]
                    resultModel.update(valueModel)
                elif DEFAULT_STATE_ID in valueModelByStateId:
                    # No state-specific value, but have a default one to populate instead
                    valueModel = valueModelByStateId[DEFAULT_STATE_ID]
                    resultModel.update(valueModel)
                resultValueModels.append(resultModel)
            else:
                # No result information available, even in default state. Skip these
                # (deliberately excluded from the returned list rather than padded with None values)
                #resultModel["num_value"] = None;
                #resultModel["num_value_noise"] = None;
                #resultModel["text_value"] = None;
                #resultModel["result_flag"] = None;
                #resultModel["clinical_item_id"] = None;
                pass
        return resultValueModels
    finally:
        if not extConn:
            conn.close()
def loadPatientInfo(self, patientIds=None, relativeTime=None, conn=None):
    """Load basic information about the specified patients.

    Report patient state at given time, or default to time zero.
    May trigger (and record) time-based state transitions as a side effect
    if the patient's current state has a time trigger that has already elapsed.

    patientIds - Optional list of sim_patient_ids to restrict to; None loads all.
    relativeTime - Simulation time to report state at; defaults to 0.
    conn - Optional externally-owned DB connection; one is created (and closed) if omitted.
    Returns a list of patient models, each annotated with state-transition lookup info.
    """
    if relativeTime is None:
        relativeTime = 0  # Just look for time zero default then

    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("sp.sim_patient_id")
        query.addSelect("sp.name")
        query.addSelect("sp.age_years")
        query.addSelect("sp.gender")
        query.addSelect("s.sim_state_id")
        query.addSelect("s.name as state_name")
        query.addSelect("s.description as state_description")
        query.addSelect("sps.relative_time_start")
        query.addSelect("sps.relative_time_end")
        query.addFrom("sim_patient as sp")
        query.addFrom("sim_patient_state as sps")
        query.addFrom("sim_state as s")
        query.addWhere("sp.sim_patient_id = sps.sim_patient_id")
        query.addWhere("sps.sim_state_id = s.sim_state_id")

        if patientIds is not None:
            query.addWhereIn("sp.sim_patient_id", patientIds)

        # Look for the state that matches the given relative time offset
        # (started at or before relativeTime, and still open or ending after it)
        query.addWhereOp("sps.relative_time_start", "<=", relativeTime)
        query.openWhereOrClause()
        query.addWhere("sps.relative_time_end is null")
        query.addWhereOp("sps.relative_time_end", ">", relativeTime)
        query.closeWhereOrClause()

        query.addOrderBy("sp.name")
        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)

        if len(dataModels) > 0:
            # Secondary query to build lookup table of possible state transition options from patient current states
            subQuery = SQLQuery()
            subQuery.addSelect("pre_state_id")
            subQuery.addSelect("post_state_id")
            subQuery.addSelect("clinical_item_id")
            subQuery.addSelect("time_trigger")
            subQuery.addFrom("sim_state_transition as sst")
            subQuery.addWhereIn(
                "pre_state_id",
                columnFromModelList(dataModels, "sim_state_id"))
            subResults = DBUtil.execute(subQuery, conn=conn)

            # For each pre-state, track which clinical items or times trigger which post-states
            postStateIdByItemIdByPreStateId = dict()
            postStateIdTimeTriggerByPreStateId = dict()
            for preStateId, postStateId, itemId, timeTrigger in subResults:
                if preStateId not in postStateIdByItemIdByPreStateId:
                    postStateIdByItemIdByPreStateId[preStateId] = dict()
                postStateIdByItemIdByPreStateId[preStateId][
                    itemId] = postStateId
                if timeTrigger is not None:
                    # NOTE(review): only the last time-triggered transition per pre-state is kept
                    # if several exist — presumably at most one per state; confirm against schema
                    postStateIdTimeTriggerByPreStateId[preStateId] = (
                        postStateId, timeTrigger)

            # Record in patient result models for retrieval
            for i, dataModel in enumerate(dataModels):
                patientId = dataModel["sim_patient_id"]
                stateId = dataModel["sim_state_id"]

                dataModel["postStateIdByItemId"] = dict()
                if stateId in postStateIdByItemIdByPreStateId:
                    dataModel[
                        "postStateIdByItemId"] = postStateIdByItemIdByPreStateId[
                            stateId]
                # NOTE(review): key mismatch — an empty dict is stored under
                # "postStateIdTimeTriggerByPreStateId" but the populated value goes under
                # "postStateIdTimeTrigger" (which the check below reads); the former key
                # appears to be dead weight — confirm before relying on either.
                dataModel["postStateIdTimeTriggerByPreStateId"] = dict()
                if stateId in postStateIdTimeTriggerByPreStateId:
                    dataModel[
                        "postStateIdTimeTrigger"] = postStateIdTimeTriggerByPreStateId[
                            stateId]

                if dataModel[
                        "relative_time_end"] is None and "postStateIdTimeTrigger" in dataModel:
                    # Check that we haven't passed (and should thus trigger) a time-based state transition
                    (postStateId,
                     timeTrigger) = dataModel["postStateIdTimeTrigger"]
                    preStateTime = dataModel["relative_time_start"]
                    postStateTriggerTime = (preStateTime + timeTrigger)
                    if postStateTriggerTime <= relativeTime:
                        # Trigger state transition just by time elapsed
                        #print >> sys.stderr, relativeTime, preStateTime, stateId, postStateTriggerTime, postStateId
                        self.recordStateTransition(patientId,
                                                   stateId,
                                                   postStateId,
                                                   postStateTriggerTime,
                                                   conn=conn)
                        # State change which can yield new triggers, so recursively reload.
                        # Small risk of infinite recursion if timeTriggers are zero. Otherwise, should converge
                        # as each recursion will update the preState relativeTimeStart
                        dataModels[i] = self.loadPatientInfo(
                            [patientId], relativeTime, conn=conn)[0]
        return dataModels
    finally:
        if not extConn:
            conn.close()
def grade_cases(self, sim_patient_ids, sim_grader_id, conn=None):
    """Given the identifiers for a bunch of simulated physician-patient case records,
    and the identifier for a particular grading key to use, calculate what grade each
    case would get based on the choices made, and return the list of per-case grade
    models (total_score, most_graded_user_id, most_active_user_id, ...).

    Fix: the final combining step previously returned a list of the None values
    produced by `dict.update(...)` inside a comprehension; the grade models are
    now updated in place and the models themselves returned.
    """
    ext_conn = True
    if conn is None:
        conn = self.connFactory.connection()
        ext_conn = False
    try:
        # Inner query retrieves physician-patient cases with ranking group_names (to later select first)
        # per case for specified cases. Each NULL group_name is treated as a separate group by assigning it
        # sim_patient_order_id. It also omits Default user (sim_user_id = 0) from grading.
        inner_query = SQLQuery()
        inner_query.addSelect("score")
        inner_query.addSelect(
            "rank() over ("  # ranks rows incrementally in the same group
            " partition by coalesce(group_name, sim_patient_order_id::text), sim_patient_id"
            " order by sim_patient_order_id"
            ")")
        inner_query.addSelect("sim_user_id")
        inner_query.addSelect("sim_patient_id")
        inner_query.addSelect("sim_grader_id")
        inner_query.addFrom("sim_patient_order spo")
        inner_query.addJoin(
            "sim_grading_key sgk",
            "sgk.clinical_item_id = spo.clinical_item_id"
            " and sgk.sim_state_id = spo.sim_state_id")
        inner_query.addWhereEqual("sgk.sim_grader_id", sim_grader_id)
        inner_query.addWhereNotEqual(
            "spo.sim_user_id",
            0)  # 0 = ignore 'Default user', sets up initial cases
        inner_query.addWhereIn("spo.sim_patient_id", sim_patient_ids)
        inner_query.addOrderBy("relative_time_start")
        inner_query.addOrderBy("sim_patient_order_id")

        # Outer query sums the score per patient case and selects most graded physician for the case.
        # Theoretically, it isn't necessarily the most active physician for the case since his orders
        # might have been dropped by selecting only the first record within group_name group.
        query = SQLQuery()
        query.addSelect("sim_patient_id")
        query.addSelect("sim_grader_id")
        query.addSelect("sum(score) as total_score")
        query.addSelect(
            "mode() within group ("  # mode() selects most frequent value within group
            " order by sim_user_id"
            ") as most_graded_user_id")
        query.addFrom("(" + str(inner_query) + ") as ranked_groups")
        query.addWhereEqual("ranked_groups.rank",
                            1)  # count only first order in the same group
        query.addGroupBy("sim_patient_id")
        query.addGroupBy("sim_grader_id")

        query_params = inner_query.getParams() + query.getParams()
        grades_table = DBUtil.execute(query,
                                      query_params,
                                      includeColumnNames=True,
                                      conn=conn)
        grades_model = modelListFromTable(grades_table)

        # get most active users for the cases
        most_active_user_query = SQLQuery()
        most_active_user_query.addSelect("sim_patient_id")
        most_active_user_query.addSelect("mode() within group ("
                                         " order by sim_user_id"
                                         ") as most_active_user_id")
        most_active_user_query.addFrom("sim_patient_order")
        most_active_user_query.addWhereNotEqual("sim_user_id",
                                                0)  # ignore Default user
        most_active_user_query.addWhereIn("sim_patient_id", sim_patient_ids)
        most_active_user_query.addGroupBy("sim_patient_id")
        most_active_user_query.addOrderBy("sim_patient_id")
        most_active_user_table = DBUtil.execute(most_active_user_query,
                                                includeColumnNames=True,
                                                conn=conn)
        most_active_user_model = modelListFromTable(most_active_user_table)

        # make a dict by sim_patient_id out of results - will be used for combining
        most_active_user_dict = {
            most_active_user["sim_patient_id"]: most_active_user
            for most_active_user in most_active_user_model
        }

        # combine results: annotate each grade model in place with most-active-user info
        # (dict.update returns None, so the previous comprehension yielded a list of Nones)
        for grade in grades_model:
            grade.update(most_active_user_dict[grade["sim_patient_id"]])
        return grades_model
    finally:
        if not ext_conn:
            conn.close()
def _buildPatientEpisodeQuery(self):
    """Build the SQL query listing patient episodes (lab orders with normal-result
    counts). Shared by the FeatureMatrixFactory and DataExtractor timing runs so
    both operate on an identical episode set.
    """
    patientEpisodeQuery = SQLQuery()
    patientEpisodeQuery.addSelect("CAST(pat_id AS bigint)")
    patientEpisodeQuery.addSelect("sop.order_proc_id AS order_proc_id")
    patientEpisodeQuery.addSelect("proc_code")
    patientEpisodeQuery.addSelect("order_time")
    patientEpisodeQuery.addSelect(
        "COUNT(CASE result_in_range_yn WHEN 'Y' THEN 1 ELSE null END) AS normal_results"
    )
    patientEpisodeQuery.addFrom("stride_order_proc AS sop")
    patientEpisodeQuery.addFrom("stride_order_results AS sor")
    patientEpisodeQuery.addWhere("sop.order_proc_id = sor.order_proc_id")
    patientEpisodeQuery.addWhereIn("proc_code", ["Foo", "Bar", "Baz", "Qux"])
    patientEpisodeQuery.addGroupBy(
        "pat_id, sop.order_proc_id, proc_code, order_time")
    patientEpisodeQuery.addOrderBy(
        "pat_id, sop.order_proc_id, proc_code, order_time")
    return patientEpisodeQuery

def _extractPatientFeatures(self, extractor, patientId,
                            patientEpisodeByIndexTime, tempColNames,
                            preTimeDelta, postTimeDelta):
    """Query and add the clinical item and lab result features for a single
    patient's episodes via the DataExtractor API. Extends tempColNames in place
    with any newly generated feature column names.
    """
    # Clinical item features (PerfItem300 / PerfItem400 / PerfItem500).
    # BUG FIX: the original passed the label "PerfItem300" to
    # addClinicalItemFeatures_singlePatient for all three items; each item's
    # own name is used now.
    for itemName in ("PerfItem300", "PerfItem400", "PerfItem500"):
        eventTimes = extractor.parseClinicalItemData_singlePatient(
            modelListFromTable(extractor.queryClinicalItemsByName(
                (itemName,), [patientId])))
        tempColNames.extend(
            extractor.addClinicalItemFeatures_singlePatient(
                eventTimes, patientEpisodeByIndexTime, itemName,
                daysBins=[]))

    # Lab result features (Foo / Bar / Baz / Qux).
    for labName in ("Foo", "Bar", "Baz", "Qux"):
        labResultTable = extractor.queryLabResults([labName], [patientId])
        labsByBaseName = extractor.parseLabResultsData_singlePatient(
            modelListFromTable(labResultTable))
        tempColNames.extend(extractor.addLabFeatures_singlePatient(
            patientEpisodeByIndexTime, labsByBaseName, [labName],
            preTimeDelta, postTimeDelta))
    # NOTE: flowsheet ("Perflow") extraction was commented out in the original
    # DataExtractor path and remains unimplemented here.

def test_performance(self):
    """Test performance of FeatureMatrixFactory against DataExtractor.

    Builds the same feature matrix through both APIs, timing each run, and
    asserts that the DataExtractor path is slower than the factory path.
    """
    # Initialize DB cursor.
    cursor = self.connection.cursor()

    ### Timed run 1: FeatureMatrixFactory ###
    factoryStart = time.time()
    self.factory = FeatureMatrixFactory()

    # Build SQL query for list of patient episodes.
    patientEpisodeQuery = self._buildPatientEpisodeQuery()
    cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

    # Set and process patientEpisodeInput.
    self.factory.setPatientEpisodeInput(cursor, "pat_id", "order_time")
    self.factory.processPatientEpisodeInput()

    # Look for lab data 90 days before each episode, but never after.
    preTimeDelta = datetime.timedelta(-90)
    postTimeDelta = datetime.timedelta(0)

    # Add clinical item features.
    self.factory.addClinicalItemFeatures(["PerfItem300"])
    self.factory.addClinicalItemFeatures(["PerfItem400"])
    self.factory.addClinicalItemFeatures(["PerfItem500"])

    # Add lab result features.
    self.factory.addLabResultFeatures(["Foo"], False, preTimeDelta, postTimeDelta)
    self.factory.addLabResultFeatures(["Bar"], False, preTimeDelta, postTimeDelta)
    self.factory.addLabResultFeatures(["Baz"], False, preTimeDelta, postTimeDelta)
    self.factory.addLabResultFeatures(["Qux"], False, preTimeDelta, postTimeDelta)

    # Add flowsheet features.
    self.factory.addFlowsheetFeatures(["Perflow"], preTimeDelta, postTimeDelta)

    # Build matrix, then stop timer.
    self.factory.buildFeatureMatrix()
    factoryStop = time.time()

    ### Timed run 2: DataExtractor ###
    extractorStart = time.time()
    extractor = DataExtractor()
    extractor.dataCache = dict()

    # Initialize output file.
    outFile = open("extractor.feature_matrix.tab.gz", "w")
    formatter = TextResultsFormatter(outFile)

    # Same episode query as the factory run.
    patientEpisodeQuery = self._buildPatientEpisodeQuery()
    cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

    # Process patient episodes.
    patientEpisodes = list()
    row = cursor.fetchone()
    while row is not None:
        (pat_id, order_proc_id, proc_code, order_time, normal_results) = row
        patientEpisodes.append(RowItemModel({
            "patient_id": pat_id,
            "order_proc_id": order_proc_id,
            "proc_code": proc_code,
            "order_time": order_time,
            "result_normal_count": normal_results
        }))
        row = cursor.fetchone()

    # Initialize patient data.
    lastPatientId = None
    colNames = None
    patientEpisodeByIndexTime = None
    tempColNames = ["patient_id", "order_proc_id", "proc_code", "order_time",
                    "result_normal_count"]

    # Populate patient data, flushing out each patient's aggregated episodes
    # whenever a new patient ID is encountered.
    for patientEpisode in patientEpisodes:
        patientId = patientEpisode["patient_id"]
        if lastPatientId is not None and lastPatientId != patientId:
            # Finished one patient: extract features for the episodes collected.
            # BUG FIX: the original queried feature data with the NEW patientId
            # even though patientEpisodeByIndexTime still holds the PREVIOUS
            # patient's episodes; use lastPatientId, matching the trailing
            # last-patient block below.
            self._extractPatientFeatures(extractor, lastPatientId,
                                         patientEpisodeByIndexTime,
                                         tempColNames, preTimeDelta,
                                         postTimeDelta)
            if colNames is None:
                # First row, print header row
                colNames = tempColNames
                formatter.formatTuple(colNames)
            # Print out patient (episode) data (one row per episode)
            formatter.formatResultDicts(patientEpisodeByIndexTime.values(),
                                        colNames)
        if lastPatientId is None or lastPatientId != patientId:
            # Prepare to aggregate patient episode record per patient
            patientEpisodeByIndexTime = dict()
        patientEpisodeByIndexTime[patientEpisode["order_time"]] = patientEpisode
        lastPatientId = patientId
        outFile.flush()

    # Last patient's data (never flushed inside the loop).
    if patientEpisodeByIndexTime is not None:
        self._extractPatientFeatures(extractor, lastPatientId,
                                     patientEpisodeByIndexTime, tempColNames,
                                     preTimeDelta, postTimeDelta)
        if colNames is None:
            # Single-patient input: header was never printed inside the loop.
            colNames = tempColNames
            formatter.formatTuple(colNames)
        formatter.formatResultDicts(patientEpisodeByIndexTime.values(),
                                    colNames)

    # Close file, then stop timer.
    outFile.close()
    extractorStop = time.time()

    # Compare results: factory run should be the faster of the two.
    factoryTime = factoryStop - factoryStart
    extractorTime = extractorStop - extractorStart
    self.assertTrue(extractorTime > factoryTime)

    # Clean up feature matrix files.
    for matrixFileName in ("extractor.feature_matrix.tab.gz",
                           self.factory.getMatrixFileName()):
        try:
            os.remove(matrixFileName)
        except OSError:
            pass
def analyzePatientItems(self, patientItemData, analysisQuery, recQuery, patientId, recommender, conn):
    """Given the primary query data and clinical item list for a given test patient,
    Parse through the item list and run a query to get the top recommended IDs
    to produce the relevant verify and recommendation item ID sets for comparison

    Returns None if the patient record cannot be analyzed (no extracted query data,
    or no usable key order set when analysisQuery.numRecsByOrderSet is set);
    otherwise a 5-tuple:
        (queryItemCountById, verifyItemCountById, recommendedItemIds,
         recommendedData, orderSetItemData)
    """
    if "queryItemCountById" not in patientItemData:
        # Apparently not able to find / extract relevant data, so skip this record
        return None
    queryItemCountById = patientItemData["queryItemCountById"]
    verifyItemCountById = patientItemData["verifyItemCountById"]

    ## Query for orderset linked items.
    # is_verify_item marks orders dated at/after queryEndTime (verify period)
    # vs. before it (query period). Ad-hoc order set sections are excluded.
    orderSetQuery = \
        """select ic.external_id, pi.clinical_item_id, pi.item_date >= %(p)s as is_verify_item
        from patient_item as pi, patient_item_collection_link as picl, item_collection_item as ici, item_collection as ic
        where patient_id = %(p)s
        and pi.patient_item_id = picl.patient_item_id
        and picl.item_collection_item_id = ici.item_collection_item_id
        and ici.item_collection_id = ic.item_collection_id
        and ic.section <> %(p)s
        and item_date >= %(p)s and item_date < %(p)s
        """ % {"p": DBUtil.SQL_PLACEHOLDER}
    # Parameter order must match placeholder order in the SQL above.
    orderSetParams = (
        patientItemData["queryEndTime"],
        patientItemData["patient_id"],
        AD_HOC_SECTION,
        patientItemData["baseItemDate"],
        patientItemData["verifyEndTime"],
    )
    resultTable = DBUtil.execute(orderSetQuery,
                                 orderSetParams,
                                 includeColumnNames=True,
                                 conn=conn)
    allOrderSetItems = modelListFromTable(resultTable)
    # Group the order-set-linked patient items by their order set (external_id).
    orderSetItemsByOrderSetId = dict()
    for orderSetItem in allOrderSetItems:
        orderSetId = orderSetItem["external_id"]
        if orderSetId not in orderSetItemsByOrderSetId:
            orderSetItemsByOrderSetId[orderSetId] = list()
        orderSetItemsByOrderSetId[orderSetId].append(orderSetItem)
    keyOrderSetIds = list(orderSetItemsByOrderSetId.keys())

    if analysisQuery.numRecsByOrderSet:
        # Only use the specified key order set for each set of patient data
        orderSetId = patientItemData["order_set_id"]
        if orderSetId not in keyOrderSetIds:
            # No valid order set orders to use in this setting. Skip this case
            return None
        keyOrderSetIds = [orderSetId]

    # Pre-cache order set item data
    if self.supportRecommender.itemIdsByOrderSetId is None:
        self.supportRecommender.initItemLookups(analysisQuery.baseRecQuery)

    # For each order set, count up how many order set linked items used.
    # Count up how many items used indirectly that would have been within order set.
    # Organize by whether the item occurred during the "verify" vs. "query" time period
    usedItemIdsByOrderSetIdByIsVerifyItem = {
        True: dict(),
        False: dict()
    }
    for keyOrderSetId in keyOrderSetIds:
        orderSetItems = orderSetItemsByOrderSetId[keyOrderSetId]
        for orderSetItem in orderSetItems:
            isVerifyItem = orderSetItem["is_verify_item"]
            orderSetId = orderSetItem["external_id"]
            itemId = orderSetItem["clinical_item_id"]
            usedItemIdsByOrderSetId = usedItemIdsByOrderSetIdByIsVerifyItem[
                isVerifyItem]
            if orderSetId not in usedItemIdsByOrderSetId:
                usedItemIdsByOrderSetId[orderSetId] = set()
            usedItemIdsByOrderSetId[orderSetId].add(itemId)

    # Summarize into total number of (unique) items available from used order sets, and which of those items were actually used from the order set
    allUsedOrderSetItemIds = set()
    allUsedOrderSetIds = set()
    recommendableUsedOrderSetItemIds = set()
    allAvailableOrderSetItemIds = set()
    allAvailableVerifyOrderSetItemIds = set()
    recommendableAvailableOrderSetItemIds = set()
    for isVerifyItem, usedItemIdsByOrderSetId in usedItemIdsByOrderSetIdByIsVerifyItem.items(
    ):
        for orderSetId, usedItemIds in usedItemIdsByOrderSetId.items():
            allUsedOrderSetIds.add(orderSetId)
            if isVerifyItem:
                # Verify-period usage: track full item content of the order set
                # and which used items pass the recommendability filter.
                # NOTE(review): assumes the recommendable-used accounting is
                # scoped to verify-period items only — confirm against original
                # indentation/intent.
                allAvailableVerifyOrderSetItemIds.update(
                    self.supportRecommender.itemIdsByOrderSetId[orderSetId]
                )
                for itemId in usedItemIds:
                    if self.supportRecommender.isItemRecommendable(
                            itemId, None, recQuery,
                            self.supportRecommender.categoryIdByItemId):
                        recommendableUsedOrderSetItemIds.add(itemId)
            allUsedOrderSetItemIds.update(usedItemIds)
            # Tally every item the order set makes available, and the
            # recommendable subset thereof.
            for itemId in self.supportRecommender.itemIdsByOrderSetId[
                    orderSetId]:
                if self.supportRecommender.isItemRecommendable(
                        itemId, None, recQuery,
                        self.supportRecommender.categoryIdByItemId):
                    recommendableAvailableOrderSetItemIds.add(itemId)
            allAvailableOrderSetItemIds.update(
                self.supportRecommender.itemIdsByOrderSetId[orderSetId])

    # Treat available order set items from the verify time period like "recommended data"
    recommendedData = list()
    for itemId in allAvailableVerifyOrderSetItemIds:
        if not self.supportRecommender.isItemRecommendable(
                itemId, None, recQuery,
                self.supportRecommender.categoryIdByItemId):
            continue  # Skip items that do not fit recommendable criteria (i.e., excluded categories) for fair comparison
        recItemModel = dict(self.supportRecommender.itemsById[itemId])
        recItemModel["score"] = recItemModel[recQuery.sortField]
        recommendedData.append(recItemModel)
    recommendedData.sort(key=itemgetter(recQuery.sortField), reverse=True)

    # Distill down to just the set of recommended item IDs
    # (top numRecommendations by sort field; <= 0 means no cap).
    recommendedItemIds = set()
    for i, recommendationModel in enumerate(recommendedData):
        if analysisQuery.numRecommendations > 0 and i >= analysisQuery.numRecommendations:
            break
        recommendedItemIds.add(recommendationModel["clinical_item_id"])

    # Summary metrics on how many order items in query and verify periods are recommendable
    # Outer join query for order set items should work same way, but maybe simpler to follow as second query
    itemQuery = \
        """select pi.clinical_item_id, pi.item_date >= %(p)s as is_verify_item
        from patient_item as pi
        where patient_id = %(p)s
        and item_date >= %(p)s and item_date < %(p)s
        """ % {"p": DBUtil.SQL_PLACEHOLDER}
    itemParams = (
        patientItemData["queryEndTime"],
        patientItemData["patient_id"],
        patientItemData["baseItemDate"],
        patientItemData["verifyEndTime"],
    )
    resultTable = DBUtil.execute(itemQuery, itemParams, conn=conn)
    recommendableQueryItemIds = set()
    recommendableVerifyItemIds = set()
    for itemId, isVerifyItem in resultTable:
        if self.supportRecommender.isItemRecommendable(
                itemId, None, recQuery,
                self.supportRecommender.categoryIdByItemId):
            if not isVerifyItem:
                recommendableQueryItemIds.add(itemId)
            else:
                recommendableVerifyItemIds.add(itemId)

    # Order Set Usage Summary Data
    orderSetItemData = \
        {
            "allUsedOrderSetIds": allUsedOrderSetIds,
            "allUsedOrderSetItemIds": allUsedOrderSetItemIds,
            "allAvailableOrderSetItemIds": allAvailableOrderSetItemIds,
            "recommendableUsedOrderSetItemIds": recommendableUsedOrderSetItemIds,
            "recommendableAvailableOrderSetItemIds": recommendableAvailableOrderSetItemIds,
            "recommendableQueryItemIds": recommendableQueryItemIds,
            "recommendableVerifyItemIds": recommendableVerifyItemIds,
        }

    return (queryItemCountById, verifyItemCountById, recommendedItemIds,
            recommendedData, orderSetItemData)
def populateResultFlag(self, resultModel, conn=None):
    """If order result row model has no pre-specified result_flag, then assign one
    based on distribution of known results for this item type, or just a default
    "Result" flag.

    Params:
        resultModel - Row model dict with keys result_flag, result_in_range_yn,
            ord_num_value, base_name. Mutated in place: result_flag is set.
        conn - Optional existing DB connection. If None, a new one is created
            and closed before returning (only for the stats-lookup path).
    """
    if resultModel["result_flag"] is not None:
        # Only proceed if flag is currently null / blank
        return
    elif resultModel["result_in_range_yn"] is not None:
        # Alternative specification of normal / abnormal
        if resultModel["result_in_range_yn"] == "Y":
            resultModel["result_flag"] = FLAG_IN_RANGE
        else:  # resultModel["result_in_range_yn"] == "N"
            resultModel["result_flag"] = FLAG_ABNORMAL
        return
    elif resultModel["ord_num_value"] is None or resultModel[
            "ord_num_value"] == SENTINEL_RESULT_VALUE:
        # No specific result flag or (numerical) value provided.
        # Just record that some result was generated at all
        resultModel["result_flag"] = FLAG_RESULT
        return
    elif resultModel['base_name'] is None:
        # With 2014-2017 data, there are fields with a null base_name.
        # We can't build summary stats around this case, so just return
        # FLAG_RESULT.
        resultModel['result_flag'] = FLAG_RESULT
        return

    # General case, no immediately available result flags.
    # Derive one from the distribution of known results for this base_name.
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()
    # try/finally added so a locally-created connection is closed even when a
    # query or stats calculation raises (matches the pattern used by the other
    # methods in this module).
    try:
        if self.resultStatsByBaseName is None:
            # Ensure result stats cache is preloaded
            dataTable = DBUtil.execute("select * from order_result_stat",
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            self.resultStatsByBaseName = modelDictFromList(
                dataModels, "base_name")

        if resultModel["base_name"] not in self.resultStatsByBaseName:
            # Result stats not already in cache. Query from DB and store in cache for future use.
            statModel = self.calculateResultStats(resultModel["base_name"],
                                                  conn=conn)
            # Store results back in cache to facilitate future lookups
            self.resultStatsByBaseName[resultModel["base_name"]] = statModel
            DBUtil.insertRow("order_result_stat", statModel, conn=conn)
        statModel = self.resultStatsByBaseName[resultModel["base_name"]]

        if statModel["max_result_flag"] is not None or statModel[
                "max_result_in_range"] is not None:
            # Alternative result flagging methods exist. We should just use those
            # and treat this as a normal "in range" result
            resultModel["result_flag"] = FLAG_IN_RANGE
        elif statModel["value_count"] > 0:
            # Found some values to calculate summary stats; flag by z-score
            try:
                mean = statModel["value_sum"] / statModel["value_count"]
                # Std Dev = sqrt( E[x^2] - E[x]^2 )
                var = (statModel["value_sum_squares"] /
                       statModel["value_count"]) - (mean * mean)
                stdev = math.sqrt(var)
                zScore = 0
                if stdev > 0:
                    zScore = (resultModel["ord_num_value"] - mean) / stdev
                if zScore < -Z_SCORE_LIMIT:
                    resultModel["result_flag"] = FLAG_LOW
                elif zScore > Z_SCORE_LIMIT:
                    resultModel["result_flag"] = FLAG_HIGH
                else:  # |zScore| <= Z_SCORE_LIMIT
                    resultModel["result_flag"] = FLAG_IN_RANGE
            except ValueError:
                # Math error, probably stdev = 0 or variance < 0 (float
                # round-off), just treat as an unspecified result
                resultModel["result_flag"] = FLAG_RESULT
        else:
            # No value distribution, just record as a non-specific result
            resultModel["result_flag"] = FLAG_RESULT
    finally:
        if not extConn:
            conn.close()