def updateClinicalItemCounts(self, acceptCache=False, conn=None):
    """Refresh the denormalized item_count / patient_count / encounter_count
    columns on clinical_item from the clinical_item_association summary stats.

    If acceptCache is True, first look for a "clinicalItemCountsUpdated" entry
    in the data_cache table; if present, assume the counts were already
    refreshed and skip the recalculation entirely.
    """
    ownConn = conn is None
    if ownConn:
        conn = self.connFactory.connection()
    try:
        if acceptCache and self.getCacheData("clinicalItemCountsUpdated", conn=conn) is not None:
            # Cache flag present: counts already refreshed, nothing to do
            return

        # Zero out all existing counts before repopulating them
        resetQuery = "update clinical_item set item_count = 0, patient_count = 0, encounter_count = 0 "
        resetParams = []
        if self.maxClinicalItemId is not None:  # Restrict to (test) data
            resetQuery += "where clinical_item_id < %s" % DBUtil.SQL_PLACEHOLDER
            resetParams.append(self.maxClinicalItemId)
        DBUtil.execute(resetQuery, resetParams, conn=conn)

        # Primary summary stats live on the "diagonal" of the association
        # matrix, where an item is paired with itself
        countQuery = SQLQuery()
        for selectCol in (
            "clinical_item_id",
            "count_0 as item_count",
            "patient_count_0 as patient_count",
            "encounter_count_0 as encounter_count",
        ):
            countQuery.addSelect(selectCol)
        countQuery.addFrom("clinical_item_association as ci")
        countQuery.addWhere("clinical_item_id = subsequent_item_id")
        if self.maxClinicalItemId is not None:  # Restrict to (test) data
            countQuery.addWhereOp("clinical_item_id", "<", self.maxClinicalItemId)

        countTable = DBUtil.execute(countQuery, includeColumnNames=True, conn=conn)
        for countModel in modelListFromTable(countTable):
            DBUtil.updateRow("clinical_item", countModel, countModel["clinical_item_id"], conn=conn)

        # Flag the cache so a later acceptCache=True call can skip this work
        self.setCacheData("clinicalItemCountsUpdated", "True", conn=conn)
    finally:
        if ownConn:
            conn.close()
def clinicalItemFromSourceItem(self, sourceItem, category, conn):
    """Load or produce a clinical_item record model for the given sourceItem.

    Results are memoized in self.clinicalItemByCategoryIdCode keyed by
    (category id, source code). On a cache hit, the stored description may be
    replaced by the incoming one when the latter is shorter or the stored one
    is just a generic template.
    """
    cacheKey = (category["clinical_item_category_id"], sourceItem["code"])
    if cacheKey not in self.clinicalItemByCategoryIdCode:
        # Not in the local cache: findOrInsertItem checks the database and
        # persists a brand-new record only if one doesn't already exist
        newItem = RowItemModel({
            "clinical_item_category_id": category["clinical_item_category_id"],
            "external_id": sourceItem["medication_id"],
            "name": sourceItem["code"],
            "description": sourceItem["med_description"],
        })
        (newItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", newItem, conn=conn)
        newItem["clinical_item_id"] = newItemId
        self.clinicalItemByCategoryIdCode[cacheKey] = newItem
    else:
        # Already known: look for a chance to simplify differing descriptions
        # of the same medication
        knownItem = self.clinicalItemByCategoryIdCode[cacheKey]
        knownDescription = knownItem["description"]
        isTemplateDescription = knownDescription.startswith(TEMPLATE_MEDICATION_PREFIX)
        if len(sourceItem["med_description"]) < len(knownDescription) or isTemplateDescription:
            # Prior description is either a generic template or longer than
            # necessary; replace it with the current (shorter/specific) one
            knownItem["description"] = sourceItem["med_description"]
            DBUtil.updateRow("clinical_item", knownItem, knownItem["clinical_item_id"], conn=conn)
    return self.clinicalItemByCategoryIdCode[cacheKey]
def signOrders(self, userId, patientId, currentTime, orderItemIds, discontinuePatientOrderIds=None, conn=None):
    """Commit new order item IDs for the given patient and starting now,
    and discontinue (set end date) for any existing orders specified.
    Record any patient state transitions the orders would trigger.

    :param userId: sim_user_id recorded on each new sim_patient_order row
    :param patientId: sim_patient_id the orders belong to
    :param currentTime: relative_time_start for new orders and
        relative_time_end for discontinued ones
    :param orderItemIds: iterable of clinical_item_ids to order (deduplicated)
    :param discontinuePatientOrderIds: optional sim_patient_order ids to close out
    :param conn: optional existing DB connection; a new one is created (and
        closed in the finally block) when not provided
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # Denormalized recording of current patient state to facilitate easy
        # retrieval linked to orders later
        patientInfo = self.loadPatientInfo([patientId], currentTime, conn=conn)[0]
        stateId = patientInfo["sim_state_id"]
        postStateIdByItemId = patientInfo["postStateIdByItemId"]

        orderItemIdSet = set(orderItemIds)  # Ensure unique and facilitate set operations

        # One insert per distinct ordered item; the same dict is reused with
        # only clinical_item_id swapped per iteration
        insertDict = {
            "sim_user_id": userId,
            "sim_patient_id": patientId,
            "sim_state_id": stateId,
            "relative_time_start": currentTime
        }
        for itemId in orderItemIdSet:
            insertDict["clinical_item_id"] = itemId
            DBUtil.insertRow("sim_patient_order", insertDict, conn=conn)

        # See if any of these new orders triggered state transitions.
        # NOTE: viewkeys() is Python 2 only; a view & set yields a plain set.
        triggerItemIds = postStateIdByItemId.viewkeys() & orderItemIdSet
        while triggerItemIds:
            # Found a trigger item
            triggerItemId = None
            if len(triggerItemIds) > 1:
                # Found multiple. Weird. Arbitrarily act on the one that
                # appeared first in the input list
                for itemId in orderItemIds:
                    if itemId in triggerItemIds:
                        triggerItemId = itemId
                        break
            else:
                triggerItemId = triggerItemIds.pop()
            postStateId = postStateIdByItemId[triggerItemId]

            # Record the state transition
            self.recordStateTransition(patientId, stateId, postStateId, currentTime, conn=conn)

            # Reload patientInfo to reflect new patient state (the new state
            # may define its own trigger items)
            patientInfo = self.loadPatientInfo([patientId], currentTime, conn=conn)[0]
            stateId = patientInfo["sim_state_id"]
            postStateIdByItemId = patientInfo["postStateIdByItemId"]
            orderItemIdSet.discard(triggerItemId)  # Don't keep looking for this one, important to avoid infinite loop
            triggerItemIds = postStateIdByItemId.viewkeys() & orderItemIdSet

        if discontinuePatientOrderIds is not None:
            # Close out the specified existing orders at the current time
            updateDict = {"relative_time_end": currentTime}
            for patientOrderId in discontinuePatientOrderIds:
                DBUtil.updateRow("sim_patient_order", updateDict, patientOrderId, conn=conn)
    finally:
        # NOTE(review): commit happens even when an exception escapes the try
        # block, so partial order inserts are persisted — confirm intended
        conn.commit()
        if not extConn:
            conn.close()
def updateMetricDescriptionLines(self):
    """Collapse the accumulated description lines for each metric into a
    single space-joined string and persist it to the metric table.

    Reads self.metricLineDescriptionsById, a mapping of metric id -> list of
    description line strings collected earlier; issues one metric-table
    update per metric id.
    """
    for metricId, descriptionLines in self.metricLineDescriptionsById.items():
        # ' '.join(...) is the idiomatic equivalent of the original
        # unbound-method call str.join(' ', descriptionLines)
        description = ' '.join(descriptionLines)
        DBUtil.updateRow("metric", {"description": description}, metricId)
def test_analyzePatientItems(self):
    """Run the association analysis against the mock test data above and
    verify expected stats afterwards.

    Exercises incremental update scenarios in sequence: date-filtered partial
    update, full incremental update, expansion when an item's analysis_status
    changes, re-analysis after a patient's data is split by a new record,
    idempotence of repeat analysis, and finally the complete update across
    all patients, verified at the item, patient, and encounter count levels.
    """
    # Unique-count (patient-level) association stats for negative (test) item ids
    associationQuery = \
        """
        select clinical_item_id, subsequent_item_id,
            patient_count_0, patient_count_3600, patient_count_86400, patient_count_604800,
            patient_count_2592000, patient_count_7776000, patient_count_31536000,
            patient_count_any,
            patient_time_diff_sum, patient_time_diff_sum_squares
        from clinical_item_association
        where clinical_item_id < 0
        order by clinical_item_id, subsequent_item_id
        """

    log.debug("Use incremental update, including date filters to start.")
    analysisOptions = AnalysisOptions()
    analysisOptions.patientIds = [-22222, -33333]
    analysisOptions.startDate = datetime(2000, 1, 9)
    analysisOptions.endDate = datetime(2000, 2, 11)
    self.analyzer.analyzePatientItems(analysisOptions)

    # Columns: item, subsequent item, patient counts per time bucket
    # (0s, 1h, 1d, 1w, 30d, 90d, 1y, any), time diff sum, sum of squares
    expectedAssociationStats = \
        [
            [-11,-11, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-11, -6, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -6,-11, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -6, -6, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
        ]
    associationStats = DBUtil.execute(associationQuery)
    self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

    log.debug("Use incremental update, only doing the update based on a part of the data.")
    analysisOptions = AnalysisOptions()
    analysisOptions.patientIds = [-22222, -33333]
    self.analyzer.analyzePatientItems(analysisOptions)
    expectedAssociationStats = \
        [
            [-11,-11, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [-11, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -6, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -7,-11, 0, 0, 0, 1, 1, 1, 1, 1, 345600.0, 119439360000.0],
            [ -7, -7, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -7, -6, 0, 0, 0, 1, 1, 1, 1, 1, 345600.0, 119439360000.0],
            [ -6,-11, 1, 1, 1, 2, 2, 2, 2, 2, 172800.0, 29859840000.0],
            [ -6, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -6, -6, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
        ]
    associationStats = DBUtil.execute(associationQuery)
    self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

    log.debug("Expand incremental update, by now including additional clinical items whose analysis status previously excluded them.")
    # Flip item -2 into analysis scope and re-run, expecting new -2 rows
    DBUtil.updateRow("clinical_item", {"analysis_status": 1}, -2)
    analysisOptions.patientIds = [-22222, -33333]
    self.analyzer.analyzePatientItems(analysisOptions)
    expectedAssociationStats = \
        [
            [-11,-11, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [-11, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -6, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-11, -2, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -7,-11, 0, 0, 0, 1, 1, 1, 1, 1, 345600.0, 119439360000.0],
            [ -7, -7, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -7, -6, 0, 0, 0, 1, 1, 1, 1, 1, 345600.0, 119439360000.0],
            [ -6,-11, 1, 1, 1, 2, 2, 2, 2, 2, 172800.0, 29859840000.0],
            [ -6, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -6, -6, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [ -2,-11, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -2, -2, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
        ]
    associationStats = DBUtil.execute(associationQuery)
    self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

    log.debug("Incremental update that includes a single patient data being split, so have to account for all of those dependencies")
    # Insert one extra patient_item for patient -33333 (item -3) mid-timeline
    headers = ["patient_item_id", "encounter_id", "patient_id", "clinical_item_id", "item_date"]
    dataModels = \
        [
            RowItemModel([-1111, -334, -33333, -3, datetime(2000, 2,11, 8)], headers),
        ]
    for dataModel in dataModels:
        (dataItemId, isNew) = DBUtil.findOrInsertItem("patient_item", dataModel)
    analysisOptions.patientIds = [-22222, -33333]
    self.analyzer.analyzePatientItems(analysisOptions)
    expectedAssociationStats = \
        [
            [-11,-11, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [-11, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -6, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-11, -3, 0, 0, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
            [-11, -2, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -7,-11, 0, 0, 0, 1, 1, 1, 1, 1, 345600.0, 119439360000.0],
            [ -7, -7, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -7, -6, 0, 0, 0, 1, 1, 1, 1, 1, 345600.0, 119439360000.0],
            [ -6,-11, 1, 1, 1, 2, 2, 2, 2, 2, 172800.0, 29859840000.0],
            [ -6, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -6, -6, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [ -6, -3, 0, 0, 0, 1, 1, 1, 1, 1, 201600.0, 40642560000.0],
            [ -3,-11, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -3, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -3, -3, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -3, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -2,-11, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -2, -3, 0, 0, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
            [ -2, -2, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
        ]
    associationStats = DBUtil.execute(associationQuery)
    self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

    log.debug("Negative test case, repeating analysis should not change any results")
    analysisOptions.patientIds = [-22222, -33333]
    self.analyzer.analyzePatientItems(analysisOptions)
    associationStats = DBUtil.execute(associationQuery)
    self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

    log.debug("Complete the remaining incremental update")
    analysisOptions.patientIds = [-11111, -22222, -33333]
    self.analyzer.analyzePatientItems(analysisOptions)
    expectedAssociationStats = \
        [   # Note that sum-squares in particular gets rounded off due to large values
            [-12, -12, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-12, -10, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-12, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-12, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -11, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [-11, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -6, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-11, -3, 0, 0, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
            [-11, -2, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-10, -12, 0, 0, 0, 0, 0, 1, 1, 1, 2678400.0, 7173830000000.0],  # Longer time diff?
            [-10, -10, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-10, -8, 0, 0, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
            [-10, -4, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -8, -12, 0, 0, 0, 0, 0, 1, 1, 1, 2671200.0, 7135310000000.0],  # Longer diff
            [ -8, -10, 0, 0, 1, 1, 1, 1, 1, 1, 79200.0, 6272640000.0],
            [ -8, -8, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -8, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -7, -11, 0, 0, 0, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
            [ -7, -7, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -7, -6, 0, 0, 0, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
            [ -6, -11, 1, 1, 1, 2, 2, 2, 2, 2, 172800.0, 29859840000.0],
            [ -6, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -6, -6, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [ -6, -3, 0, 0, 0, 1, 1, 1, 1, 1, 201600.0, 40642600000.0],
            [ -4, -12, 0, 0, 0, 0, 0, 1, 1, 1, 2678400.0, 7173830000000.0],  # ???
            [ -4, -10, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -4, -8, 0, 0, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
            [ -4, -4, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -3, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -3, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -3, -3, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -3, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -2, -11, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -2, -3, 0, 0, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
            [ -2, -2, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0]
        ]
    associationStats = DBUtil.execute(associationQuery)
    self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

    # Check the association stats for non-unique counts as well (allowing for repeats)
    nonUniqueAssociationQuery = \
        """
        select clinical_item_id, subsequent_item_id,
            count_0, count_3600, count_86400, count_604800,
            count_2592000, count_7776000, count_31536000, count_126144000,
            count_any,
            time_diff_sum, time_diff_sum_squares
        from clinical_item_association
        where clinical_item_id < 0
        order by clinical_item_id, subsequent_item_id
        """
    expectedAssociationStats = \
        [   # Note that sum-squares in particular gets rounded off due to large values
            [-12, -12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-12, -10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-12, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-12, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [-11, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-11, -3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
            [-11, -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-10, -12, 0, 0, 0, 0, 1, 2, 2, 2, 2, 5270400.0, 13892300000000.0],
            [-10, -10, 2, 2, 3, 3, 3, 3, 3, 3, 3, 86400.0, 7464960000.0],
            [-10, -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
            [-10, -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -8, -12, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2671200.0, 7135310000000.0],
            [ -8, -10, 0, 0, 1, 1, 1, 1, 1, 1, 1, 79200.0, 6272640000.0],
            [ -8, -8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -8, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -7, -11, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
            [ -7, -7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -7, -6, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
            [ -6, -11, 1, 1, 1, 2, 2, 2, 2, 2, 2, 172800.0, 29859800000.0],
            [ -6, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -6, -6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [ -6, -3, 0, 0, 0, 1, 1, 1, 1, 1, 1, 201600.0, 40642600000.0],
            [ -4, -12, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2678400.0, 7173830000000.0],
            [ -4, -10, 1, 1, 2, 2, 2, 2, 2, 2, 2, 86400.0, 7464960000.0],
            [ -4, -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
            [ -4, -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -3, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -3, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -3, -3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -3, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -2, -11, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -2, -3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
            [ -2, -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
        ]
    associationStats = DBUtil.execute(nonUniqueAssociationQuery)
    self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

    # Again for patient level counts
    patientAssociationQuery = \
        """
        select clinical_item_id, subsequent_item_id,
            patient_count_0, patient_count_3600, patient_count_86400, patient_count_604800,
            patient_count_2592000, patient_count_7776000, patient_count_31536000, patient_count_126144000,
            patient_count_any,
            patient_time_diff_sum, patient_time_diff_sum_squares
        from clinical_item_association
        where clinical_item_id < 0
        order by clinical_item_id, subsequent_item_id
        """
    expectedAssociationStats = \
        [   # Note that sum-squares in particular gets rounded off due to large values
            [-12, -12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-12, -10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-12, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-12, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [-11, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-11, -3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
            [-11, -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-10, -12, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2678400.0, 2678400.0*2678400.0],  # Main difference. Duplicates within a single patient, only count once
            [-10, -10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],  # Don't count duplicates
            [-10, -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
            [-10, -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -8, -12, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2671200.0, 7135310000000.0],
            [ -8, -10, 0, 0, 1, 1, 1, 1, 1, 1, 1, 79200.0, 6272640000.0],
            [ -8, -8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -8, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -7, -11, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
            [ -7, -7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -7, -6, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
            [ -6, -11, 1, 1, 1, 2, 2, 2, 2, 2, 2, 172800.0, 29859800000.0],
            [ -6, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -6, -6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [ -6, -3, 0, 0, 0, 1, 1, 1, 1, 1, 1, 201600.0, 40642600000.0],
            [ -4, -12, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2678400.0, 7173830000000.0],
            [ -4, -10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],  # Don't count repeats
            [ -4, -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
            [ -4, -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -3, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -3, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -3, -3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -3, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -2, -11, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -2, -3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
            [ -2, -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
        ]
    associationStats = DBUtil.execute(patientAssociationQuery)
    self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

    # Again for encounter level counts
    encounterAssociationQuery = \
        """
        select clinical_item_id, subsequent_item_id,
            encounter_count_0, encounter_count_3600, encounter_count_86400, encounter_count_604800,
            encounter_count_2592000, encounter_count_7776000, encounter_count_31536000, encounter_count_126144000,
            encounter_count_any,
            encounter_time_diff_sum, encounter_time_diff_sum_squares
        from clinical_item_association
        where clinical_item_id < 0
        order by clinical_item_id, subsequent_item_id
        """
    expectedAssociationStats = \
        [   # Note that sum-squares in particular gets rounded off due to large values
            [-12, -12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-12, -10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-12, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-12, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [-11, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-11, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],  # No longer related in separate encounters
            [-11, -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-10, -12, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2592000.0, 2592000.0*2592000.0],  # Only count the relation within a common encounter
            [-10, -10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],  # Now count for separate encounters
            [-10, -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
            [-10, -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -8, -12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],  # No longer related in separate encounters
            [ -8, -10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],  # No longer related in separate encounters
            [ -8, -8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -8, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -7, -11, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
            [ -7, -7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -7, -6, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
            [ -6, -11, 1, 1, 1, 2, 2, 2, 2, 2, 2, 172800.0, 29859800000.0],
            [ -6, -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -6, -6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
            [ -6, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],  # No longer related in separate encounters
            [ -4, -12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],  # No longer related in separate encounters
            [ -4, -10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -4, -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
            [ -4, -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -3, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -3, -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -3, -3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -3, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [ -2, -11, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            [ -2, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],  # No longer related in separate encounters
            [ -2, -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
        ]
    associationStats = DBUtil.execute(encounterAssociationQuery)
    self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)
def mergeRelated(self, baseClinicalItemId, clinicalItemIds, reassignMergedItems=True, conn=None):
    """The specified clinical items will be merged / composited into the
    base clinical item provided. The remaining now redundant items will be deactivated.

    Patient_item instances will be reassigned to the merged clinical_item
    (while backup links will be saved to backup_link_patient_item),
    clinical_item_association counts for the redundant items will removed
    and analyze_dates reset, requiring a re-run of AssociationAnalysis
    to redo those counts from scratch
    (but will now count as the merged / composite item rather than separate ones).

    Could theoretically figure out how to combine the association stats
    without re-running analysis, but patient_counts are supposed to ignore
    duplicates, so hard to know how to aggregate stats
    (not enough info in them to tell if unique cooccurrences?)

    Examples this could be relevant for:
        All blood transfusion indexes, G vs J vs Feeding tube equivalent,
        Ear, Eyes med routes irrelevant which ear/eye.

    :param baseClinicalItemId: item that survives and receives the composite name/description
    :param clinicalItemIds: items to fold into the base (base id may be included; it is excluded from deactivation)
    :param reassignMergedItems: when True, patient_item rows for the merged items
        are repointed at the base item after backup links are recorded
    :param conn: optional existing DB connection; a new one is created (and closed) when not provided
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # Deactivate other items (base item is kept active)
        deactivateIds = set(clinicalItemIds)
        deactivateIds.discard(baseClinicalItemId)
        self.deactivateAnalysis(deactivateIds, conn=conn)

        # Build composite item name and description from all merged items
        allIds = set(deactivateIds)
        allIds.add(baseClinicalItemId)

        query = SQLQuery()
        query.addSelect("clinical_item_id")
        query.addSelect("name")
        query.addSelect("description")
        query.addFrom("clinical_item")
        query.addWhereIn("clinical_item_id", allIds)
        query.addOrderBy("name")  # Ensure consistency across multiple runs
        results = DBUtil.execute(query, conn=conn)

        nameList = list()
        descrList = list()
        # First pass to get Base Item Description (base item always leads the composite)
        for (clinicalItemId, name, description) in results:
            if clinicalItemId == baseClinicalItemId:
                if name is None:
                    name = ""
                if description is None:
                    description = ""
                nameList.append(name)
                descrList.append(description)
                break
        # Second pass to get the rest, in name order
        for (clinicalItemId, name, description) in results:
            if clinicalItemId != baseClinicalItemId:
                if name is None:
                    name = ""
                if description is None:
                    description = ""
                nameList.append(name)
                descrList.append(description)
        compositeName = str.join("+", nameList)
        compositeDescription = str.join("+", descrList)

        DBUtil.updateRow("clinical_item", {
            "name": compositeName,
            "description": compositeDescription
        }, baseClinicalItemId, conn=conn)

        if reassignMergedItems:
            # Reassign other items to the base item, but save backup data first
            query = SQLQuery()
            query.addSelect("patient_item_id")
            query.addSelect("clinical_item_id")
            query.addFrom("patient_item")
            query.addWhereIn("clinical_item_id", deactivateIds)
            results = DBUtil.execute(query, conn=conn)

            insertQuery = DBUtil.buildInsertQuery(
                "backup_link_patient_item",
                ["patient_item_id", "clinical_item_id"])
            for (patientItemId, clinicalItemId) in results:
                insertParams = (patientItemId, clinicalItemId)
                try:
                    # Optimistic insert of a new unique item
                    DBUtil.execute(insertQuery, insertParams, conn=conn)
                # NOTE: Python 2 only except syntax; Python 3 requires "except E as err"
                except conn.IntegrityError, err:
                    # If turns out to be a duplicate, okay, just note it and continue to insert whatever else is possible
                    log.info(err)
                    pass

            # Now to actual reassignment of patient items to the unifying base clinical item
            placeholders = generatePlaceholders(len(deactivateIds))
            query = "update patient_item set clinical_item_id = %s where clinical_item_id in (%s)" % (
                DBUtil.SQL_PLACEHOLDER, placeholders)
            params = [baseClinicalItemId]
            params.extend(deactivateIds)
            DBUtil.execute(query, params, conn=conn)
    finally:
        if not extConn:
            conn.close()
def normalizeMedIngredients(self, rxcuiDataByMedId, rowModel, convOptions, conn=None):
    """Given a rowModel of medication data, normalize it further.
    Specifically, look for common active ingredients to simplify the data.
    If the medication is actually a compound of multiple active ingredients,
    then break out into active ingredients.

    If normalizeMixtures set, then will yield out multiple items to reflect
    each active ingredient.
    If normalizeMixtures not set, will yield a single item with name being
    a composite of the active ingredients.

    Generator: yields one or more RowItemModel-style dicts per input rowModel.

    :param rxcuiDataByMedId: mapping medication_id -> {rxcui: (ingredient, theraClass)}
    :param rowModel: medication row dict; mutated in place on some paths
    :param convOptions: options object with normalizeMixtures and maxMixtureCount
    :param conn: optional existing DB connection; one is created (and closed
        when the generator completes) if not provided
    """
    extConn = conn is not None;
    if not extConn:
        conn = self.connFactory.connection();

    medId = rowModel["medication_id"]
    if medId not in rxcuiDataByMedId:
        # No mapping entry found, just use the available generic medication data then
        rowModel["code"] = GENERIC_CODE_TEMPLATE % rowModel["medication_id"];
        yield rowModel;
    else:
        # Mapping entry found, yield a normalized model for each active ingredient found
        #   (will usually be a 1-to-1 relation, but sometimes multiple)
        ingredientTheraClassByRxcui = rxcuiDataByMedId[medId];
        if len(ingredientTheraClassByRxcui) <= 1 or convOptions.normalizeMixtures:
            # Single ingredient or want component active ingredients to each have one record
            # NOTE: iteritems() is Python 2 only
            for (rxcui, (ingredient, theraClass)) in ingredientTheraClassByRxcui.iteritems():
                # ~250/15000 RxCUI's don't have a defined active ingredient.
                if ingredient is None:
                    continue
                normalizedModel = RowItemModel(rowModel);
                normalizedModel["medication_id"] = rxcui;
                normalizedModel["code"] = RXCUI_CODE_TEMPLATE % rxcui;
                normalizedModel["description"] = ingredient.title();
                yield normalizedModel;
        elif convOptions.maxMixtureCount is not None and len(ingredientTheraClassByRxcui) > convOptions.maxMixtureCount:
            # Plan to denormalize, but excessively large mixture. Forget it.
            rowModel["code"] = GENERIC_CODE_TEMPLATE % rowModel["medication_id"];
            yield rowModel;
        else:
            # Mixture of multiple ingredients and want to keep denormalized
            # Extract out the active ingredient names to make a composite based only on that unique combination
            ingredientRxcuiList = [(ingredient, rxcui) for (rxcui, (ingredient, theraClass)) in ingredientTheraClassByRxcui.iteritems()];
            ingredientRxcuiList.sort();  # Ensure consistent order

            rxcuiStrList = list();
            ingredientList = list();
            for (ingredient, rxcui) in ingredientRxcuiList:
                # ~250/15000 RxCUI's don't have a defined active ingredient.
                if ingredient is None:
                    continue
                rxcuiStrList.append(str(rxcui));
                ingredientList.append(ingredient.title());
            rxcuiComposite = str.join(",", rxcuiStrList);
            ingredientComposite = str.join("-", ingredientList);

            #rowModel["medication_id"] = hash(rxcuiComposite);  # No, just stick to existing medication ID
            rowModel["code"] = GENERIC_CODE_TEMPLATE % medId;
            rowModel["description"] = ingredientComposite;
            yield rowModel;

        # Do some extra work here to see if we can figure out therapeutic /
        # pharaceutical class labels based on available data
        if rowModel["thera_class"] is not None:
            theraClassNeedsPopulation = False;
            for (rxcui, (ingredient, theraClass)) in ingredientTheraClassByRxcui.iteritems():
                if theraClass is None:
                    # Don't have a previously populated class labels for this medication ID,
                    # but just found it with this data. Populate then.
                    # (mutates the shared rxcuiDataByMedId cache entry in place)
                    theraClass = rowModel["thera_class"];
                    ingredientTheraClassByRxcui[rxcui] = (ingredient, theraClass);
                    theraClassNeedsPopulation = True;
            if theraClassNeedsPopulation:
                rowDict = {
                    "thera_class": rowModel["thera_class"],
                    "pharm_class": rowModel["pharm_class"],
                    "pharm_subclass": rowModel["pharm_subclass"],
                }
                DBUtil.updateRow("stride_mapped_meds", rowDict, medId, idCol="medication_id", conn=conn);

    # NOTE(review): as a generator, this close only runs when the generator is
    # exhausted or garbage-collected — an abandoned generator may leak the connection
    if not extConn:
        conn.close();