def test_recommender(self):
    """Verify recommender scores and ordering against the mock test data.

    Walks through a sequence of queries on one shared RecommenderQuery:
    no key items, a key item without any data, category / item
    exclusions, real key items, and finally the same key items sorted
    by "lift" instead of "tf".  Every expected score is written as an
    explicit fraction so the arithmetic behind it stays visible.
    """
    query = RecommenderQuery()
    # queryItemIds, excludeItemIds, categoryIds and timeDeltaMax are left
    # at their defaults here.  If timeDeltaMax is set to one of the
    # constants (DELTA_ZERO, DELTA_HOUR, etc.), item associations that
    # occurred within that time delta count as co-occurrent; left blank,
    # all items within a given patient are considered co-occurrent.
    query.sortField = "tf"
    # Short list, so ask for everything and check the calculation for all
    query.limit = 16
    # Artificial constraint to focus only on test data
    query.maxRecommendedId = 0

    headers = ["clinical_item_id", "score"]

    def scoredRows(idScorePairs):
        # Build a fresh expected-result list for each scenario
        return [
            RowItemModel([itemId, score], headers)
            for (itemId, score) in idScorePairs
        ]

    def without(idScorePairs, droppedItemIds):
        # Same pairs, same order, minus the items filtered out of the results
        return [
            pair for pair in idScorePairs if pair[0] not in droppedItemIds
        ]

    def verify(idScorePairs):
        expectedData = scoredRows(idScorePairs)
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

    # ---- Ranking by general likelihood (no usable key items) ----
    twoOfThirteen = 2.0 / 13
    oneOfThirteen = 1.0 / 13
    generalLikelihood = [
        (-2, twoOfThirteen),
        (-5, twoOfThirteen),
        (-6, twoOfThirteen),
        (-1, oneOfThirteen),
        (-3, oneOfThirteen),
        (-7, oneOfThirteen),
        (-8, oneOfThirteen),
        (-10, oneOfThirteen),
        (-11, oneOfThirteen),
        (-12, oneOfThirteen),
        (-13, oneOfThirteen),
        (-14, oneOfThirteen),
        (-15, oneOfThirteen),
    ]

    log.debug(
        "Query with no item key input, just return ranks by general likelihood then."
    )
    verify(generalLikelihood)

    log.debug(
        "Query with key item inputs for which no data exists. Effecitvely ignore it then, so just return ranks by general likelihood."
    )
    query.queryItemIds = {-100}
    verify(generalLikelihood)

    log.debug("Query with category filter on recommended results.")
    query.queryItemIds = {-100}
    query.excludeCategoryIds = {-1, -4, -5, -6}
    # Items -2, -1, -3, -14 and -15 are expected to drop out under this filter
    droppedByCategory = (-2, -1, -3, -14, -15)
    verify(without(generalLikelihood, droppedByCategory))

    log.debug(
        "Query with category filter and specific exclusion filter on recommended results."
    )
    query.queryItemIds = {-100}
    query.excludeItemIds = {-6, -10}
    query.excludeCategoryIds = {-1, -4, -5, -6}
    verify(without(generalLikelihood, droppedByCategory + (-6, -10)))

    # ---- Ranking relative to key items -2 and -5 ----
    strong = (1.0 / 6) * (2.0 / 2)
    weak = (1.0 / 6) * (1.0 / 2)
    cross = (1.0 / 4) * (1.0 / 2)
    # The key items themselves (-5, -2) are not expected in the results
    keyedByTf = [
        (-6, strong + cross),
        (-3, strong),
        (-7, strong),
        (-8, strong),
        (-14, cross),
        (-15, cross),
        (-1, weak),
        (-10, weak),
        (-11, weak),
        (-12, weak),
        (-13, weak),
    ]

    log.debug(
        "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
    )
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = set()
    verify(keyedByTf)

    log.debug("General query with category limit")
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = {-2, -4, -5, -6}
    verify(without(keyedByTf, (-6, -7, -8, -14, -15)))

    log.debug("General query with specific exclusion")
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = {-4, -3, -2}
    query.excludeCategoryIds = set()
    verify(without(keyedByTf, (-3,)))

    log.debug("General query, sort by TF*IDF lift.")
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = set()
    query.sortField = "lift"
    # Each tf score is scaled by 13 / (baseline count of the item)
    verify([
        (-3, (13.0 / 1) * strong),
        (-7, (13.0 / 1) * strong),
        (-8, (13.0 / 1) * strong),
        (-6, (13.0 / 2) * (strong + cross)),
        (-14, (13.0 / 1) * cross),
        (-15, (13.0 / 1) * cross),
        (-1, (13.0 / 1) * weak),
        (-10, (13.0 / 1) * weak),
        (-11, (13.0 / 1) * weak),
        (-12, (13.0 / 1) * weak),
        (-13, (13.0 / 1) * weak),
    ])
def action_default(self):
    """Look for related orders by association / recommender methods.

    Reads its options from self.requestData ("sim_patient_id", "sim_time",
    "sortField", "displayFields", "groupByCategory", plus whatever
    RecommenderQuery.parseParams consumes) and writes the rendered output
    back into self.requestData["fieldHeaders"] and
    self.requestData["dataRows"].  An empty "sortField" is replaced in
    self.requestData by the default "P-YatesChi2-NegLog".
    """
    # If patient is specified then modify query and exclusion list based on items already ordered for patient
    recentItemIds = set()
    if self.requestData["sim_patient_id"]:
        patientId = int(self.requestData["sim_patient_id"])
        simTime = int(self.requestData["sim_time"])

        # Track recent item IDs (orders, diagnoses, unlocked results, etc. that related order queries will be based off of)
        manager = SimManager()
        recentItemIds = manager.recentItemIds(patientId, simTime)

    # Recommender Instance to test on
    self.recommender = ItemAssociationRecommender()
    # Allow caching of data for rapid successive queries
    self.recommender.dataManager.dataCache = webDataCache

    query = RecommenderQuery()
    if self.requestData["sortField"] == "":
        # P-Fisher-NegLog should yield better results, but beware, much longer to calculate
        self.requestData["sortField"] = "P-YatesChi2-NegLog"
    query.parseParams(self.requestData)
    if not query.excludeItemIds:
        query.excludeItemIds = self.recommender.defaultExcludedClinicalItemIds()
    if not query.excludeCategoryIds:
        query.excludeCategoryIds = self.recommender.defaultExcludedClinicalItemCategoryIds()
    #query.fieldList.extend( ["prevalence","PPV","RR"] );

    displayFields = []
    if self.requestData["displayFields"] != "":
        displayFields = self.requestData["displayFields"].split(",")

    # Exclude items already ordered for the patient from any recommended list.
    # Work on a private copy: the default exclusion collection comes from the
    # recommender, which has a data cache attached above, so updating it in
    # place could leak this patient's items into the defaults seen by later
    # requests.  NOTE(review): whether the recommender hands out a shared
    # object is not visible from here; the copy is harmless either way.
    excludeItemIds = set(query.excludeItemIds)
    excludeItemIds.update(recentItemIds)
    query.excludeItemIds = excludeItemIds

    if not query.queryItemIds:
        # If no specific query items specified, then use the recent patient item IDs
        query.queryItemIds.update(recentItemIds)

    recommendedData = self.recommender(query)

    if len(recommendedData) > 0:
        # Denormalize results with links to clinical item descriptions
        self.recommender.formatRecommenderResults(recommendedData)

    # Display fields should append Format suffix to identify which version to display, but use original for header labels
    (self.requestData["fieldHeaders"],
     displayFieldsFormatSuffixed) = self.prepareDisplayHeaders(displayFields)

    # Format for HTML and add a control field for interaction with the data
    for dataModel in recommendedData:
        self.prepareResultRow(dataModel, displayFields)

    # Try organize by category
    if self.requestData["groupByCategory"]:
        recommendedData = self.recommender.organizeByCategory(recommendedData)

    colNames = ["controls"]  # "name" for code. ,"category_description"
    colNames.extend(displayFieldsFormatSuffixed)
    colNames.append("description")

    lastModel = None
    htmlLines = []
    for dataModel in recommendedData:
        newCategory = (
            lastModel is None
            or lastModel["category_description"] != dataModel["category_description"]
        )
        # Limit category display if many repeats: only emit a header when
        # grouping is requested and the category changed from the prior row
        showCategory = (self.requestData["groupByCategory"] and newCategory)
        if showCategory:
            htmlLines.append(CATEGORY_HEADER_TEMPLATE % dataModel)
        htmlLines.append(self.formatRowHTML(dataModel, colNames, showCategory))
        lastModel = dataModel
    self.requestData["dataRows"] = "\n".join(htmlLines)
def action_default(self):
    """Look for related orders by association / recommender methods.

    Builds a RecommenderQuery from self.requestData, runs it through an
    ItemAssociationRecommender, and stores the rendered header list and
    HTML rows back into self.requestData under "fieldHeaders" and
    "dataRows".
    """
    # Instance to test on
    self.recommender = ItemAssociationRecommender()
    self.recommender.dataManager.dataCache = webDataCache

    # Fall back to the default sort when none was requested.
    # P-Fisher-NegLog should yield better results, but beware, much longer to calculate
    if self.requestData["sortField"] == "":
        self.requestData["sortField"] = "P-YatesChi2-NegLog"

    query = RecommenderQuery()
    query.parseParams(self.requestData)
    if not len(query.excludeItemIds):
        query.excludeItemIds = self.recommender.defaultExcludedClinicalItemIds()
    if not len(query.excludeCategoryIds):
        query.excludeCategoryIds = self.recommender.defaultExcludedClinicalItemCategoryIds()
    #query.fieldList.extend( ["prevalence","PPV","RR"] );

    requestedFields = self.requestData["displayFields"]
    displayFields = requestedFields.split(",") if requestedFields != "" else []

    resultRows = self.recommender(query)
    if len(resultRows) > 0:
        # Denormalize results with links to clinical item descriptions
        self.recommender.formatRecommenderResults(resultRows)

    # Display fields should append Format suffix to identify which version
    # to display, but use original for header labels
    headerInfo = self.prepareDisplayHeaders(displayFields)
    self.requestData["fieldHeaders"], suffixedFields = headerInfo

    # Format for HTML and add a control field for interaction with the data
    for row in resultRows:
        self.prepareResultRow(row, displayFields)

    # Try organize by category
    if self.requestData["groupByCategory"]:
        resultRows = self.recommender.organizeByCategory(resultRows)

    # "name" for code. ,"category_description"
    colNames = ["controls"] + list(suffixedFields) + ["description"]

    renderedLines = []
    previousRow = None
    for row in resultRows:
        if previousRow is None:
            isNewCategory = True
        else:
            isNewCategory = (
                previousRow["category_description"] != row["category_description"]
            )
        # Limit category display if many repeats
        showCategory = self.requestData["groupByCategory"] and isNewCategory
        if showCategory:
            renderedLines.append(CATEGORY_HEADER_TEMPLATE % row)
        renderedLines.append(self.formatRowHTML(row, colNames, showCategory))
        previousRow = row

    self.requestData["dataRows"] = "\n".join(renderedLines)
def test_recommender(self):
    """Verify the top-ranked recommendations against the mock test data.

    Only the order of the returned clinical_item_id values is checked
    (limit of 3), across a sequence of queries that progressively
    change key items, exclusions and the time-delta limit on one
    shared RecommenderQuery.
    """
    query = RecommenderQuery()
    # queryItemIds, excludeItemIds, categoryIds and timeDeltaMax start at
    # their defaults.  If timeDeltaMax is set to one of the constants
    # (DELTA_ZERO, DELTA_HOUR, etc.), item associations that occurred
    # within that time delta count as co-occurrent; left blank, all items
    # within a given patient are considered co-occurrent.
    query.limit = 3  # Just get top 3 ranks for simplicity
    query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

    def verifyRanking(expectedItemIds):
        # Compare the current query's results with the expected id order
        headers = ["clinical_item_id"]
        expectedData = [
            RowItemModel([itemId], headers) for itemId in expectedItemIds
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

    log.debug(
        "Query with no item key input, just return ranks by general likelihood then."
    )
    verifyRanking([-3, -6, -5])

    log.debug(
        "Query with key item inputs for which no data exists. Effecitvely ignore it then, so just return ranks by general likelihood."
    )
    query.queryItemIds = {-100}
    verifyRanking([-3, -6, -5])

    log.debug("Query with category filter on recommended results.")
    query.queryItemIds = {-100}
    query.excludeCategoryIds = {-1, -4, -5, -6}
    verifyRanking([-6, -5])

    log.debug(
        "Query with category filter and specific exclusion filter on recommended results."
    )
    query.queryItemIds = {-100}
    query.excludeItemIds = {-6}
    query.excludeCategoryIds = {-1, -4, -5, -6}
    verifyRanking([-5])

    log.debug(
        "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
    )
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = set()
    verifyRanking([-4, -6])

    log.debug(
        "General query but set a limit on time delta worth counting item associations"
    )
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = set()
    query.timeDeltaMax = DELTA_HOUR
    verifyRanking([-6, -4])

    log.debug("General query with category limit")
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = {-2, -4, -5, -6}
    query.timeDeltaMax = DELTA_HOUR
    verifyRanking([-4])

    log.debug("General query with specific exclusion")
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = {-4, -3, -2}
    query.excludeCategoryIds = set()
    query.timeDeltaMax = DELTA_HOUR
    verifyRanking([-6])
# Keep track of each guideline name set itemIdsByAdmitDxId = dict() for admitDxId, sectionName, guidelineName, itemId, itemName, itemDescription, itemCount in resultsTable: if admitDxId not in itemIdsByAdmitDxId: itemIdsByAdmitDxId[admitDxId] = set() itemIdsByAdmitDxId[admitDxId].add(itemId) admitDxIdSectionGuidelineNameTuples.add( (admitDxId, sectionName, guidelineName)) recommender = ItemAssociationRecommender() for admitDxId, itemIds in itemIdsByAdmitDxId.iteritems(): print >> sys.stderr, admitDxId, len(itemIds) recQuery = RecommenderQuery() recQuery.excludeItemIds = recommender.defaultExcludedClinicalItemIds() recQuery.excludeCategoryIds = recommender.defaultExcludedClinicalItemCategoryIds( ) recQuery.queryItemIds = [admitDxId] recQuery.timeDeltaMax = timedelta(1) # Within one day recQuery.countPrefix = "patient_" recQuery.limit = TOP_ITEM_COUNT # Top results by P-value recQuery.sortField = "P-YatesChi2-NegLog" results = recommender(recQuery) #recommender.formatRecommenderResults(results); for result in results: itemIds.add(result["clinical_item_id"]) #print >> sys.stderr, result["description"]; print >> sys.stderr, admitDxId, len(itemIds)