def test_tripleSequence_virtualItem(self):
    """Test outcome assessment when the target is a virtual item.

    The virtual item (-16) is defined by the presence of a triple (instead of
    double) sequence of items: the base category item followed by items -15
    and then -14.  The expected row is checked twice: once by calling the
    analyzer directly, and once through its command-line ``main`` entry point
    with stdout captured.
    """
    # Run the recommender against the mock test data and verify expected stats afterwards.
    analysisQuery = AnalysisQuery()
    analysisQuery.patientIds = {-22222}
    analysisQuery.baseCategoryId = -7
    analysisQuery.queryTimeSpan = timedelta(0, 86400)
    analysisQuery.sequenceItemIdsByVirtualItemId[-16] = (-15, -14)
    analysisQuery.recommender = ItemAssociationRecommender()
    analysisQuery.baseRecQuery = RecommenderQuery()
    analysisQuery.baseRecQuery.targetItemIds = {-16}
    analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

    # Initial run without time limits on outcome measure
    colNames = ["patient_id", "outcome.-16", "score.-16"]
    expectedResults = [RowItemModel([-22222, +1, 0.14286], colNames)]
    analysisResults = self.analyzer(analysisQuery)
    self.assertEqualStatResults(expectedResults, analysisResults, colNames)

    # Redo but run through command-line interface
    argv = [
        "OutcomePredictionAnalysis.py",
        "-c", "-7",
        "-Q", "86400",
        "-o", "-16=-15:-14",
        "-m", "0",
        "-R", "ItemAssociationRecommender",
        '0,-22222',
        "-",
    ]
    # Redirect stdout to collect the results, but always put the real stream
    # back so the redirect cannot leak into assertions or later tests.
    originalStdout = sys.stdout
    sys.stdout = StringIO()
    try:
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
    finally:
        sys.stdout = originalStdout
    self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)
def __init__(self):
    """Set up default request parameters, the default handler and the recommender."""
    BaseDynamicData.__init__(self)

    # Default value for every request parameter, assigned in this order.
    requestDefaults = (
        ("queryItemIds", ""),
        ("targetItemIds", ""),
        ("excludeItemIds", ""),
        ("excludeCategoryIds", ""),
        ("timeDeltaMax", ""),
        ("sortField", "PPV"),
        ("sortReverse", "True"),
        ("resultCount", "10"),
        ("invertQuery", ""),
        ("showCounts", ""),
        ("countPrefix", ""),
        ("aggregationMethod", "weighted"),
        ("fieldHeaders", ""),
        ("dataRows", ""),
    )
    for paramName, defaultValue in requestDefaults:
        self.requestData[paramName] = defaultValue

    # Register the default action under the "resultCount" request key.
    defaultActionName = ItemRecommendationTable.action_default.__name__
    self.addHandler("resultCount", defaultActionName)

    # Recommender instance to query, sharing the web data cache.
    recommender = ItemAssociationRecommender()
    self.recommender = recommender
    self.recommender.dataManager.dataCache = webDataCache
def test_recommenderAnalysis(self):
    """Run the analyzer against the mock test data and verify the expected ranking rows.

    Three passes are made for patient -11111: the baseline frequency
    recommender, the item association recommender, and the item association
    recommender again with ``queryItemMax`` capped at 2.  Only ranks are
    pinned; scores are matched with SENTINEL_ANY_FLOAT.
    """
    query = AnalysisQuery()
    query.patientIds = set([-11111])
    query.recommender = BaselineFrequencyRecommender()
    query.baseRecQuery = RecommenderQuery()
    query.baseRecQuery.maxRecommendedId = 0  # Restrict to test data
    # Note: default item / category exclusions and timeDeltaMax are deliberately left unset here.

    # Meaning of each position in the expected tuples below (kept for reference only).
    columnNames = [
        "patient_id", "clinical_item_id", "iItem", "iRecItem", "recRank", "recScore"
    ]

    def runAndCompare(expectedRows):
        # Third argument is passed through unchanged; presumably a comparison precision - TODO confirm
        actualRows = self.analyzer(query)
        self.assertEqualTable(expectedRows, actualRows, 3)

    # Start with default recommender.
    # Don't care about specific scores, as long as ranks are correct.
    runAndCompare(
        [
            (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),
            (-11111, -10, 1, 1, 4, SENTINEL_ANY_FLOAT),
            (-11111, -8, 2, 2, 5, SENTINEL_ANY_FLOAT),
            (-11111, -12, 4, 3, 2, SENTINEL_ANY_FLOAT),
        ]
    )

    # Now try targeted recommender
    query.recommender = ItemAssociationRecommender()
    runAndCompare(
        [
            (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),
            (-11111, -10, 1, 1, 2, SENTINEL_ANY_FLOAT),
            (-11111, -8, 2, 2, 5, SENTINEL_ANY_FLOAT),
            (-11111, -12, 4, 3, 1, SENTINEL_ANY_FLOAT),
        ]
    )

    # Repeat, but put a limit on maximum number of query items and recommendations we want analyzed
    query.queryItemMax = 2
    runAndCompare(
        [
            (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),
            (-11111, -10, 1, 1, 2, SENTINEL_ANY_FLOAT),
        ]
    )
print("Creating clinical_item_id to description map")
id2description = {}
# Plain text mode: Python 3 already applies universal-newline handling, and the
# legacy "U" mode flag was removed in Python 3.11.  The with-block guarantees
# the file handle is closed even if parsing fails part-way.
with open('/Users/jwang/Desktop/Results/clinical_items.csv', "r") as clinical_items:
    clinical_items.readline()  # Discard the first line (presumably a header row)
    for line in clinical_items:
        # Naive comma split: first field is the ID, the remaining fields are
        # re-joined with spaces to form the description.
        line = line.strip().split(",")
        clinical_item_id = line[0]
        description = " ".join(line[1:])
        id2description[clinical_item_id] = description

# Fixed portion of the query string shared by every diagnosis query
baseQueryStr = "&targetItemIds=&excludeItemIds=71052,71046,71054,71083,71045,71047&excludeCategoryIds=1,58,4,2,160,161,59,13,159,163,23,62,18,11,46,2&timeDeltaMax=86400&sortField=P-YatesChi2-NegLog&sortReverse=True&filterField1=prevalence<:&filterField2=PPV<:&filterField3=RR<:&filterField4=sensitivity<:&filterField5=P-YatesChi2<:&resultCount=4000&invertQuery=false&showCounts=true&countPrefix=patient_&aggregationMethod=weighted&cacheTime=0"
recommender = ItemAssociationRecommender()
diagnosis_count = 0

# Reopen diagnoses, from the top of the file
with open('/Users/jwang/Desktop/Results/diagnoses_to_test.csv', "r") as diagnoses:
    diagnoses.readline()  # Discard the first line (presumably a header row)
    for line in diagnoses:
        line = line.strip().split(",")
        clinical_item_id = line[0]
        description = " ".join(line[1:])
        queryStr = "queryItemIds=" + str(clinical_item_id) + baseQueryStr
        print('Finding Top Associations for "{0}"'.format(description))

        # Build RecommenderQuery from the URL-style parameter string
        # (second argument True keeps parameters that have blank values)
        query = RecommenderQuery()
        paramDict = dict(urlparse.parse_qsl(queryStr, True))
        query.parseParams(paramDict)

        # Call ItemRecommender
def action_default(self):
    """Look for related orders by association / recommender methods.

    Builds a RecommenderQuery from the request data, runs the item association
    recommender, and stores the rendered header list and HTML rows back into
    ``self.requestData`` ("fieldHeaders" and "dataRows").
    """
    # If a patient is specified, the query and exclusion list are adjusted
    # based on items already recorded for that patient.
    recentItemIds = set()
    if self.requestData["sim_patient_id"]:
        simPatientId = int(self.requestData["sim_patient_id"])
        simTime = int(self.requestData["sim_time"])
        # Recent item IDs (orders, diagnoses, unlocked results, etc.) that
        # related order queries will be based off of
        recentItemIds = SimManager().recentItemIds(simPatientId, simTime)

    # Recommender instance to query; share the web data cache to allow
    # caching of data for rapid successive queries
    self.recommender = ItemAssociationRecommender()
    self.recommender.dataManager.dataCache = webDataCache

    query = RecommenderQuery()
    if self.requestData["sortField"] == "":
        # P-Fisher-NegLog should yield better results, but beware, much longer to calculate
        self.requestData["sortField"] = "P-YatesChi2-NegLog"
    query.parseParams(self.requestData)

    # Fall back to the recommender's default exclusions when none were requested
    if len(query.excludeItemIds) == 0:
        query.excludeItemIds = self.recommender.defaultExcludedClinicalItemIds()
    if len(query.excludeCategoryIds) == 0:
        query.excludeCategoryIds = (
            self.recommender.defaultExcludedClinicalItemCategoryIds()
        )

    displayFields = list()
    if self.requestData["displayFields"] != "":
        displayFields = self.requestData["displayFields"].split(",")

    # Exclude items already ordered for the patient from any recommended list
    query.excludeItemIds.update(recentItemIds)
    if not query.queryItemIds:
        # If no specific query items specified, then use the recent patient item IDs
        query.queryItemIds.update(recentItemIds)

    recommendedData = self.recommender(query)

    if len(recommendedData) > 0:
        # Denormalize results with links to clinical item descriptions
        self.recommender.formatRecommenderResults(recommendedData)

    # Display fields should append Format suffix to identify which version to
    # display, but use original for header labels
    headerAndFields = self.prepareDisplayHeaders(displayFields)
    (self.requestData["fieldHeaders"], displayFieldsFormatSuffixed) = headerAndFields

    # Format for HTML and add a control field for interaction with the data
    for row in recommendedData:
        self.prepareResultRow(row, displayFields)

    # Try organize by category
    if self.requestData["groupByCategory"]:
        recommendedData = self.recommender.organizeByCategory(recommendedData)

    # Column order for each rendered row: controls, display fields, description
    columnKeys = ["controls"]
    columnKeys += list(displayFieldsFormatSuffixed)
    columnKeys += ["description"]

    renderedLines = []
    previousRow = None
    for row in recommendedData:
        if previousRow is None:
            newCategory = True
        else:
            newCategory = (
                previousRow["category_description"] != row["category_description"]
            )
        # Limit category display if many repeats
        showCategory = (self.requestData["groupByCategory"] and newCategory)
        if showCategory:
            renderedLines.append(CATEGORY_HEADER_TEMPLATE % row)
        renderedLines.append(self.formatRowHTML(row, columnKeys, showCategory))
        previousRow = row
    self.requestData["dataRows"] = "\n".join(renderedLines)
def test_recommenderAnalysis(self):
    """Run the recommender against the mock test data and verify expected classification stats.

    Each scenario is verified through up to three routes that must all yield
    the same expected rows:

    1. calling ``self.analyzer`` directly with an AnalysisQuery,
    2. calling ``self.analyzer.main`` with command-line arguments,
    3. calling ``self.preparer.main`` to produce a prepared data file which is
       then fed to ``self.analyzer.main`` through redirected stdin.

    stdout / stdin are only redirected inside ``captureMain`` and are always
    restored, so the redirects cannot leak into assertions or later tests.
    """

    def captureMain(component, argv, stdinFile=None):
        """Run component.main(argv) with stdout captured (and stdin optionally
        replaced by stdinFile); return the captured text wrapped in a StringIO."""
        originalStdout, originalStdin = sys.stdout, sys.stdin
        sys.stdout = StringIO()
        if stdinFile is not None:
            sys.stdin = stdinFile  # Read prepared data file from redirected stdin
        try:
            component.main(argv)
            return StringIO(sys.stdout.getvalue())
        finally:
            sys.stdout, sys.stdin = originalStdout, originalStdin

    def verifyDirect(analysisQuery, expectedResults, colNames):
        """Call the analyzer object directly and compare against the expected rows."""
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

    def verifyCommandLine(argv, expectedResults, colNames):
        """Run the analyzer command-line interface and compare its text output."""
        textOutput = captureMain(self.analyzer, argv)
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

    def verifyPrepared(prepareArgv, analyzeArgv, expectedResults, colNames):
        """Run the preparer, pipe its output into the analyzer via stdin, compare text output."""
        preparedDataFile = captureMain(self.preparer, prepareArgv)
        textOutput = captureMain(self.analyzer, analyzeArgv, stdinFile=preparedDataFile)
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

    analysisQuery = AnalysisQuery()
    analysisQuery.patientIds = {-11111}
    analysisQuery.numQueryItems = 1
    analysisQuery.numVerifyItems = 3
    analysisQuery.numRecommendations = 4
    analysisQuery.recommender = BaselineFrequencyRecommender()
    analysisQuery.baseRecQuery = RecommenderQuery()
    analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

    # Don't use items whose default is to be excluded from recommendations
    analysisQuery.baseRecQuery.excludeCategoryIds = analysisQuery.recommender.defaultExcludedClinicalItemCategoryIds()
    analysisQuery.baseRecQuery.excludeItemIds = analysisQuery.recommender.defaultExcludedClinicalItemIds()
    # timeDeltaMax is left unset, so queries default to all times

    colNames = ["patient_id", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall", "weightPrecision", "normalRecall", "normalPrecision", "ROC-AUC"]

    # ---- Start with default recommender
    expectedResults = [RowItemModel([-11111, 1, 2, 3, 0.333, 0.25, 0.286, 0.208, 0.254, 0.333/1.0, 0.25/0.75, 0.524], colNames)]
    verifyDirect(analysisQuery, expectedResults, colNames)
    verifyCommandLine(
        ["RecommendationClassificationAnalysis.py", "-q", "1", "-v", "3", "-r", "4", "-m", "0", "-R", "BaselineFrequencyRecommender", '0,-11111', "-"],
        expectedResults, colNames)
    verifyPrepared(
        ["PreparePatientItems.py", "-q", "1", "-v", "3", '0,-11111', "-"],
        ["RecommendationClassificationAnalysis.py", "-P", "-r", "4", "-m", "0", "-R", "BaselineFrequencyRecommender", '-', "-"],
        expectedResults, colNames)

    # ---- Now try targeted recommender
    analysisQuery.recommender = ItemAssociationRecommender()
    expectedResults = [RowItemModel([-11111, 1, 2, 3, 0.333, 0.25, 0.286, 0.347, 0.293, 0.333, 0.25/0.75, 0.6666], colNames)]
    verifyDirect(analysisQuery, expectedResults, colNames)
    verifyCommandLine(
        ["RecommendationClassificationAnalysis.py", "-q", "1", "-v", "3", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"],
        expectedResults, colNames)
    verifyPrepared(
        ["PreparePatientItems.py", "-q", "1", "-v", "3", '0,-11111', "-"],
        ["RecommendationClassificationAnalysis.py", "-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '-', "-"],
        expectedResults, colNames)

    # ---- Now try multiple query items targeted recommender
    analysisQuery.numQueryItems = 2
    expectedResults = [RowItemModel([-11111, 1, 2, 3, 0.333, 0.25, 0.286, 0.254, 0.194, 0.333, 0.25/0.75, 0.4167], colNames)]
    verifyDirect(analysisQuery, expectedResults, colNames)
    verifyCommandLine(
        ["RecommendationClassificationAnalysis.py", "-q", "2", "-v", "3", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"],
        expectedResults, colNames)
    verifyPrepared(
        ["PreparePatientItems.py", "-q", "2", "-v", "3", '0,-11111', "-"],
        ["RecommendationClassificationAnalysis.py", "-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '-', "-"],
        expectedResults, colNames)

    # ---- More query items with aggregation options
    analysisQuery.numQueryItems = 3
    expectedResults = [RowItemModel([-11111, 1, 1, 3, 0.5, 0.25, 0.333, 0.517, 0.194, 0.5, 0.25/0.5, 0.4166], colNames)]
    verifyDirect(analysisQuery, expectedResults, colNames)
    verifyCommandLine(
        ["RecommendationClassificationAnalysis.py", "-q", "3", "-v", "3", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"],
        expectedResults, colNames)
    verifyPrepared(
        ["PreparePatientItems.py", "-q", "3", "-v", "3", '0,-11111', "-"],
        ["RecommendationClassificationAnalysis.py", "-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '-', "-"],
        expectedResults, colNames)

    # ---- Value filters
    analysisQuery.baseRecQuery.sortField = "freqRatio"
    analysisQuery.baseRecQuery.fieldFilters["freqRatio>"] = 70
    expectedResults = [RowItemModel([-11111, 2, 0, 2, 1.0, 0.5, 0.6666, 1.0, 0.446, 1.0, 0.5/0.5, 0.375], colNames)]
    verifyDirect(analysisQuery, expectedResults, colNames)
    del analysisQuery.baseRecQuery.fieldFilters["freqRatio>"]  # Undo to not affect subsequent queries
    verifyCommandLine(
        ["RecommendationClassificationAnalysis.py", "-s", "freqRatio", "-f", "freqRatio>:70.0", "-q", "3", "-v", "3", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"],
        expectedResults, colNames)
    verifyPrepared(
        ["PreparePatientItems.py", "-q", "3", "-v", "3", '0,-11111', "-"],
        ["RecommendationClassificationAnalysis.py", "-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", "-s", "freqRatio", "-f", "freqRatio>:70.0", '-', "-"],
        expectedResults, colNames)

    # ---- Unweighted aggregation
    analysisQuery.baseRecQuery.weightingMethod = "unweighted"
    expectedResults = [RowItemModel([-11111, 1, 1, 3, 0.5, 0.25, 0.3333, 0.517, 0.194, 0.5, 0.25/0.5, 0.25], colNames)]
    verifyDirect(analysisQuery, expectedResults, colNames)
    verifyCommandLine(
        ["RecommendationClassificationAnalysis.py", "-s", "freqRatio", "-q", "3", "-v", "3", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", "-a", "unweighted", '0,-11111', "-"],
        expectedResults, colNames)
    verifyPrepared(
        ["PreparePatientItems.py", "-q", "3", "-v", "3", '0,-11111', "-"],
        ["RecommendationClassificationAnalysis.py", "-s", "freqRatio", "-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", "-a", "unweighted", '-', "-"],
        expectedResults, colNames)

    # ---- Run by equivalent query time span selection rather than explicit counts
    colNames = ["patient_id", "baseItemId", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall", "weightPrecision", "ROC-AUC"]
    expectedResults = [RowItemModel([-11111, -4, 1, 1, 3, 0.5, 0.25, 0.333, 0.517, 0.194, 0.4167], colNames)]
    analysisQuery.baseRecQuery.sortField = "conditionalFreq"
    analysisQuery.numQueryItems = None
    analysisQuery.numVerifyItems = None
    analysisQuery.baseCategoryId = -1
    analysisQuery.queryTimeSpan = timedelta(0, 3*60*60)
    analysisQuery.verifyTimeSpan = timedelta(50, 0)
    analysisQuery.numRecommendations = 4
    verifyDirect(analysisQuery, expectedResults, colNames)
    verifyCommandLine(
        ["RecommendationClassificationAnalysis.py", "-c", "-1", "-Q", "5400", "-V", "4320000", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"],
        expectedResults, colNames)
    verifyPrepared(
        ["PreparePatientItems.py", "-c", "-1", "-Q", "5400", "-V", "4320000", '0,-11111', "-"],
        ["RecommendationClassificationAnalysis.py", "-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '-', "-"],
        expectedResults, colNames)

    # ---- Run by query time span by identifying base clinical item, rather than a general category
    analysisQuery.numQueryItems = None
    analysisQuery.numVerifyItems = None
    analysisQuery.baseCategoryId = None  # Clear prior setting
    analysisQuery.baseItemId = -4
    analysisQuery.queryTimeSpan = timedelta(0, 3*60*60)
    analysisQuery.verifyTimeSpan = timedelta(50, 0)
    analysisQuery.numRecommendations = 4
    verifyDirect(analysisQuery, expectedResults, colNames)
    verifyCommandLine(
        ["RecommendationClassificationAnalysis.py", "-b", "-4", "-Q", "5400", "-V", "4320000", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"],
        expectedResults, colNames)
    verifyPrepared(
        ["PreparePatientItems.py", "-b", "-4", "-Q", "5400", "-V", "4320000", '0,-11111', "-"],
        ["RecommendationClassificationAnalysis.py", "-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '-', "-"],
        expectedResults, colNames)

    # ---- Basic then Filter test data date range (fresh query, no date limits yet)
    colNames = ["patient_id", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall", "weightPrecision", "ROC-AUC"]
    expectedResults = [RowItemModel([-11111, 1, 1, 3, 0.5, 0.25, 0.33333, 0.4375, 0.29319, 0.66667], colNames)]
    analysisQuery = AnalysisQuery()
    analysisQuery.patientIds = {-11111}
    analysisQuery.numQueryItems = 1
    analysisQuery.numVerifyItems = 2
    analysisQuery.numRecommendations = 4
    analysisQuery.recommender = ItemAssociationRecommender()
    analysisQuery.baseRecQuery = RecommenderQuery()
    analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data
    verifyDirect(analysisQuery, expectedResults, colNames)
    verifyCommandLine(
        ["RecommendationClassificationAnalysis.py", "-q", "1", "-v", "2", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"],
        expectedResults, colNames)
    verifyPrepared(
        ["PreparePatientItems.py", "-q", "1", "-v", "2", '0,-11111', "-"],
        ["RecommendationClassificationAnalysis.py", "-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '-', "-"],
        expectedResults, colNames)

    # ---- Date Filters
    colNames = ["patient_id", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall", "weightPrecision", "ROC-AUC"]
    expectedResults = [RowItemModel([-11111, 0, 1, 2, 0.0, 0.0, 0.0, 0.0, 0.0, None], colNames)]
    analysisQuery = AnalysisQuery()
    analysisQuery.patientIds = {-11111}
    analysisQuery.numQueryItems = 1
    analysisQuery.numVerifyItems = 2
    analysisQuery.numRecommendations = 4
    analysisQuery.recommender = ItemAssociationRecommender()
    analysisQuery.baseRecQuery = RecommenderQuery()
    analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data
    analysisQuery.startDate = datetime(2000, 1, 1, 1)
    analysisQuery.endDate = datetime(2000, 1, 10)
    verifyDirect(analysisQuery, expectedResults, colNames)
    verifyCommandLine(
        ["RecommendationClassificationAnalysis.py", "-q", "1", "-v", "2", "-r", "4", "-m", "0", "-S", "2000-01-01 01:00:00", "-E", "2000-01-10", "-R", "ItemAssociationRecommender", '0,-11111', "-"],
        expectedResults, colNames)
    verifyPrepared(
        ["PreparePatientItems.py", "-q", "1", "-v", "2", "-S", "2000-01-01 01:00:00", "-E", "2000-01-10", '0,-11111', "-"],
        ["RecommendationClassificationAnalysis.py", "-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", '-', "-"],
        expectedResults, colNames)
def test_recommenderAnalysis(self):
    """Run the recommender against the mock test data and verify expected outcome prediction stats.

    Three scenarios are checked for patient -11111 and target items
    -33..-30: no time limit on the outcome measure, a 1 week limit, and a
    2 day limit.  Each scenario is verified by calling the analyzer directly,
    through its command-line ``main``, and through a prepared data file
    produced by ``self.preparer.main`` and piped in via stdin.

    stdout / stdin are only redirected inside ``captureMain`` and are always
    restored, so the redirects cannot leak into assertions or later tests.
    """

    def captureMain(component, argv, stdinFile=None):
        """Run component.main(argv) with stdout captured (and stdin optionally
        replaced by stdinFile); return the captured text wrapped in a StringIO."""
        originalStdout, originalStdin = sys.stdout, sys.stdin
        sys.stdout = StringIO()
        if stdinFile is not None:
            sys.stdin = stdinFile  # Read prepared data file from redirected stdin
        try:
            component.main(argv)
            return StringIO(sys.stdout.getvalue())
        finally:
            sys.stdout, sys.stdin = originalStdout, originalStdin

    def verifyAllRoutes(expectedResults, commandLineArgv, prepareArgv, preparedAnalyzeArgv):
        """Check the expected rows via direct call, command line, and prepared file."""
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        textOutput = captureMain(self.analyzer, commandLineArgv)
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

        # Redo through prepared file intermediary
        preparedDataFile = captureMain(self.preparer, prepareArgv)
        textOutput = captureMain(self.analyzer, preparedAnalyzeArgv, stdinFile=preparedDataFile)
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

    analysisQuery = AnalysisQuery()
    analysisQuery.patientIds = {-11111}
    analysisQuery.baseCategoryId = -7
    analysisQuery.queryTimeSpan = timedelta(0, 86400)
    analysisQuery.recommender = ItemAssociationRecommender()
    analysisQuery.baseRecQuery = RecommenderQuery()
    analysisQuery.baseRecQuery.targetItemIds = {-33, -32, -31, -30}
    analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

    # Same columns for all three scenarios
    colNames = [
        "patient_id",
        "outcome.-33", "score.-33",
        "outcome.-32", "score.-32",
        "outcome.-31", "score.-31",
        "outcome.-30", "score.-30",
    ]

    # ---- Initial run without time limits on outcome measure
    verifyAllRoutes(
        [RowItemModel([-11111, +0, 0.222, +2, 0.611, +1, 0.222, +1, 0.222], colNames)],
        ["OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-o", "-33,-32,-31,-30", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"],
        ["PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400", "-o", "-33,-32,-31,-30", '0,-11111', "-"],
        ["OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R", "ItemAssociationRecommender", '-', "-"],
    )

    # ---- Now try with time limitation on outcome measure
    analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, 604800)  # 1 week
    verifyAllRoutes(
        [RowItemModel([-11111, +0, 0.222, +2, 0.611, +0, 0.222, +1, 0.222], colNames)],
        ["OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-t", "604800", "-o", "-33,-32,-31,-30", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"],
        ["PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400", "-t", "604800", "-o", "-33,-32,-31,-30", '0,-11111', "-"],
        ["OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R", "ItemAssociationRecommender", "-t", "604800", '-', "-"],
    )

    # ---- Again, but with much stricter time limit (negative test case)
    analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, 172800)  # 2 day
    verifyAllRoutes(
        [RowItemModel([-11111, 0, 0.0109, 2, 0.0600, 0, 0.0109, 0, 0.0109], colNames)],
        ["OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-t", "172800", "-o", "-33,-32,-31,-30", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"],
        ["PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400", "-t", "172800", "-o", "-33,-32,-31,-30", '0,-11111', "-"],
        ["OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R", "ItemAssociationRecommender", "-t", "172800", '-', "-"],
    )
def setUp(self):
    """Load the mock database contents that the test cases run against.

    Calls the base DBTestCase setup and the ClinicalItemDataLoader schema
    builder, then inserts mock rows into clinical_item_category,
    clinical_item, patient_item and clinical_item_association (in that
    order). Finally clears two cached count entries on a new DataManager
    and creates the ItemAssociationRecommender stored on self.recommender.
    """
    DBTestCase.setUp(self)

    from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
    ClinicalItemDataLoader.build_clinical_item_psql_schemata()

    log.info("Populate the database with test data")

    # ---- clinical_item_category: keep each returned ID in string form ----
    categoryCols = ["clinical_item_category_id", "source_table"]
    categoryValues = [
        [-1, "Labs"],
        [-2, "Imaging"],
        [-3, "Meds"],
        [-4, "Nursing"],
        [-5, "Problems"],
        [-6, "Lab Results"],
    ]
    categoryModels = [RowItemModel(values, categoryCols) for values in categoryValues]
    self.clinicalItemCategoryIdStrList = []
    for categoryModel in categoryModels:
        categoryId, _isNew = DBUtil.findOrInsertItem("clinical_item_category", categoryModel)
        self.clinicalItemCategoryIdStrList.append(str(categoryId))

    # ---- clinical_item: (item ID, category ID, name) ----
    itemCols = ["clinical_item_id", "clinical_item_category_id", "name"]
    itemValues = [
        [-1, -1, "CBC"],
        [-2, -1, "BMP"],
        [-3, -1, "Hepatic Panel"],
        [-4, -1, "Cardiac Enzymes"],
        [-5, -2, "CXR"],
        [-6, -2, "RUQ Ultrasound"],
        [-7, -2, "CT Abdomen/Pelvis"],
        [-8, -2, "CT PE Thorax"],
        [-9, -3, "Acetaminophen"],
        [-10, -3, "Carvedilol"],
        [-11, -3, "Enoxaparin"],
        [-12, -3, "Warfarin"],
        [-13, -3, "Ceftriaxone"],
        [-14, -4, "Foley Catheter"],
        [-15, -4, "Strict I&O"],
        [-16, -4, "Fall Precautions"],
    ]
    itemModels = [RowItemModel(values, itemCols) for values in itemValues]
    for itemModel in itemModels:
        _itemId, _isNew = DBUtil.findOrInsertItem("clinical_item", itemModel)

    # ---- patient_item: every row carries the same analyze_date ----
    patientItemCols = [
        "patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date",
    ]
    analyzeDate = datetime(2010, 1, 1, 0)
    patientItemValues = [
        [-1, -11111, -4, datetime(2000, 1, 1, 0), analyzeDate],
        [-2, -11111, -10, datetime(2000, 1, 1, 0), analyzeDate],
        [-3, -11111, -8, datetime(2000, 1, 1, 2), analyzeDate],
        [-4, -11111, -10, datetime(2000, 1, 2, 0), analyzeDate],
        [-5, -11111, -12, datetime(2000, 2, 1, 0), analyzeDate],
        [-10, -22222, -7, datetime(2000, 1, 5, 0), analyzeDate],
        [-12, -22222, -6, datetime(2000, 1, 9, 0), analyzeDate],
        [-13, -22222, -11, datetime(2000, 1, 9, 0), analyzeDate],
        [-14, -33333, -6, datetime(2000, 2, 9, 0), analyzeDate],
        [-15, -33333, -2, datetime(2000, 2, 11, 0), analyzeDate],
    ]
    patientItemModels = [RowItemModel(values, patientItemCols) for values in patientItemValues]
    for patientItemModel in patientItemModels:
        _patientItemId, _isNew = DBUtil.findOrInsertItem("patient_item", patientItemModel)

    # ---- clinical_item_association: association statistics per item pair ----
    patientCountPrefix = "patient_count_"
    associationCols = [
        "clinical_item_id", "subsequent_item_id",
        "patient_count_0", "patient_count_3600", "patient_count_86400",
        "patient_count_604800", "patient_count_any",
        "time_diff_sum", "time_diff_sum_squares",
    ]
    associationValues = [
        [-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0],
        [-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0],
        [-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0],
        [-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0],
        [-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0],
        [-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0],

        # Zero count associations, probably shouldn't even be here.
        # If so, ignore them anyway
        [-2, -3, 0, 0, 0, 0, 0, 0.0, 0.0],
        [-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0],
        [-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0],
        [-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0],
        [-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0],
        [-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0],
        [-4, -7, 23, 33, 33, 33, 63, 40.0, 5420.0],
        [-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0],
        [-6, -2, 7, 7, 7, 7, 7, 1.0, 1.0],
        [-6, -4, 20, 20, 20, 20, 20, 1.0, 1.0],
    ]
    associationModels = [RowItemModel(values, associationCols) for values in associationValues]
    for associationModel in associationModels:
        # Derive a "count_<interval>" column from every "patient_count_<interval>"
        # column: copy the value, then add 5 when it is >5 and the interval is not "0".
        for colName in associationCols:
            if not colName.startswith(patientCountPrefix):
                continue
            timeStr = colName[len(patientCountPrefix):]
            countKey = "count_%s" % timeStr
            associationModel[countKey] = associationModel[colName]
            if timeStr != "0" and associationModel[colName] > 5:
                associationModel[countKey] += 5
        _associationId, _isNew = DBUtil.findOrInsertItem(
            "clinical_item_association", associationModel)

    # Indicate that cache data needs to be updated
    self.dataManager = DataManager()
    for cacheKey in ("analyzedPatientCount", "clinicalItemCountsUpdated"):
        self.dataManager.clearCacheData(cacheKey)

    self.recommender = ItemAssociationRecommender()
def action_default(self):
    """Query the association recommender and render its results as HTML rows.

    Reads the query options from self.requestData and writes back its
    "fieldHeaders" and "dataRows" entries. An empty "sortField" is
    replaced with "P-YatesChi2-NegLog" before the query is parsed.
    """
    # New recommender instance whose data manager uses webDataCache
    self.recommender = ItemAssociationRecommender()
    self.recommender.dataManager.dataCache = webDataCache

    if self.requestData["sortField"] == "":
        # P-Fisher-NegLog should yield better results, but beware, much longer to calculate
        self.requestData["sortField"] = "P-YatesChi2-NegLog"

    query = RecommenderQuery()
    query.parseParams(self.requestData)

    # Use the recommender's default exclusions when the parsed query has none
    if len(query.excludeItemIds) == 0:
        query.excludeItemIds = self.recommender.defaultExcludedClinicalItemIds()
    if len(query.excludeCategoryIds) == 0:
        query.excludeCategoryIds = self.recommender.defaultExcludedClinicalItemCategoryIds()
    #query.fieldList.extend( ["prevalence","PPV","RR"] );

    # Comma-separated list of fields to show; empty string means no fields
    requestedFields = self.requestData["displayFields"]
    displayFields = requestedFields.split(",") if requestedFields != "" else list()

    recommendedData = self.recommender(query)
    if len(recommendedData) > 0:
        # Denormalize results with links to clinical item descriptions
        self.recommender.formatRecommenderResults(recommendedData)

    # Display fields should append Format suffix to identify which version
    # to display, but use original for header labels
    headerLabels, displayFieldsFormatSuffixed = self.prepareDisplayHeaders(displayFields)
    self.requestData["fieldHeaders"] = headerLabels

    # Format for HTML and add a control field for interaction with the data
    for rowModel in recommendedData:
        self.prepareResultRow(rowModel, displayFields)

    # Try organize by category
    if self.requestData["groupByCategory"]:
        recommendedData = self.recommender.organizeByCategory(recommendedData)

    # "name" for code. ,"category_description"
    colNames = ["controls"] + list(displayFieldsFormatSuffixed) + ["description"]

    htmlLines = []
    previousModel = None
    for rowModel in recommendedData:
        if previousModel is None:
            newCategory = True
        else:
            newCategory = (
                previousModel["category_description"] != rowModel["category_description"])

        # Limit category display if many repeats
        showCategory = (self.requestData["groupByCategory"] and newCategory)
        if showCategory:
            htmlLines.append(CATEGORY_HEADER_TEMPLATE % rowModel)
        htmlLines.append(self.formatRowHTML(rowModel, colNames, showCategory))
        previousModel = rowModel

    self.requestData["dataRows"] = "\n".join(htmlLines)