예제 #1
0
    def test_tripleSequence_virtualItem(self):
        """Check outcome assessment when the target is a virtual item.

        The virtual item (-16) is defined by the presence of a triple (instead
        of double) sequence of items.  The same expected statistics are
        verified twice: once by calling the analyzer directly with an
        AnalysisQuery, and once through the analyzer's command-line entry
        point with its standard output captured.
        """
        # Run the recommender against the mock test data and verify expected stats afterwards.
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-22222])
        analysisQuery.baseCategoryId = -7
        analysisQuery.queryTimeSpan = timedelta(0, 86400)
        analysisQuery.sequenceItemIdsByVirtualItemId[-16] = (-15, -14)
        analysisQuery.recommender = ItemAssociationRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.targetItemIds = set([-16])
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # Initial run without time limits on outcome measure
        colNames = ["patient_id", "outcome.-16", "score.-16"]
        expectedResults = [RowItemModel([-22222, +1, 0.14286], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-o",
            "-16=-15:-14", "-m", "0", "-R", "ItemAssociationRecommender",
            '0,-22222', "-"
        ]
        # Redirect stdout to collect the command-line output, and always put
        # the real stream back so a failure here cannot leave later tests
        # writing into a discarded buffer.
        originalStdout = sys.stdout
        sys.stdout = StringIO()
        try:
            self.analyzer.main(argv)
            textOutput = StringIO(sys.stdout.getvalue())
        finally:
            sys.stdout = originalStdout
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)
예제 #2
0
    def __init__(self):
        """Initialize default request parameters, the default handler and the recommender."""
        BaseDynamicData.__init__(self)

        # Default request parameters, assigned one by one in this order.
        # The last two entries are output slots that start out empty.
        requestDefaults = (
            ("queryItemIds", ""),
            ("targetItemIds", ""),
            ("excludeItemIds", ""),
            ("excludeCategoryIds", ""),
            ("timeDeltaMax", ""),
            ("sortField", "PPV"),
            ("sortReverse", "True"),
            ("resultCount", "10"),
            ("invertQuery", ""),
            ("showCounts", ""),
            ("countPrefix", ""),
            ("aggregationMethod", "weighted"),
            ("fieldHeaders", ""),
            ("dataRows", ""),
        )
        for paramName, defaultValue in requestDefaults:
            self.requestData[paramName] = defaultValue

        # Register the default action under the "resultCount" request field
        defaultActionName = ItemRecommendationTable.action_default.__name__
        self.addHandler("resultCount", defaultActionName)

        # Recommender instance to run queries on, wired to the shared web data cache
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache
    def test_recommenderAnalysis(self):
        """Check the analyzer's result table for patient -11111 under two recommenders.

        Runs the analyzer with the baseline frequency recommender, then with
        the item association recommender, then with the latter again after
        capping the number of query items, comparing the result table to the
        expected rows each time.
        """
        query = AnalysisQuery()
        query.patientIds = set([-11111])
        query.recommender = BaselineFrequencyRecommender()
        query.baseRecQuery = RecommenderQuery()
        query.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # Appears to label the positions of the expected tuples below;
        # it is not passed to any assertion in this test.
        colNames = [
            "patient_id", "clinical_item_id", "iItem", "iRecItem", "recRank",
            "recScore",
        ]

        # Don't care about specific scores, as long as ranks are correct
        anyScore = SENTINEL_ANY_FLOAT

        def verifyTable(expectedRows):
            # Run the analyzer on the current query state and compare tables;
            # the trailing 3 is forwarded unchanged to assertEqualTable.
            actualRows = self.analyzer(query)
            self.assertEqualTable(expectedRows, actualRows, 3)

        # Start with default recommender
        baselineRows = [
            (-11111, -4, 0, 0, 1, anyScore),
            (-11111, -10, 1, 1, 4, anyScore),
            (-11111, -8, 2, 2, 5, anyScore),
            (-11111, -12, 4, 3, 2, anyScore),
        ]
        verifyTable(baselineRows)

        # Now try targeted recommender
        query.recommender = ItemAssociationRecommender()
        targetedRows = [
            (-11111, -4, 0, 0, 1, anyScore),
            (-11111, -10, 1, 1, 2, anyScore),
            (-11111, -8, 2, 2, 5, anyScore),
            (-11111, -12, 4, 3, 1, anyScore),
        ]
        verifyTable(targetedRows)

        # Repeat, but put a limit on maximum number of query items and
        # recommendations we want analyzed: only the first two rows remain.
        query.queryItemMax = 2
        verifyTable(targetedRows[:2])
예제 #4
0
# Build a lookup from clinical_item_id to description, then prepare one
# recommender query per diagnosis listed in the diagnoses file.
# NOTE(review): the "U" open-mode flag was removed in Python 3.11; it is kept
# here to preserve behavior on the interpreter this script was written for.
print("Creating clinical_item_id to description map")
id2description = {}
# Context manager guarantees the file handle is closed once the map is built
with open('/Users/jwang/Desktop/Results/clinical_items.csv', "rU") as clinical_items:
    clinical_items.readline()  # Discard the first line (presumably a header row)
    for line in clinical_items:
        line = line.strip().split(",")
        clinical_item_id = line[0]
        # Any commas inside the description were split apart; rejoin the pieces with spaces
        description = " ".join(line[1:])
        id2description[clinical_item_id] = description

# Query parameters shared by every diagnosis; only queryItemIds is prepended per row
baseQueryStr = "&targetItemIds=&excludeItemIds=71052,71046,71054,71083,71045,71047&excludeCategoryIds=1,58,4,2,160,161,59,13,159,163,23,62,18,11,46,2&timeDeltaMax=86400&sortField=P-YatesChi2-NegLog&sortReverse=True&filterField1=prevalence<:&filterField2=PPV<:&filterField3=RR<:&filterField4=sensitivity<:&filterField5=P-YatesChi2<:&resultCount=4000&invertQuery=false&showCounts=true&countPrefix=patient_&aggregationMethod=weighted&cacheTime=0"
recommender = ItemAssociationRecommender()

diagnosis_count = 0
# Reopen diagnoses, from the top of the file; closed automatically when the loop finishes
with open('/Users/jwang/Desktop/Results/diagnoses_to_test.csv', "rU") as diagnoses:
    diagnoses.readline()  # Discard the first line (presumably a header row)
    for line in diagnoses:
        line = line.strip().split(",")
        clinical_item_id = line[0]
        description = " ".join(line[1:])
        queryStr = "queryItemIds=" + str(clinical_item_id) + baseQueryStr
        print('Finding Top Associations for "{0}"'.format(description))

        # Build RecommenderQuery from the URL-style parameter string
        # (True keeps parameters that have blank values)
        query = RecommenderQuery()
        paramDict = dict(urlparse.parse_qsl(queryStr, True))
        query.parseParams(paramDict)

        # Call ItemRecommender
예제 #5
0
파일: RelatedOrders.py 프로젝트: xxxx3/CDSS
    def action_default(self):
        """Look for related orders by association / recommender methods.

        Builds a RecommenderQuery from the request parameters, runs the item
        association recommender, and writes the display headers and the
        rendered HTML rows back into self.requestData under "fieldHeaders"
        and "dataRows".
        """
        # If a patient is specified, track recent item IDs (orders, diagnoses,
        # unlocked results, etc.) that the query and exclusion list build on.
        recentItemIds = set()
        if self.requestData["sim_patient_id"]:
            simPatientId = int(self.requestData["sim_patient_id"])
            simTime = int(self.requestData["sim_time"])
            recentItemIds = SimManager().recentItemIds(simPatientId, simTime)

        # Recommender instance to query, with data caching enabled for rapid successive queries
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache

        # Fall back to a default sort field when none was requested.
        # P-Fisher-NegLog should yield better results, but beware, much longer to calculate.
        if self.requestData["sortField"] == "":
            self.requestData["sortField"] = "P-YatesChi2-NegLog"
        query = RecommenderQuery()
        query.parseParams(self.requestData)

        # Apply the recommender's default exclusions when the request gave none
        if len(query.excludeItemIds) == 0:
            query.excludeItemIds = (
                self.recommender.defaultExcludedClinicalItemIds())
        if len(query.excludeCategoryIds) == 0:
            query.excludeCategoryIds = (
                self.recommender.defaultExcludedClinicalItemCategoryIds())

        requestedFields = self.requestData["displayFields"]
        displayFields = (requestedFields.split(",")
                         if requestedFields != "" else list())

        # Exclude items already ordered for the patient from any recommended list
        query.excludeItemIds.update(recentItemIds)
        # If no specific query items specified, then use the recent patient item IDs
        if not query.queryItemIds:
            query.queryItemIds.update(recentItemIds)

        recommendedData = self.recommender(query)
        if len(recommendedData) > 0:
            # Denormalize results with links to clinical item descriptions
            self.recommender.formatRecommenderResults(recommendedData)

        # Display fields should append Format suffix to identify which version
        # to display, but use original for header labels
        fieldHeaders, displayFieldsFormatSuffixed = self.prepareDisplayHeaders(
            displayFields)
        self.requestData["fieldHeaders"] = fieldHeaders

        # Format for HTML and add a control field for interaction with the data
        for resultModel in recommendedData:
            self.prepareResultRow(resultModel, displayFields)

        # Try organize by category
        if self.requestData["groupByCategory"]:
            recommendedData = self.recommender.organizeByCategory(
                recommendedData)

        # Column order of each rendered row: controls, requested fields, description
        colNames = ["controls"] + list(displayFieldsFormatSuffixed) + [
            "description"
        ]

        htmlLines = []
        previousModel = None
        for resultModel in recommendedData:
            if previousModel is None:
                newCategory = True
            else:
                newCategory = (previousModel["category_description"] !=
                               resultModel["category_description"])
            # Limit category display if many repeats: only on a category change
            showCategory = (self.requestData["groupByCategory"]
                            and newCategory)
            if showCategory:
                htmlLines.append(CATEGORY_HEADER_TEMPLATE % resultModel)
            htmlLines.append(
                self.formatRowHTML(resultModel, colNames, showCategory))
            previousModel = resultModel
        self.requestData["dataRows"] = "\n".join(htmlLines)
    def test_recommenderAnalysis(self):
        """Verify recommendation classification statistics for mock patient -11111.

        Every scenario is checked three ways, each expected to yield the same
        statistics: calling the analyzer directly with an AnalysisQuery,
        running the analyzer's command-line entry point, and running the
        analyzer on the output of the PreparePatientItems command-line entry
        point (the "prepared file intermediary").
        """
        # Analyzer options shared by most prepared-file runs of the association recommender
        preparedAssociationOptions = ["-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender"]

        # Run the recommender against the mock test data and verify expected stats afterwards.
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-11111])
        analysisQuery.numQueryItems = 1
        analysisQuery.numVerifyItems = 3
        analysisQuery.numRecommendations = 4
        analysisQuery.recommender = BaselineFrequencyRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # Don't use items whose default is to be excluded from recommendations
        analysisQuery.baseRecQuery.excludeCategoryIds = analysisQuery.recommender.defaultExcludedClinicalItemCategoryIds()
        analysisQuery.baseRecQuery.excludeItemIds = analysisQuery.recommender.defaultExcludedClinicalItemIds()

        colNames = ["patient_id", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall", "weightPrecision", "normalRecall", "normalPrecision", "ROC-AUC"]

        # Start with default recommender
        expectedResults = [RowItemModel([-11111, 1, 2, 3, 0.333, 0.25, 0.286, 0.208, 0.254, 0.333/1.0, 0.25/0.75, 0.524], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)
        self._assertCommandLineResults(
            ["-q", "1", "-v", "3", "-r", "4", "-m", "0", "-R", "BaselineFrequencyRecommender"],
            expectedResults, colNames)
        self._assertPreparedFileResults(
            ["-q", "1", "-v", "3"],
            ["-P", "-r", "4", "-m", "0", "-R", "BaselineFrequencyRecommender"],
            expectedResults, colNames)

        # Now try targeted recommender
        analysisQuery.recommender = ItemAssociationRecommender()
        expectedResults = [RowItemModel([-11111, 1, 2, 3, 0.333, 0.25, 0.286, 0.347, 0.293, 0.333, 0.25/0.75, 0.6666], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)
        self._assertCommandLineResults(
            ["-q", "1", "-v", "3", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender"],
            expectedResults, colNames)
        self._assertPreparedFileResults(
            ["-q", "1", "-v", "3"], preparedAssociationOptions,
            expectedResults, colNames)

        # Now try multiple query items targeted recommender
        analysisQuery.numQueryItems = 2
        expectedResults = [RowItemModel([-11111, 1, 2, 3, 0.333, 0.25, 0.286, 0.254, 0.194, 0.333, 0.25/0.75, 0.4167], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)
        self._assertCommandLineResults(
            ["-q", "2", "-v", "3", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender"],
            expectedResults, colNames)
        self._assertPreparedFileResults(
            ["-q", "2", "-v", "3"], preparedAssociationOptions,
            expectedResults, colNames)

        # More query items with aggregation options
        analysisQuery.numQueryItems = 3
        expectedResults = [RowItemModel([-11111, 1, 1, 3, 0.5, 0.25, 0.333, 0.517, 0.194, 0.5, 0.25/0.5, 0.4166], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)
        self._assertCommandLineResults(
            ["-q", "3", "-v", "3", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender"],
            expectedResults, colNames)
        self._assertPreparedFileResults(
            ["-q", "3", "-v", "3"], preparedAssociationOptions,
            expectedResults, colNames)

        # Value filters
        analysisQuery.baseRecQuery.sortField = "freqRatio"
        analysisQuery.baseRecQuery.fieldFilters["freqRatio>"] = 70
        expectedResults = [RowItemModel([-11111, 2, 0, 2, 1.0, 0.5, 0.6666, 1.0, 0.446, 1.0, 0.5/0.5, 0.375], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)
        del analysisQuery.baseRecQuery.fieldFilters["freqRatio>"]  # Undo to not affect subsequent queries
        self._assertCommandLineResults(
            ["-s", "freqRatio", "-f", "freqRatio>:70.0", "-q", "3", "-v", "3", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender"],
            expectedResults, colNames)
        self._assertPreparedFileResults(
            ["-q", "3", "-v", "3"],
            ["-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", "-s", "freqRatio", "-f", "freqRatio>:70.0"],
            expectedResults, colNames)

        # Unweighted aggregation
        analysisQuery.baseRecQuery.weightingMethod = "unweighted"
        expectedResults = [RowItemModel([-11111, 1, 1, 3, 0.5, 0.25, 0.3333, 0.517, 0.194, 0.5, 0.25/0.5, 0.25], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)
        self._assertCommandLineResults(
            ["-s", "freqRatio", "-q", "3", "-v", "3", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", "-a", "unweighted"],
            expectedResults, colNames)
        self._assertPreparedFileResults(
            ["-q", "3", "-v", "3"],
            ["-s", "freqRatio", "-P", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender", "-a", "unweighted"],
            expectedResults, colNames)

        # Run by equivalent query time span selection rather than explicit counts
        colNames = ["patient_id", "baseItemId", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall", "weightPrecision", "ROC-AUC"]
        expectedResults = [RowItemModel([-11111, -4, 1, 1, 3, 0.5, 0.25, 0.333, 0.517, 0.194, 0.4167], colNames)]

        analysisQuery.baseRecQuery.sortField = "conditionalFreq"
        analysisQuery.numQueryItems = None
        analysisQuery.numVerifyItems = None
        analysisQuery.baseCategoryId = -1
        # NOTE(review): the direct query uses a 3 hour (10800 s) query span while
        # the command-line runs below pass "-Q 5400"; both are expected to give
        # the same statistics on the mock data - confirm the mismatch is intended.
        analysisQuery.queryTimeSpan = timedelta(0, 3*60*60)
        analysisQuery.verifyTimeSpan = timedelta(50, 0)
        analysisQuery.numRecommendations = 4
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)
        self._assertCommandLineResults(
            ["-c", "-1", "-Q", "5400", "-V", "4320000", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender"],
            expectedResults, colNames)
        self._assertPreparedFileResults(
            ["-c", "-1", "-Q", "5400", "-V", "4320000"], preparedAssociationOptions,
            expectedResults, colNames)

        # Run by query time span by identifying base clinical item, rather than a general category
        analysisQuery.numQueryItems = None
        analysisQuery.numVerifyItems = None
        analysisQuery.baseCategoryId = None  # Clear prior setting
        analysisQuery.baseItemId = -4
        analysisQuery.queryTimeSpan = timedelta(0, 3*60*60)
        analysisQuery.verifyTimeSpan = timedelta(50, 0)
        analysisQuery.numRecommendations = 4
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)
        self._assertCommandLineResults(
            ["-b", "-4", "-Q", "5400", "-V", "4320000", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender"],
            expectedResults, colNames)
        self._assertPreparedFileResults(
            ["-b", "-4", "-Q", "5400", "-V", "4320000"], preparedAssociationOptions,
            expectedResults, colNames)

        # Basic then Filter test data date range
        colNames = ["patient_id", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall", "weightPrecision", "ROC-AUC"]
        expectedResults = [RowItemModel([-11111, 1, 1, 3, 0.5, 0.25, 0.33333, 0.4375, 0.29319, 0.66667], colNames)]
        analysisQuery = AnalysisQuery()  # Fresh query so earlier settings do not carry over
        analysisQuery.patientIds = set([-11111])
        analysisQuery.numQueryItems = 1
        analysisQuery.numVerifyItems = 2
        analysisQuery.numRecommendations = 4
        analysisQuery.recommender = ItemAssociationRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)
        self._assertCommandLineResults(
            ["-q", "1", "-v", "2", "-r", "4", "-m", "0", "-R", "ItemAssociationRecommender"],
            expectedResults, colNames)
        self._assertPreparedFileResults(
            ["-q", "1", "-v", "2"], preparedAssociationOptions,
            expectedResults, colNames)

        # Date Filters
        colNames = ["patient_id", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall", "weightPrecision", "ROC-AUC"]
        expectedResults = [RowItemModel([-11111, 0, 1, 2, 0.0, 0.0, 0.0, 0.0, 0.0, None], colNames)]
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-11111])
        analysisQuery.numQueryItems = 1
        analysisQuery.numVerifyItems = 2
        analysisQuery.numRecommendations = 4
        analysisQuery.recommender = ItemAssociationRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data
        analysisQuery.startDate = datetime(2000, 1, 1, 1)
        analysisQuery.endDate = datetime(2000, 1, 10)
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)
        self._assertCommandLineResults(
            ["-q", "1", "-v", "2", "-r", "4", "-m", "0", "-S", "2000-01-01 01:00:00", "-E", "2000-01-10", "-R", "ItemAssociationRecommender"],
            expectedResults, colNames)
        self._assertPreparedFileResults(
            ["-q", "1", "-v", "2", "-S", "2000-01-01 01:00:00", "-E", "2000-01-10"],
            preparedAssociationOptions,
            expectedResults, colNames)

    def _captureMainOutput(self, mainFunction, argv, stdinText=None):
        """Run mainFunction(argv) with stdout captured and return the captured text.

        When stdinText is given, stdin is replaced by a stream over that text
        for the duration of the call.  The original stdin and stdout are
        always restored, even if mainFunction raises.
        """
        originalStdin, originalStdout = sys.stdin, sys.stdout
        if stdinText is not None:
            sys.stdin = StringIO(stdinText)
        sys.stdout = StringIO()
        try:
            mainFunction(argv)
            return sys.stdout.getvalue()
        finally:
            sys.stdin, sys.stdout = originalStdin, originalStdout

    def _assertCommandLineResults(self, analyzerOptions, expectedResults, colNames):
        """Run the analyzer command line on patient list '0,-11111' and compare its text output."""
        argv = ["RecommendationClassificationAnalysis.py"] + analyzerOptions + ['0,-11111', "-"]
        textOutput = StringIO(self._captureMainOutput(self.analyzer.main, argv))
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

    def _assertPreparedFileResults(self, preparerOptions, analyzerOptions, expectedResults, colNames):
        """Pipe the preparer's command-line output into the analyzer via stdin and compare the text output."""
        preparerArgv = ["PreparePatientItems.py"] + preparerOptions + ['0,-11111', "-"]
        preparedText = self._captureMainOutput(self.preparer.main, preparerArgv)

        # Input file "-" makes the analyzer read the prepared data from the redirected stdin
        analyzerArgv = ["RecommendationClassificationAnalysis.py"] + analyzerOptions + ['-', "-"]
        textOutput = StringIO(self._captureMainOutput(self.analyzer.main, analyzerArgv, stdinText=preparedText))
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)
예제 #7
0
    def test_recommenderAnalysis(self):
        # Run the recommender against the mock test data above and verify expected stats afterwards.
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-11111])
        analysisQuery.baseCategoryId = -7
        analysisQuery.queryTimeSpan = timedelta(0, 86400)
        #analysisQuery.recommender = BaselineFrequencyRecommender();
        analysisQuery.recommender = ItemAssociationRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.targetItemIds = set([-33, -32, -31, -30])
        analysisQuery.baseRecQuery.maxRecommendedId = 0
        # Restrict to test data

        # Initial run without time limits on outcome measure
        colNames = [
            "patient_id", "outcome.-33", "score.-33", "outcome.-32",
            "score.-32", "outcome.-31", "score.-31", "outcome.-30", "score.-30"
        ]
        expectedResults = [
            RowItemModel([-11111, +0, 0.222, +2, 0.611, +1, 0.222, +1, 0.222],
                         colNames)
        ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()
        # Redirect stdout output to collect test results
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-o",
            "-33,-32,-31,-30", "-m", "0", "-R", "ItemAssociationRecommender",
            '0,-11111', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)

        # Redo through prepared file intermediary
        sys.stdout = StringIO()
        argv = [
            "PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400",
            "-o", "-33,-32,-31,-30", '0,-11111', "-"
        ]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())

        sys.stdin = preparedDataFile
        # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = [
            "OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R",
            "ItemAssociationRecommender", '-', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)

        # Now try with time limitation on outcome measure
        analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, 604800)
        # 1 week
        colNames = [
            "patient_id", "outcome.-33", "score.-33", "outcome.-32",
            "score.-32", "outcome.-31", "score.-31", "outcome.-30", "score.-30"
        ]
        expectedResults = [
            RowItemModel([-11111, +0, 0.222, +2, 0.611, +0, 0.222, +1, 0.222],
                         colNames)
        ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()
        # Redirect stdout output to collect test results
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-t",
            "604800", "-o", "-33,-32,-31,-30", "-m", "0", "-R",
            "ItemAssociationRecommender", '0,-11111', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)

        # Redo through prepared file intermediary
        sys.stdout = StringIO()
        argv = [
            "PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400",
            "-t", "604800", "-o", "-33,-32,-31,-30", '0,-11111', "-"
        ]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())

        sys.stdin = preparedDataFile
        # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = [
            "OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R",
            "ItemAssociationRecommender", "-t", "604800", '-', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)

        # Again, but with much stricter time limit (negative test case)
        analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, 172800)
        # 2 day
        colNames = [
            "patient_id", "outcome.-33", "score.-33", "outcome.-32",
            "score.-32", "outcome.-31", "score.-31", "outcome.-30", "score.-30"
        ]
        expectedResults = [
            RowItemModel([-11111, 0, 0.0109, 2, 0.0600, 0, 0.0109, 0, 0.0109],
                         colNames)
        ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()
        # Redirect stdout output to collect test results
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-t",
            "172800", "-o", "-33,-32,-31,-30", "-m", "0", "-R",
            "ItemAssociationRecommender", '0,-11111', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)

        # Redo through prepared file intermediary
        sys.stdout = StringIO()
        argv = [
            "PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400",
            "-t", "172800", "-o", "-33,-32,-31,-30", '0,-11111', "-"
        ]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())

        sys.stdin = preparedDataFile
        # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = [
            "OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R",
            "ItemAssociationRecommender", "-t", "172800", '-', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)
예제 #8
0
    def setUp(self):
        """Load the mock database records used by the test cases.

        After the base DBTestCase setup and the schema build call, inserts
        rows into clinical_item_category, clinical_item, patient_item and
        clinical_item_association via DBUtil.findOrInsertItem, clears two
        cached values on a fresh DataManager, and creates the
        ItemAssociationRecommender instance stored on self.recommender.
        """
        DBTestCase.setUp(self)
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        log.info("Populate the database with test data")

        # --- clinical_item_category: keep each returned ID (as a string) on the instance
        self.clinicalItemCategoryIdStrList = list()
        categoryCols = ["clinical_item_category_id", "source_table"]
        categoryRows = [
            RowItemModel(values, categoryCols) for values in (
                [-1, "Labs"],
                [-2, "Imaging"],
                [-3, "Meds"],
                [-4, "Nursing"],
                [-5, "Problems"],
                [-6, "Lab Results"],
            )
        ]
        for categoryRow in categoryRows:
            categoryId, _wasNew = DBUtil.findOrInsertItem(
                "clinical_item_category", categoryRow)
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # --- clinical_item: (item ID, category ID, name)
        itemCols = ["clinical_item_id", "clinical_item_category_id", "name"]
        itemRows = [
            RowItemModel(values, itemCols) for values in (
                [-1, -1, "CBC"],
                [-2, -1, "BMP"],
                [-3, -1, "Hepatic Panel"],
                [-4, -1, "Cardiac Enzymes"],
                [-5, -2, "CXR"],
                [-6, -2, "RUQ Ultrasound"],
                [-7, -2, "CT Abdomen/Pelvis"],
                [-8, -2, "CT PE Thorax"],
                [-9, -3, "Acetaminophen"],
                [-10, -3, "Carvedilol"],
                [-11, -3, "Enoxaparin"],
                [-12, -3, "Warfarin"],
                [-13, -3, "Ceftriaxone"],
                [-14, -4, "Foley Catheter"],
                [-15, -4, "Strict I&O"],
                [-16, -4, "Fall Precautions"],
            )
        ]
        for itemRow in itemRows:
            _itemId, _wasNew = DBUtil.findOrInsertItem("clinical_item", itemRow)

        # --- patient_item: every record carries the same analyze_date value
        patientItemCols = [
            "patient_item_id", "patient_id", "clinical_item_id", "item_date",
            "analyze_date"
        ]
        analyzedOn = datetime(2010, 1, 1, 0)
        patientItemRows = [
            RowItemModel(values + [analyzedOn], patientItemCols) for values in (
                [-1, -11111, -4, datetime(2000, 1, 1, 0)],
                [-2, -11111, -10, datetime(2000, 1, 1, 0)],
                [-3, -11111, -8, datetime(2000, 1, 1, 2)],
                [-4, -11111, -10, datetime(2000, 1, 2, 0)],
                [-5, -11111, -12, datetime(2000, 2, 1, 0)],
                [-10, -22222, -7, datetime(2000, 1, 5, 0)],
                [-12, -22222, -6, datetime(2000, 1, 9, 0)],
                [-13, -22222, -11, datetime(2000, 1, 9, 0)],
                [-14, -33333, -6, datetime(2000, 2, 9, 0)],
                [-15, -33333, -2, datetime(2000, 2, 11, 0)],
            )
        ]
        for patientItemRow in patientItemRows:
            _itemId, _wasNew = DBUtil.findOrInsertItem("patient_item",
                                                       patientItemRow)

        # --- clinical_item_association: patient counts per time window plus time-difference sums
        assocCols = [
            "clinical_item_id", "subsequent_item_id",
            "patient_count_0", "patient_count_3600", "patient_count_86400",
            "patient_count_604800", "patient_count_any",
            "time_diff_sum", "time_diff_sum_squares",
        ]
        assocRows = [
            RowItemModel(values, assocCols) for values in (
                # Items paired with themselves
                [-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0],
                [-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0],
                [-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0],
                [-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0],
                [-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0],
                [-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0],

                # Zero count associations, probably shouldn't even be here. If so, ignore them anyway
                [-2, -3, 0, 0, 0, 0, 0, 0.0, 0.0],
                [-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0],
                [-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0],
                [-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0],
                [-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0],
                [-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0],
                [-4, -7, 23, 33, 33, 33, 63, 40.0, 5420.0],
                [-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0],

                [-6, -2, 7, 7, 7, 7, 7, 1.0, 1.0],
                [-6, -4, 20, 20, 20, 20, 20, 1.0, 1.0],
            )
        ]
        countPrefix = "patient_count_"
        for assocRow in assocRows:
            # Derive a count_<window> column from each patient_count_<window>
            # column: same value, plus 5 when the value exceeds 5 and the
            # window is not the zero time interval.
            for colName in assocCols:
                if not colName.startswith(countPrefix):
                    continue
                windowLabel = colName[len(countPrefix):]
                derivedCount = assocRow[colName]
                if windowLabel != "0" and derivedCount > 5:
                    derivedCount += 5
                assocRow["count_%s" % windowLabel] = derivedCount
            _itemId, _wasNew = DBUtil.findOrInsertItem(
                "clinical_item_association", assocRow)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        for cacheKey in ("analyzedPatientCount", "clinicalItemCountsUpdated"):
            self.dataManager.clearCacheData(cacheKey)

        self.recommender = ItemAssociationRecommender()
예제 #9
0
    def action_default(self):
        """Query the association recommender and render the results as HTML rows.

        Builds a RecommenderQuery from self.requestData (defaulting the sort
        field and the item / category exclusions when they are empty), runs
        the recommender, and writes the header labels to
        requestData["fieldHeaders"] and the newline-joined row HTML to
        requestData["dataRows"].
        """
        # Instance to test on
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache

        if self.requestData["sortField"] == "":
            # P-Fisher-NegLog should yield better results, but beware, much longer to calculate
            self.requestData["sortField"] = "P-YatesChi2-NegLog"

        query = RecommenderQuery()
        query.parseParams(self.requestData)
        # Fall back to the recommender's default exclusions when the request gave none
        if len(query.excludeItemIds) == 0:
            query.excludeItemIds = (
                self.recommender.defaultExcludedClinicalItemIds())
        if len(query.excludeCategoryIds) == 0:
            query.excludeCategoryIds = (
                self.recommender.defaultExcludedClinicalItemCategoryIds())

        displayFields = (self.requestData["displayFields"].split(",")
                         if self.requestData["displayFields"] != ""
                         else list())

        recommendedData = self.recommender(query)

        if len(recommendedData) > 0:
            # Denormalize results with links to clinical item descriptions
            self.recommender.formatRecommenderResults(recommendedData)

        # Display fields should append Format suffix to identify which version to display, but use original for header labels
        headerLabels, suffixedFields = self.prepareDisplayHeaders(displayFields)
        self.requestData["fieldHeaders"] = headerLabels

        # Format for HTML and add a control field for interaction with the data
        for resultRow in recommendedData:
            self.prepareResultRow(resultRow, displayFields)

        # Try organize by category
        if self.requestData["groupByCategory"]:
            recommendedData = self.recommender.organizeByCategory(
                recommendedData)

        # Column order: controls, then the suffixed display fields, then description
        colNames = ["controls"] + list(suffixedFields) + ["description"]

        htmlLines = list()
        previousRow = None
        for resultRow in recommendedData:
            if previousRow is None:
                categoryChanged = True
            else:
                categoryChanged = (previousRow["category_description"] !=
                                   resultRow["category_description"])
            # Limit category display if many repeats
            showCategory = (self.requestData["groupByCategory"]
                            and categoryChanged)
            if showCategory:
                htmlLines.append(CATEGORY_HEADER_TEMPLATE % resultRow)
            rowHtml = self.formatRowHTML(resultRow, colNames, showCategory)
            htmlLines.append(rowHtml)
            previousRow = resultRow
        self.requestData["dataRows"] = "\n".join(htmlLines)