예제 #1
0
    def assertEqualRecommendedData(self, expectedData, recommendedData, query):
        """Compare recommender output against expected items, pairwise and in order.

        The specific numbers that come out of the recommendations are only
        checked for the keys an expected item actually lists; beyond that the
        recommended items must be in non-increasing order of query.sortField.

        expectedData: Sequence of dict-like items holding the values to verify.
        recommendedData: Sequence of dict-like items produced by the recommender.
        query: Object whose sortField attribute names the score used for ranking.
        """
        lastScore = None
        for expectedItem, recommendedItem in zip(expectedData,
                                                 recommendedData):
            # Ensure derived statistics are populated to enable comparisons
            ItemAssociationRecommender.populateDerivedStats(
                recommendedItem, expectedItem.keys())

            self.assertEqualDict(expectedItem, recommendedItem,
                                 ["clinical_item_id"])
            # keys() rather than iterkeys(): the latter is gone in Python 3,
            # while keys() works on both major versions.
            for key in expectedItem.keys():
                # If specified, then verify the specific value
                if isinstance(expectedItem[key], float):
                    # assertAlmostEqual is the supported spelling; the
                    # assertAlmostEquals alias was removed in Python 3.12.
                    self.assertAlmostEqual(expectedItem[key],
                                           recommendedItem[key], 5)
                else:
                    self.assertEqual(expectedItem[key], recommendedItem[key])
            if lastScore is not None:
                # Verify descending order of scores
                self.assertTrue(recommendedItem[query.sortField] <= lastScore)
            lastScore = recommendedItem[query.sortField]

        # zip() stops at the shorter sequence, so lengths are checked explicitly
        self.assertEqual(len(expectedData), len(recommendedData))
예제 #2
0
    def __init__(self):
        """Install default request parameters, the resultCount handler and
        a recommender instance backed by the shared web data cache.
        """
        BaseDynamicData.__init__(self)

        # Default value for each request parameter, applied in the listed order
        defaultParams = (
            ("queryItemIds", ""),
            ("targetItemIds", ""),
            ("excludeItemIds", ""),
            ("excludeCategoryIds", ""),
            ("timeDeltaMax", ""),
            ("sortField", "PPV"),
            ("sortReverse", "True"),
            ("resultCount", "10"),
            ("invertQuery", ""),
            ("showCounts", ""),
            ("countPrefix", ""),
            ("aggregationMethod", "weighted"),
            ("fieldHeaders", ""),
            ("dataRows", ""),
        )
        for paramName, paramValue in defaultParams:
            self.requestData[paramName] = paramValue

        defaultActionName = ItemRecommendationTable.action_default.__name__
        self.addHandler("resultCount", defaultActionName)

        # Instance to test on
        self.recommender = ItemAssociationRecommender()
        dataManager = self.recommender.dataManager
        dataManager.dataCache = webDataCache
예제 #3
0
 def assertEqualRecommendedDataStats(self, expectedData, recommendedData, headers):
     """Check that each recommended item matches its expected counterpart
     on every header, comparing the actual score / stat values to 3 places.
     """
     self.assertEqual(len(expectedData), len(recommendedData))
     for verifyItem, sampleItem in zip(expectedData, recommendedData):
         # Make sure every field of interest is populated / calculated on the recommended item
         ItemAssociationRecommender.populateDerivedStats(sampleItem, headers)
         for header in headers:
             verifyValue = verifyItem[header]
             sampleValue = sampleItem[header]
             failMessage = 'Dicts diff with key (%s).  Verify = %s, Sample = %s' % (
                 header, verifyValue, sampleValue)
             self.assertAlmostEqual(verifyValue, sampleValue, 3, failMessage)
예제 #4
0
    def test_tripleSequence_virtualItem(self):
        """Test outcome assessment when the target is a virtual item based on
        the presence of a triple (instead of double) sequence of items.

        Runs the analyzer against the mock test data, first through the
        direct call interface and then through the command-line interface,
        and verifies both against the same expected stats.
        """
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-22222])
        analysisQuery.baseCategoryId = -7
        analysisQuery.queryTimeSpan = timedelta(0, 86400)
        analysisQuery.sequenceItemIdsByVirtualItemId[-16] = (-15, -14)
        #analysisQuery.recommender = BaselineFrequencyRecommender();
        analysisQuery.recommender = ItemAssociationRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.targetItemIds = set([-16])
        # Restrict to test data
        analysisQuery.baseRecQuery.maxRecommendedId = 0

        # Initial run without time limits on outcome measure
        colNames = ["patient_id", "outcome.-16", "score.-16"]
        expectedResults = [RowItemModel([-22222, +1, 0.14286], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-o",
            "-16=-15:-14", "-m", "0", "-R", "ItemAssociationRecommender",
            '0,-22222', "-"
        ]
        # Redirect stdout output to collect test results, and always put the
        # real stream back so a failure in main() does not leave stdout
        # pointing at the capture buffer for the rest of the test run.
        originalStdout = sys.stdout
        sys.stdout = StringIO()
        try:
            self.analyzer.main(argv)
            textOutput = StringIO(sys.stdout.getvalue())
        finally:
            sys.stdout = originalStdout
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)
예제 #5
0
 def isItemRecommendable(self, clinicalItemId, queryItemCountById, recQuery,
                         categoryIdByItemId):
     """Report whether the clinical item could possibly show up in the
     recommendation list; if it cannot, there is no point testing the
     recommender against it.  The decision is delegated unchanged to
     ItemAssociationRecommender.isItemRecommendable.
     """
     recommendable = ItemAssociationRecommender.isItemRecommendable(
         clinicalItemId,
         queryItemCountById,
         recQuery,
         categoryIdByItemId,
     )
     return recommendable
    def test_recommenderAnalysis(self):
        """Run the analyzer against the mock test data with the baseline and
        the association recommender and verify the resulting table rows.

        Specific scores are not checked (SENTINEL_ANY_FLOAT stands in for
        them); what matters is that the ranks come out as expected.
        """
        query = AnalysisQuery()
        query.patientIds = set([-11111])
        query.recommender = BaselineFrequencyRecommender()
        query.baseRecQuery = RecommenderQuery()
        # Restrict to test data
        query.baseRecQuery.maxRecommendedId = 0

        # Default exclusion lists and a time delta limit are not applied to
        # the base recommender query in this test.

        # Presumably the column layout of the expected rows below -- not
        # otherwise used in this test.
        colNames = [
            "patient_id", "clinical_item_id", "iItem", "iRecItem", "recRank",
            "recScore"
        ]

        anyScore = SENTINEL_ANY_FLOAT

        # Start with default recommender
        baselineRows = [
            (-11111, -4, 0, 0, 1, anyScore),
            (-11111, -10, 1, 1, 4, anyScore),
            (-11111, -8, 2, 2, 5, anyScore),
            (-11111, -12, 4, 3, 2, anyScore),
        ]
        self.assertEqualTable(baselineRows, self.analyzer(query), 3)

        # Now try targeted recommender
        query.recommender = ItemAssociationRecommender()
        associationRows = [
            (-11111, -4, 0, 0, 1, anyScore),
            (-11111, -10, 1, 1, 2, anyScore),
            (-11111, -8, 2, 2, 5, anyScore),
            (-11111, -12, 4, 3, 1, anyScore),
        ]
        self.assertEqualTable(associationRows, self.analyzer(query), 3)

        # Repeat, but put a limit on maximum number of query items and
        # recommendations we want analyzed: only the first two rows remain
        query.queryItemMax = 2
        limitedRows = associationRows[:2]
        self.assertEqualTable(limitedRows, self.analyzer(query), 3)
예제 #7
0
# Build a lookup from clinical_item_id (first CSV field, kept as a string)
# to its description (all remaining fields re-joined with spaces).
print("Creating clinical_item_id to description map")
id2description = {}
# The with-block guarantees the file handle is closed once the map is built,
# even if a line fails to parse.
with open('/Users/jwang/Desktop/Results/clinical_items.csv', "rU") as clinical_items:
    # Discard the first line (presumably a header row -- confirm against the CSV)
    clinical_items.readline()
    for line in clinical_items:
        line = line.strip().split(",")
        clinical_item_id = line[0]
        # Descriptions that themselves contained commas were split above;
        # stitch the pieces back together with spaces.
        description = " ".join(line[1:])
        id2description[clinical_item_id] = description

# Reopen diagnoses, from the top of the file
# NOTE(review): this handle is not closed anywhere in the visible part of the
# script -- confirm it is closed after the loop.
diagnoses = open('/Users/jwang/Desktop/Results/diagnoses_to_test.csv', "rU")
# Discard the first line (presumably a header row -- confirm against the CSV)
diagnoses.readline()

# Query-string parameters shared by every per-diagnosis query; the
# "queryItemIds=<id>" prefix is prepended inside the loop below.
baseQueryStr = "&targetItemIds=&excludeItemIds=71052,71046,71054,71083,71045,71047&excludeCategoryIds=1,58,4,2,160,161,59,13,159,163,23,62,18,11,46,2&timeDeltaMax=86400&sortField=P-YatesChi2-NegLog&sortReverse=True&filterField1=prevalence<:&filterField2=PPV<:&filterField3=RR<:&filterField4=sensitivity<:&filterField5=P-YatesChi2<:&resultCount=4000&invertQuery=false&showCounts=true&countPrefix=patient_&aggregationMethod=weighted&cacheTime=0"
recommender = ItemAssociationRecommender()

diagnosis_count = 0
for line in diagnoses:
    # First comma-separated field is the clinical item ID; the remaining
    # fields are re-joined with spaces to form the description.
    line = line.strip().split(",")
    clinical_item_id = line[0]
    description = " ".join(line[1:])
    queryStr = "queryItemIds=" + str(clinical_item_id) + baseQueryStr
    print('Finding Top Associations for "{0}"'.format(description))

    # Build RecommenderQuery
    query = RecommenderQuery()
    # Second argument True is keep_blank_values, so parameters with an empty
    # value (e.g. "targetItemIds=") are kept in the dict.
    paramDict = dict(urlparse.parse_qsl(queryStr, True))
    query.parseParams(paramDict)

    # Call ItemRecommender
예제 #8
0
파일: RelatedOrders.py 프로젝트: xxxx3/CDSS
class RelatedOrders(BaseDynamicData):
    """Simple script to (dynamically) relay query and result data
    from the ItemRecommendation module in URL request then HTML table format.
    """

    def __init__(self):
        """Register the default request parameters and the action handlers."""
        BaseDynamicData.__init__(self)

        self.requestData["searchStr"] = ""
        self.requestData["analysisStatus"] = "1"

        self.requestData["sim_patient_id"] = ""
        self.requestData["sim_time"] = ""

        # Default comma-separated list of source tables to expect orders to reside in
        self.requestData["sourceTables"] = "stride_order_proc,stride_order_med"
        self.requestData["queryItemIds"] = ""
        self.requestData["targetItemIds"] = ""
        self.requestData["excludeItemIds"] = ""
        self.requestData["excludeCategoryIds"] = ""
        # Look for recommendations likely within 24 hours
        self.requestData["timeDeltaMax"] = "86400"
        self.requestData["sortField"] = ""
        # By default, assume recommender is enabled
        self.requestData["enableRecommender"] = "True"
        # Comma-separated field names, e.g. "prevalence","PPV","RR","P-YatesChi2"
        self.requestData["displayFields"] = ""
        self.requestData["sortReverse"] = "True"
        self.requestData["nPreCols"] = "1"
        self.requestData["groupByCategory"] = "True"
        # Default for related order search
        self.requestData["resultCount"] = "10"
        self.requestData["invertQuery"] = ""
        self.requestData["showCounts"] = ""
        self.requestData["countPrefix"] = "patient_"
        self.requestData["aggregationMethod"] = "weighted"

        self.requestData["title"] = "Order Search Results"
        self.requestData["fieldHeaders"] = ""
        # Initial table content: a single row showing the ajax-loader image
        self.requestData[
            "dataRows"] = '<tr><td colspan=100 align=center height=200><img src="../../resource/ajax-loader.gif"></td></tr>'

        self.addHandler("searchStr", RelatedOrders.action_orderSearch.__name__)
        self.addHandler("RelatedOrders", RelatedOrders.action_default.__name__)

    def _isRequestFlagEnabled(self, key):
        """Interpret the request parameter `key` as an on/off flag.

        Request values are strings, so plain truthiness would treat any
        non-empty value (including a false-like one) as enabled.  Empty /
        falsy values stay disabled; other values are additionally checked
        against FALSE_STRINGS, the same way showCounts is interpreted.
        """
        value = self.requestData[key]
        if not value:
            return False
        text = value.lower() if hasattr(value, "lower") else str(value).lower()
        return text not in FALSE_STRINGS

    def _renderRowsHTML(self, dataModels, colNames, groupByCategory):
        """Render the data models as HTML rows joined by newlines, inserting
        a category header before the first row of each run of rows sharing a
        category_description when groupByCategory is set.
        """
        lastModel = None
        htmlLines = list()
        for dataModel in dataModels:
            newCategory = (lastModel is None
                           or lastModel["category_description"] !=
                           dataModel["category_description"])
            # Limit category display if many repeats
            showCategory = groupByCategory and newCategory
            if showCategory:
                htmlLines.append(CATEGORY_HEADER_TEMPLATE % dataModel)
            htmlLines.append(
                self.formatRowHTML(dataModel, colNames, showCategory))
            lastModel = dataModel
        return "\n".join(htmlLines)

    def action_orderSearch(self):
        """Search for orders by query string and store the rendered result
        rows in self.requestData["dataRows"].
        """
        manager = SimManager()
        query = ClinicalItemQuery()
        query.parseParams(self.requestData)
        query.sourceTables = self.requestData["sourceTables"].split(",")
        results = manager.clinicalItemSearch(query)

        lastModel = None
        for dataModel in results:
            dataModel["controls"] = CONTROLS_TEMPLATE % dataModel
            dataModel["nPreCols"] = self.requestData["nPreCols"]
            dataModel["category_description.format"] = ""
            if (lastModel is None or lastModel["category_description"] !=
                    dataModel["category_description"]):
                # Only show category if new
                dataModel["category_description.format"] = (
                    "<b>%s</b>" % dataModel["category_description"])
            lastModel = dataModel

        # "name" (order code) and "category_description.format" are not displayed
        colNames = ["controls", "description"]
        groupByCategory = self._isRequestFlagEnabled("groupByCategory")
        self.requestData["dataRows"] = self._renderRowsHTML(
            results, colNames, groupByCategory)

    def action_default(self):
        """Look for related orders by association / recommender methods and
        store the header and row HTML in self.requestData.
        """
        # If patient is specified then modify query and exclusion list based on items already ordered for patient
        recentItemIds = set()
        if self.requestData["sim_patient_id"]:
            patientId = int(self.requestData["sim_patient_id"])
            simTime = int(self.requestData["sim_time"])

            # Track recent item IDs (orders, diagnoses, unlocked results, etc. that related order queries will be based off of)
            manager = SimManager()
            recentItemIds = manager.recentItemIds(patientId, simTime)

        # Recommender Instance to test on
        self.recommender = ItemAssociationRecommender()
        # Allow caching of data for rapid successive queries
        self.recommender.dataManager.dataCache = webDataCache

        query = RecommenderQuery()
        if self.requestData["sortField"] == "":
            # P-Fisher-NegLog should yield better results, but beware, much longer to calculate
            self.requestData["sortField"] = "P-YatesChi2-NegLog"
        query.parseParams(self.requestData)
        # Fall back to the recommender's default exclusions when none were requested
        if not query.excludeItemIds:
            query.excludeItemIds = (
                self.recommender.defaultExcludedClinicalItemIds())
        if not query.excludeCategoryIds:
            query.excludeCategoryIds = (
                self.recommender.defaultExcludedClinicalItemCategoryIds())

        displayFields = list()
        if self.requestData["displayFields"] != "":
            displayFields = self.requestData["displayFields"].split(",")

        # Exclude items already ordered for the patient from any recommended list
        query.excludeItemIds.update(recentItemIds)
        if not query.queryItemIds:  # If no specific query items specified, then use the recent patient item IDs
            query.queryItemIds.update(recentItemIds)

        recommendedData = self.recommender(query)

        if recommendedData:
            # Denormalize results with links to clinical item descriptions
            self.recommender.formatRecommenderResults(recommendedData)

        # Display fields should append Format suffix to identify which version to display, but use original for header labels
        (self.requestData["fieldHeaders"], displayFieldsFormatSuffixed
         ) = self.prepareDisplayHeaders(displayFields)

        # Format for HTML and add a control field for interaction with the data
        for dataModel in recommendedData:
            self.prepareResultRow(dataModel, displayFields)

        # Try organize by category
        groupByCategory = self._isRequestFlagEnabled("groupByCategory")
        if groupByCategory:
            recommendedData = self.recommender.organizeByCategory(
                recommendedData)

        # "name" (code) and "category_description" are not displayed as columns
        colNames = ["controls"]
        colNames.extend(displayFieldsFormatSuffixed)
        colNames.extend(["description"])

        self.requestData["dataRows"] = self._renderRowsHTML(
            recommendedData, colNames, groupByCategory)

    def prepareDisplayHeaders(self, displayFields):
        """Build the table header HTML and the list of row keys to display.

        Returns a 2-tuple: the <th> cells for displayFields (plus
        CORE_FIELDS when the showCounts request parameter is enabled), and
        the matching '<field>Format' key names in the same order.
        """
        showCounts = (self.requestData["showCounts"].lower()
                      not in FALSE_STRINGS)

        fieldHeadersHTML = "".join(
            '<th nowrap>' + displayField + '</th>'
            for displayField in displayFields)
        if showCounts:
            fieldHeadersHTML += '<th>' + '</th><th>'.join(CORE_FIELDS) + '</th>'

        displayFieldsFormatSuffixed = [
            '%sFormat' % field for field in displayFields
        ]
        if showCounts:
            displayFieldsFormatSuffixed.extend(
                '%sFormat' % field for field in CORE_FIELDS)

        return (fieldHeadersHTML, displayFieldsFormatSuffixed)

    def prepareResultRow(self, dataModel, displayFields):
        """Add display-ready entries to a result row, in place.

        Sets "controls", "nPreCols" and a comma-free "name", fills in any
        missing display field from ContingencyStats built on the row's
        counts, and stores a '<field>Format' string for every display field
        and every CORE_FIELDS entry.

        Raises KeyError when a display field is absent from the row and the
        row has no "nB" count to derive it from.
        """
        dataModel["controls"] = CONTROLS_TEMPLATE % dataModel
        # Track spacer columns leading up to order description. +1 for control column
        dataModel["nPreCols"] = len(displayFields) + 1
        dataModel["name"] = dataModel["name"].replace(",", "-")

        # Stays None when the row carries no count data to derive stats from
        contStats = None
        if "nB" in dataModel:
            if "nAB" not in dataModel:
                # Baseline query without query items, use matching numbers to ensure calculations will have something to process
                dataModel["nAB"] = dataModel["nB"]
                dataModel["nA"] = dataModel["N"]
            nAB = dataModel["nAB"]
            nA = dataModel["nA"]
            nB = dataModel["nB"]
            N = dataModel["N"]
            contStats = ContingencyStats(nAB, nA, nB, N)
            contStats.normalize(truncateNegativeValues=False)

        for field in displayFields:
            if field not in dataModel:
                # Unavailable field, see if it is a derived field that can be calculated
                if contStats is None:
                    # Previously this path failed on an unbound local name
                    raise KeyError(
                        "Display field %r is not in the result row and cannot "
                        "be derived without count data (nB)" % (field,))
                dataModel[field] = contStats[field]

            if field in CORE_FIELDS:
                # Formatted together with the other core fields below
                pass
            elif field in PERCENT_FIELDS:
                # Format as a percentage
                dataModel["%sFormat" %
                          field] = "%d%%" % (dataModel[field] * 100)
            elif abs(dataModel[field]) < 0.01:
                # Allow formatting for very small values
                dataModel["%sFormat" % field] = "%.0e" % dataModel[field]
            elif abs(dataModel[field]) < 1:
                # Smaller value, show more significant digits
                dataModel["%sFormat" % field] = "%.2f" % dataModel[field]
            else:
                # Default just format as limited floating point values
                dataModel["%sFormat" % field] = "%.1f" % dataModel[field]

        for field in CORE_FIELDS:
            # Count fields express as integers, assuming available at all
            if field in BASELINE_FIELDS:
                dataModel["%sFormat" % field] = "%d" % dataModel[field]
            else:
                # May have small virtual counts from derived scenarios
                if dataModel[field] > 10:
                    dataModel["%sFormat" % field] = "%.1f" % dataModel[field]
                else:
                    dataModel["%sFormat" % field] = "%.2f" % dataModel[field]

    def formatRowHTML(self, dataModel, colNames, showCategory=True):
        """Render one data model as an HTML table row.

        dataModel: Mapping supplying the value for each column.
        colNames: Keys to render, one <td> each, in order.
        showCategory: When false, a "category_description" column is
            rendered as an empty cell (blanks out repeat categories).
        Returns the row's HTML lines joined by newlines.
        """
        htmlList = list()
        htmlList.append('<tr valign=top>')
        for col in colNames:
            if col == "category_description":  # Blank out repeat categories
                if showCategory:
                    htmlList.append(
                        '<td align=center><b>%(category_description)s</b></td>'
                        % dataModel)
                else:
                    htmlList.append('<td></td>')
            elif col == "description":
                htmlList.append('<td align=left>')
                htmlList.append(DESCRIPTION_TEMPLATE % dataModel)
                # Only include related link if recommender is enabled
                if self.requestData['enableRecommender'] == "True":
                    htmlList.append(RELATED_LINK_TEMPLATE % dataModel)
                htmlList.append('</td>')
            else:
                htmlList.append('<td align=right>%s</td>' % dataModel[col])
        htmlList.append('</tr>')
        return "\n".join(htmlList)
예제 #9
0
파일: RelatedOrders.py 프로젝트: xxxx3/CDSS
    def action_default(self):
        """Find related orders through the association recommender and store
        the header and row HTML in self.requestData.
        """
        # When a patient is specified, items already recorded for that patient
        # (orders, diagnoses, unlocked results, etc.) extend the exclusion
        # list and, absent explicit query items, become the query itself.
        patientItemIds = set()
        if self.requestData["sim_patient_id"]:
            patientId = int(self.requestData["sim_patient_id"])
            simTime = int(self.requestData["sim_time"])
            patientItemIds = SimManager().recentItemIds(patientId, simTime)

        # Recommender instance to test on; the shared data cache allows rapid
        # successive queries
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache

        query = RecommenderQuery()
        if self.requestData["sortField"] == "":
            # P-Fisher-NegLog should yield better results, but takes much
            # longer to calculate
            self.requestData["sortField"] = "P-YatesChi2-NegLog"
        query.parseParams(self.requestData)
        if len(query.excludeItemIds) == 0:
            query.excludeItemIds = (
                self.recommender.defaultExcludedClinicalItemIds())
        if len(query.excludeCategoryIds) == 0:
            query.excludeCategoryIds = (
                self.recommender.defaultExcludedClinicalItemCategoryIds())

        requestedFields = self.requestData["displayFields"]
        displayFields = (requestedFields.split(",")
                         if requestedFields != "" else list())

        # Never recommend what the patient already has; fall back to those
        # same items as the query when none were given explicitly
        query.excludeItemIds.update(patientItemIds)
        if not query.queryItemIds:
            query.queryItemIds.update(patientItemIds)

        recommendedData = self.recommender(query)
        if len(recommendedData) > 0:
            # Denormalize results with links to clinical item descriptions
            self.recommender.formatRecommenderResults(recommendedData)

        # Header labels use the original field names, while the row columns
        # use the Format-suffixed versions
        headerHTML, formatColumns = self.prepareDisplayHeaders(displayFields)
        self.requestData["fieldHeaders"] = headerHTML

        # Format for HTML and add a control field for interaction with the data
        for row in recommendedData:
            self.prepareResultRow(row, displayFields)

        # NOTE(review): groupByCategory is used by plain truthiness here; if
        # it is a request string, any non-empty value (even "False") enables
        # grouping -- confirm whether that is intended.
        groupByCategory = self.requestData["groupByCategory"]
        if groupByCategory:
            recommendedData = self.recommender.organizeByCategory(
                recommendedData)

        # "name" (code) and "category_description" are not shown as columns
        colNames = ["controls"] + list(formatColumns) + ["description"]

        htmlLines = []
        previousRow = None
        for row in recommendedData:
            categoryChanged = (
                previousRow is None
                or previousRow["category_description"] !=
                row["category_description"])
            # Limit category display if many repeats
            showCategory = (groupByCategory and categoryChanged)
            if showCategory:
                htmlLines.append(CATEGORY_HEADER_TEMPLATE % row)
            htmlLines.append(self.formatRowHTML(row, colNames, showCategory))
            previousRow = row
        self.requestData["dataRows"] = "\n".join(htmlLines)
    def test_recommenderAnalysis(self):
        # Run the recommender against the mock test data above and verify expected stats afterwards.
        analysisQuery = AnalysisQuery();
        analysisQuery.patientIds = set([-11111]);
        analysisQuery.numQueryItems = 1;
        analysisQuery.numVerifyItems = 3;
        analysisQuery.numRecommendations = 4;
        analysisQuery.recommender = BaselineFrequencyRecommender();
        #analysisQuery.recommender = ItemAssociationRecommender();
        analysisQuery.baseRecQuery = RecommenderQuery();
        analysisQuery.baseRecQuery.maxRecommendedId = 0; # Restrict to test data

        # Don't use items whose default is to be excluded from recommendations
        analysisQuery.baseRecQuery.excludeCategoryIds = analysisQuery.recommender.defaultExcludedClinicalItemCategoryIds();
        analysisQuery.baseRecQuery.excludeItemIds = analysisQuery.recommender.defaultExcludedClinicalItemIds();
        #recQuery.timeDeltaMax = timedelta(0, int(self.requestData["timeDeltaMax"]) );  # Time delta to use for queries, otherwise just default to all times

        colNames = ["patient_id", "TP", "FN", "FP",  "recall", "precision", "F1-score", "weightRecall","weightPrecision", "normalRecall","normalPrecision", "ROC-AUC"];
        
        # Start with default recommender
        expectedResults = [ RowItemModel([-11111,  1,2,3,  0.333, 0.25, 0.286,  0.208, 0.254, 0.333/1.0, 0.25/0.75, 0.524], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line interface
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","3","-r","4","-m","0","-R","BaselineFrequencyRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","1","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","BaselineFrequencyRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);


        
       
        # Now try targeted recommender
        analysisQuery.recommender = ItemAssociationRecommender();
        expectedResults = [ RowItemModel([-11111,  1,2,3,  0.333, 0.25, 0.286,  0.347, 0.293, 0.333, 0.25/0.75, 0.6666], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","1","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);




        # Now try multiple query items targeted recommender
        analysisQuery.numQueryItems = 2;
        expectedResults = [ RowItemModel([-11111, 1, 2, 3,  0.333, 0.25, 0.286,  0.254, 0.194, 0.333, 0.25/0.75, 0.4167], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","2","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","2","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);



        # More query items with aggregation options
        analysisQuery.numQueryItems = 3;
        expectedResults = [ RowItemModel([-11111, 1, 1, 3,   0.5, 0.25, 0.333,  0.517, 0.194, 0.5, 0.25/0.5, 0.4166], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","3","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","3","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);






        # Value filters
        analysisQuery.baseRecQuery.sortField= "freqRatio";
        analysisQuery.baseRecQuery.fieldFilters["freqRatio>"] = 70;
        expectedResults = [ RowItemModel([-11111, 2, 0, 2,   1.0, 0.5, 0.6666,  1.0, 0.446, 1.0, 0.5/0.5, 0.375], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);
        del analysisQuery.baseRecQuery.fieldFilters["freqRatio>"];  # Undo to not affect subsequent queries

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-s","freqRatio","-f","freqRatio>:70.0","-q","3","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","3","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender","-s","freqRatio","-f","freqRatio>:70.0",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);





        # Unweighted aggregation
        analysisQuery.baseRecQuery.weightingMethod = "unweighted";
        expectedResults = [ RowItemModel([-11111, 1, 1, 3,   0.5, 0.25, 0.3333,  0.517, 0.194, 0.5, 0.25/0.5, 0.25], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-s","freqRatio","-q","3","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender","-a","unweighted",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","3","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-s","freqRatio","-P","-r","4","-m","0","-R","ItemAssociationRecommender","-a","unweighted",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);




        # Run by equivalent query time span selection rather than explicit counts
        colNames = ["patient_id", "baseItemId", "TP", "FN", "FP",  "recall", "precision", "F1-score", "weightRecall","weightPrecision", "ROC-AUC"];
        expectedResults = [ RowItemModel([-11111, -4, 1, 1, 3,   0.5, 0.25, 0.333,  0.517, 0.194, 0.4167], colNames ) ];

        analysisQuery.baseRecQuery.sortField= "conditionalFreq";
        analysisQuery.numQueryItems = None;
        analysisQuery.numVerifyItems = None;
        analysisQuery.baseCategoryId = -1;
        analysisQuery.queryTimeSpan = timedelta(0,3*60*60);
        analysisQuery.verifyTimeSpan = timedelta(50,0);
        analysisQuery.numRecommendations = 4;
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-c","-1","-Q","5400","-V","4320000","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-c","-1","-Q","5400","-V","4320000",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);



        # Run by query time span by identifying base clinical item, rather than a general category
        analysisQuery.numQueryItems = None;
        analysisQuery.numVerifyItems = None;
        analysisQuery.baseCategoryId = None;    # Clear prior setting
        analysisQuery.baseItemId = -4;
        analysisQuery.queryTimeSpan = timedelta(0,3*60*60);
        analysisQuery.verifyTimeSpan = timedelta(50,0);
        analysisQuery.numRecommendations = 4;
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-b","-4","-Q","5400","-V","4320000","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-b","-4","-Q","5400","-V","4320000",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);




        # Basic then Filter test data date range
        colNames = ["patient_id", "TP", "FN", "FP",  "recall", "precision", "F1-score", "weightRecall","weightPrecision", "ROC-AUC"];
        expectedResults = [ RowItemModel([-11111, 1, 1, 3,   0.5, 0.25, 0.33333,  0.4375, 0.29319, 0.66667], colNames ) ];
        analysisQuery = AnalysisQuery();
        analysisQuery.patientIds = set([-11111]);
        analysisQuery.numQueryItems = 1;
        analysisQuery.numVerifyItems = 2;
        analysisQuery.numRecommendations = 4;
        analysisQuery.recommender = ItemAssociationRecommender();
        analysisQuery.baseRecQuery = RecommenderQuery();
        analysisQuery.baseRecQuery.maxRecommendedId = 0; # Restrict to test data
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","2","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","1","-v","2",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);



        # Date Filters
        colNames = ["patient_id", "TP", "FN", "FP",  "recall", "precision", "F1-score", "weightRecall","weightPrecision", "ROC-AUC"];
        expectedResults = [ RowItemModel([-11111, 0, 1, 2,   0.0, 0.0, 0.0,  0.0, 0.0, None], colNames ) ];
        analysisQuery = AnalysisQuery();
        analysisQuery.patientIds = set([-11111]);
        analysisQuery.numQueryItems = 1;
        analysisQuery.numVerifyItems = 2;
        analysisQuery.numRecommendations = 4;
        analysisQuery.recommender = ItemAssociationRecommender();
        analysisQuery.baseRecQuery = RecommenderQuery();
        analysisQuery.baseRecQuery.maxRecommendedId = 0; # Restrict to test data
        analysisQuery.startDate = datetime(2000,1,1,1);
        analysisQuery.endDate = datetime(2000,1,10);
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","2","-r","4","-m","0","-S","2000-01-01 01:00:00","-E","2000-01-10","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","1","-v","2","-S","2000-01-01 01:00:00","-E","2000-01-10",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);
예제 #11
0
    def test_recommenderAnalysis(self):
        """Run the recommender against the mock test data and verify the expected stats.

        Three scenarios are checked: no time limit on the outcome measure,
        a 604800 second (1 week) limit and a 172800 second (2 day) limit.
        Every scenario is evaluated three ways that must all match the same
        expected results:

        1. calling ``self.analyzer`` directly with the ``AnalysisQuery``,
        2. calling ``self.analyzer.main`` with equivalent command-line arguments,
        3. calling ``self.preparer.main`` and feeding its captured output to
           ``self.analyzer.main`` through a redirected ``sys.stdin``.

        ``sys.stdout`` / ``sys.stdin`` are replaced while the command-line entry
        points run.  The original streams are restored in a ``finally`` clause so
        the redirection does not leak into other tests, even when an assertion
        in this test fails.
        """
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-11111])
        analysisQuery.baseCategoryId = -7
        analysisQuery.queryTimeSpan = timedelta(0, 86400)
        analysisQuery.recommender = ItemAssociationRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.targetItemIds = set([-33, -32, -31, -30])
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # The same result columns are verified in every scenario
        colNames = [
            "patient_id", "outcome.-33", "score.-33", "outcome.-32",
            "score.-32", "outcome.-31", "score.-31", "outcome.-30", "score.-30"
        ]

        def runCaptured(mainMethod, argv):
            """Call mainMethod(argv) with stdout redirected; return the output as a readable StringIO."""
            sys.stdout = StringIO()
            mainMethod(argv)
            return StringIO(sys.stdout.getvalue())

        def verifyScenario(expectedValues, timeLimitSeconds=None):
            """Check one scenario through the direct, command-line and prepared-file paths.

            expectedValues: values of the single expected result row, ordered as colNames.
            timeLimitSeconds: when given, applied both as baseRecQuery.timeDeltaMax and
                as the "-t" command-line option; when None neither is touched.
            """
            timeLimitArgs = []
            if timeLimitSeconds is not None:
                analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, timeLimitSeconds)
                timeLimitArgs = ["-t", str(timeLimitSeconds)]
            expectedResults = [RowItemModel(expectedValues, colNames)]

            # Direct call with the query object
            analysisResults = self.analyzer(analysisQuery)
            self.assertEqualStatResults(expectedResults, analysisResults, colNames)

            # Redo but run through command-line interface
            argv = (
                ["OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400"]
                + timeLimitArgs
                + ["-o", "-33,-32,-31,-30", "-m", "0", "-R",
                   "ItemAssociationRecommender", '0,-11111', "-"]
            )
            textOutput = runCaptured(self.analyzer.main, argv)
            self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                                  colNames)

            # Redo through prepared file intermediary
            argv = (
                ["PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400"]
                + timeLimitArgs
                + ["-o", "-33,-32,-31,-30", '0,-11111', "-"]
            )
            preparedDataFile = runCaptured(self.preparer.main, argv)

            # Read prepared data file from redirected stdin
            sys.stdin = preparedDataFile
            argv = (
                ["OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R",
                 "ItemAssociationRecommender"]
                + timeLimitArgs
                + ['-', "-"]
            )
            textOutput = runCaptured(self.analyzer.main, argv)
            self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                                  colNames)

        originalStdout = sys.stdout
        originalStdin = sys.stdin
        try:
            # Initial run without time limits on outcome measure
            verifyScenario(
                [-11111, +0, 0.222, +2, 0.611, +1, 0.222, +1, 0.222])

            # Now try with time limitation on outcome measure (1 week)
            verifyScenario(
                [-11111, +0, 0.222, +2, 0.611, +0, 0.222, +1, 0.222],
                604800)

            # Again, but with much stricter time limit of 2 days (negative test case)
            verifyScenario(
                [-11111, 0, 0.0109, 2, 0.0600, 0, 0.0109, 0, 0.0109],
                172800)
        finally:
            # Undo the stream redirection so later tests see the real stdin/stdout
            sys.stdout = originalStdout
            sys.stdin = originalStdin
예제 #12
0
    def setUp(self):
        """Build the clinical item schema and load the mock rows used by the test cases."""
        DBTestCase.setUp(self)
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        log.info("Populate the database with test data")

        # --- clinical_item_category: keep each stored id (as a string) on the test case
        self.clinicalItemCategoryIdStrList = []
        categoryColumns = ["clinical_item_category_id", "source_table"]
        categoryRows = [
            RowItemModel(values, categoryColumns)
            for values in (
                [-1, "Labs"],
                [-2, "Imaging"],
                [-3, "Meds"],
                [-4, "Nursing"],
                [-5, "Problems"],
                [-6, "Lab Results"],
            )
        ]
        for categoryRow in categoryRows:
            insertResult = DBUtil.findOrInsertItem("clinical_item_category",
                                                   categoryRow)
            (categoryId, _isNew) = insertResult
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # --- clinical_item
        itemColumns = ["clinical_item_id", "clinical_item_category_id", "name"]
        itemRows = [
            RowItemModel(values, itemColumns)
            for values in (
                [-1, -1, "CBC"],
                [-2, -1, "BMP"],
                [-3, -1, "Hepatic Panel"],
                [-4, -1, "Cardiac Enzymes"],
                [-5, -2, "CXR"],
                [-6, -2, "RUQ Ultrasound"],
                [-7, -2, "CT Abdomen/Pelvis"],
                [-8, -2, "CT PE Thorax"],
                [-9, -3, "Acetaminophen"],
                [-10, -3, "Carvedilol"],
                [-11, -3, "Enoxaparin"],
                [-12, -3, "Warfarin"],
                [-13, -3, "Ceftriaxone"],
                [-14, -4, "Foley Catheter"],
                [-15, -4, "Strict I&O"],
                [-16, -4, "Fall Precautions"],
            )
        ]
        for itemRow in itemRows:
            DBUtil.findOrInsertItem("clinical_item", itemRow)

        # --- patient_item: every row carries the same analyze_date
        analyzeDate = datetime(2010, 1, 1, 0)
        patientItemColumns = [
            "patient_item_id", "patient_id", "clinical_item_id", "item_date",
            "analyze_date"
        ]
        patientItemRows = [
            RowItemModel(
                [patientItemId, patientId, clinicalItemId, itemDate, analyzeDate],
                patientItemColumns)
            for (patientItemId, patientId, clinicalItemId, itemDate) in (
                (-1, -11111, -4, datetime(2000, 1, 1, 0)),
                (-2, -11111, -10, datetime(2000, 1, 1, 0)),
                (-3, -11111, -8, datetime(2000, 1, 1, 2)),
                (-4, -11111, -10, datetime(2000, 1, 2, 0)),
                (-5, -11111, -12, datetime(2000, 2, 1, 0)),
                (-10, -22222, -7, datetime(2000, 1, 5, 0)),
                (-12, -22222, -6, datetime(2000, 1, 9, 0)),
                (-13, -22222, -11, datetime(2000, 1, 9, 0)),
                (-14, -33333, -6, datetime(2000, 2, 9, 0)),
                (-15, -33333, -2, datetime(2000, 2, 11, 0)),
            )
        ]
        for patientItemRow in patientItemRows:
            DBUtil.findOrInsertItem("patient_item", patientItemRow)

        # --- clinical_item_association
        associationColumns = [
            "clinical_item_id", "subsequent_item_id",
            "patient_count_0", "patient_count_3600", "patient_count_86400",
            "patient_count_604800", "patient_count_any",
            "time_diff_sum", "time_diff_sum_squares",
        ]
        associationRows = [
            RowItemModel(values, associationColumns)
            for values in (
                # Self-associations
                [-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0],
                [-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0],
                [-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0],
                [-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0],
                [-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0],
                [-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0],

                # Zero count associations, probably shouldn't even be here.
                # If so, ignore them anyway
                [-2, -3, 0, 0, 0, 0, 0, 0.0, 0.0],
                [-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0],
                [-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0],
                [-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0],
                [-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0],
                [-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0],
                [-4, -7, 23, 33, 33, 33, 63, 40.0, 5420.0],
                [-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0],

                [-6, -2, 7, 7, 7, 7, 7, 1.0, 1.0],
                [-6, -4, 20, 20, 20, 20, 20, 1.0, 1.0],
            )
        ]

        # Time-interval suffixes ("0", "3600", ..., "any") of the patient_count_* columns
        countPrefix = "patient_count_"
        timeSuffixes = [
            column[len(countPrefix):]
            for column in associationColumns
            if column.startswith(countPrefix)
        ]
        for associationRow in associationRows:
            # Derive the non-patient count_* variations: a copy of the patient count,
            # plus 5 when the value is > 5 and it is not the zero time interval
            for timeSuffix in timeSuffixes:
                derivedCount = associationRow[countPrefix + timeSuffix]
                if timeSuffix != "0" and derivedCount > 5:
                    derivedCount += 5
                associationRow["count_%s" % timeSuffix] = derivedCount
            DBUtil.findOrInsertItem("clinical_item_association",
                                    associationRow)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        for cacheKey in ("analyzedPatientCount", "clinicalItemCountsUpdated"):
            self.dataManager.clearCacheData(cacheKey)

        self.recommender = ItemAssociationRecommender()
예제 #13
0
class TestItemRecommender(DBTestCase):
    def setUp(self):
        """Populate the database with the mock records the tests query, and build the recommender."""
        DBTestCase.setUp(self)
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        log.info("Populate the database with test data")

        # --- Categories: keep the stored IDs (as strings) so tearDown can delete exactly these rows
        self.clinicalItemCategoryIdStrList = list()
        categoryHeaders = ["clinical_item_category_id", "source_table"]
        categorySpecs = (
            (-1, "Labs"),
            (-2, "Imaging"),
            (-3, "Meds"),
            (-4, "Nursing"),
            (-5, "Problems"),
            (-6, "Lab Results"),
        )
        categoryModels = [
            RowItemModel(list(spec), categoryHeaders) for spec in categorySpecs
        ]
        for categoryModel in categoryModels:
            categoryId, _isNew = DBUtil.findOrInsertItem(
                "clinical_item_category", categoryModel)
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # --- Clinical items: (item ID, category ID, name)
        itemHeaders = ["clinical_item_id", "clinical_item_category_id", "name"]
        itemSpecs = (
            (-1, -1, "CBC"),
            (-2, -1, "BMP"),
            (-3, -1, "Hepatic Panel"),
            (-4, -1, "Cardiac Enzymes"),
            (-5, -2, "CXR"),
            (-6, -2, "RUQ Ultrasound"),
            (-7, -2, "CT Abdomen/Pelvis"),
            (-8, -2, "CT PE Thorax"),
            (-9, -3, "Acetaminophen"),
            (-10, -3, "Carvedilol"),
            (-11, -3, "Enoxaparin"),
            (-12, -3, "Warfarin"),
            (-13, -3, "Ceftriaxone"),
            (-14, -4, "Foley Catheter"),
            (-15, -4, "Strict I&O"),
            (-16, -4, "Fall Precautions"),
        )
        itemModels = [RowItemModel(list(spec), itemHeaders) for spec in itemSpecs]
        for itemModel in itemModels:
            _itemId, _isNew = DBUtil.findOrInsertItem("clinical_item", itemModel)

        # --- Patient items: every row shares the same analyze_date
        patientItemHeaders = [
            "patient_item_id", "patient_id", "clinical_item_id", "item_date",
            "analyze_date"
        ]
        analyzeDate = datetime(2010, 1, 1, 0)
        patientItemSpecs = (
            (-1, -11111, -4, datetime(2000, 1, 1, 0)),
            (-2, -11111, -10, datetime(2000, 1, 1, 0)),
            (-3, -11111, -8, datetime(2000, 1, 1, 2)),
            (-4, -11111, -10, datetime(2000, 1, 2, 0)),
            (-5, -11111, -12, datetime(2000, 2, 1, 0)),
            (-10, -22222, -7, datetime(2000, 1, 5, 0)),
            (-12, -22222, -6, datetime(2000, 1, 9, 0)),
            (-13, -22222, -11, datetime(2000, 1, 9, 0)),
            (-14, -33333, -6, datetime(2000, 2, 9, 0)),
            (-15, -33333, -2, datetime(2000, 2, 11, 0)),
        )
        patientItemModels = [
            RowItemModel(list(spec) + [analyzeDate], patientItemHeaders)
            for spec in patientItemSpecs
        ]
        for patientItemModel in patientItemModels:
            _itemId, _isNew = DBUtil.findOrInsertItem("patient_item",
                                                      patientItemModel)

        # --- Item associations: (item, subsequent item, five patient counts, time diff sum, sum of squares)
        associationHeaders = [
            "clinical_item_id", "subsequent_item_id",
            "patient_count_0", "patient_count_3600", "patient_count_86400",
            "patient_count_604800", "patient_count_any",
            "time_diff_sum", "time_diff_sum_squares",
        ]
        associationSpecs = (
            # Self-associations
            (-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0),
            (-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0),
            (-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0),
            (-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0),
            (-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0),
            (-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0),

            # Zero count associations, probably shouldn't even be here. If so, ignore them anyway
            (-2, -3, 0, 0, 0, 0, 0, 0.0, 0.0),
            (-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0),
            (-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0),
            (-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0),
            (-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0),
            (-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0),
            (-4, -7, 23, 33, 33, 33, 63, 40.0, 5420.0),
            (-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0),

            (-6, -2, 7, 7, 7, 7, 7, 1.0, 1.0),
            (-6, -4, 20, 20, 20, 20, 20, 1.0, 1.0),
        )
        associationModels = [
            RowItemModel(list(spec), associationHeaders)
            for spec in associationSpecs
        ]
        patientCountPrefix = "patient_count_"
        for associationModel in associationModels:
            # Derive a "count_<time>" column from each "patient_count_<time>" column.
            # The derived value is 5 higher when the source value exceeds 5,
            # except for the zero time interval which is copied unchanged.
            for header in associationHeaders:
                if not header.startswith(patientCountPrefix):
                    continue
                timeStr = header[len(patientCountPrefix):]
                countKey = "count_%s" % timeStr
                associationModel[countKey] = associationModel[header]
                if timeStr != "0" and associationModel[header] > 5:
                    associationModel[countKey] += 5
            _itemId, _isNew = DBUtil.findOrInsertItem(
                "clinical_item_association", associationModel)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        for cacheKey in ("analyzedPatientCount", "clinicalItemCountsUpdated"):
            self.dataManager.clearCacheData(cacheKey)

        # Instance to test on
        self.recommender = ItemAssociationRecommender()

    def tearDown(self):
        """Remove the records inserted by setUp, then run the base class tearDown.

        Associations, patient items and clinical items are deleted by their
        negative IDs; categories are deleted by the IDs recorded during setUp.
        """
        log.info("Purge test records from the database")

        negativeIdPurges = (
            "delete from clinical_item_association where clinical_item_id < 0",
            "delete from patient_item where patient_item_id < 0",
            "delete from clinical_item where clinical_item_id < 0",
        )
        for purgeStatement in negativeIdPurges:
            DBUtil.execute(purgeStatement)

        categoryIdCsv = ",".join(self.clinicalItemCategoryIdStrList)
        DBUtil.execute(
            "delete from clinical_item_category where clinical_item_category_id in (%s)"
            % categoryIdCsv)

        DBTestCase.tearDown(self)

    def test_recommender(self):
        """Run the recommender on the mock data under a series of query settings
        and verify which item IDs come back, and in what order.
        """
        query = RecommenderQuery()
        # queryItemIds, excludeItemIds, categoryIds and timeDeltaMax start at their defaults.
        # Per the original notes: timeDeltaMax set to one of the constants (DELTA_ZERO,
        # DELTA_HOUR, etc.) counts item associations that occurred within that time delta
        # as co-occurrent; left blank, all items within a given patient are co-occurrent.
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        def verifyRankedIds(expectedItemIds):
            """Query with the current settings and compare against the expected ID ranking."""
            idHeaders = ["clinical_item_id"]
            expectedRows = [
                RowItemModel([itemId], idHeaders) for itemId in expectedItemIds
            ]
            actualRows = self.recommender(query)
            self.assertEqualRecommendedData(expectedRows, actualRows, query)

        log.debug(
            "Query with no item key input, just return ranks by general likelihood then."
        )
        verifyRankedIds([-3, -6, -5])

        log.debug(
            "Query with key item inputs for which no data exists.  Effecitvely ignore it then, so just return ranks by general likelihood."
        )
        query.queryItemIds = set([-100])
        verifyRankedIds([-3, -6, -5])

        log.debug("Query with category filter on recommended results.")
        query.queryItemIds = set([-100])
        query.excludeCategoryIds = set([-1, -4, -5, -6])
        verifyRankedIds([-6, -5])

        log.debug(
            "Query with category filter and specific exclusion filter on recommended results."
        )
        query.queryItemIds = set([-100])
        query.excludeItemIds = set([-6])
        query.excludeCategoryIds = set([-1, -4, -5, -6])
        verifyRankedIds([-5])

        log.debug(
            "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
        )
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        verifyRankedIds([-4, -6])

        log.debug(
            "General query but set a limit on time delta worth counting item associations"
        )
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        query.timeDeltaMax = DELTA_HOUR
        verifyRankedIds([-6, -4])

        log.debug("General query with category limit")
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set([-2, -4, -5, -6])
        query.timeDeltaMax = DELTA_HOUR
        verifyRankedIds([-4])

        log.debug("General query with specific exclusion")
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set([-4, -3, -2])
        query.excludeCategoryIds = set()
        query.timeDeltaMax = DELTA_HOUR
        verifyRankedIds([-6])

    def test_recommender_aggregation(self):
        """Verify the scores produced under each scoring aggregation method,
        and then with a field value filter added to the query.
        """
        query = RecommenderQuery()
        query.countPrefix = "patient_"
        query.queryItemIds = set([-2, -5])
        # excludeItemIds, categoryIds and timeDeltaMax are left at their defaults
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        headers = ["clinical_item_id", "conditionalFreq", "freqRatio"]

        # Item -6 is expected with the same values in every scenario below
        item6Values = [-6, 0.16667, 7.142857]

        # (aggregation method, expected values for item -4); None keeps the query's default method
        scenarios = (
            (None, [-4, 0.3, 22.5]),                    # Default weighted aggregation method
            ("unweighted", [-4, 0.32857, 24.64286]),
            ("SerialBayes", [-4, 0.89157, 66.867471]),
            # Naive Bayes without truncating negative values.
            # With truncating negative values this would be [-4, 0.8, 58.59707]
            ("NaiveBayes", [-4, 3.75, 281.25]),
        )
        for aggregationMethod, item4Values in scenarios:
            if aggregationMethod is not None:
                query.aggregationMethod = aggregationMethod
            expectedData = [
                RowItemModel(list(item4Values), headers),
                RowItemModel(list(item6Values), headers),
            ]
            recommendedData = self.recommender(query)
            self.assertEqualRecommendedData(expectedData, recommendedData,
                                            query)

        # Apply value filter (aggregation method is still NaiveBayes at this point)
        query.fieldFilters["freqRatio>"] = 10.0
        expectedData = [RowItemModel(list(item6Values), headers)]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

    def assertEqualRecommendedData(self, expectedData, recommendedData, query):
        """Verify recommendedData against expectedData, pairwise and in rank order.

        For each aligned (expected, recommended) pair:
          * the fields named by the expected item are populated on the recommended
            item through ItemAssociationRecommender.populateDerivedStats,
          * assertEqualDict is run with the "clinical_item_id" key list,
          * every field present in the expected item is compared: floats to
            5 decimal places, anything else for exact equality.
        Consecutive recommended items must have non-increasing values of
        query.sortField (descending score order).  Finally, both lists must
        have the same length, since zip() silently stops at the shorter one.

        expectedData / recommendedData: sequences of dict-like row items.
        query: object whose sortField attribute names the score field.
        """
        lastScore = None
        for expectedItem, recommendedItem in zip(expectedData,
                                                 recommendedData):
            # Snapshot the key list once; works the same on Python 2 and 3
            expectedKeys = list(expectedItem.keys())

            # Ensure derived statistics are populated to enable comparisons
            ItemAssociationRecommender.populateDerivedStats(
                recommendedItem, expectedKeys)

            self.assertEqualDict(expectedItem, recommendedItem,
                                 ["clinical_item_id"])

            # If specified in the expected item, then verify the specific values
            for key in expectedKeys:
                expectedValue = expectedItem[key]
                if isinstance(expectedValue, float):
                    self.assertAlmostEqual(expectedValue,
                                           recommendedItem[key], 5)
                else:
                    self.assertEqual(expectedValue, recommendedItem[key])

            # Verify descending order of scores
            score = recommendedItem[query.sortField]
            if lastScore is not None:
                self.assertLessEqual(score, lastScore)
            lastScore = score

        self.assertEqual(len(expectedData), len(recommendedData))

    def test_recommender_stats(self):
        """Query the mock data for item -6 and verify the calculated statistics,
        first with the "patient_" count prefix sorted by P-Fisher, then with an
        empty count prefix sorted by oddsRatio.
        """
        commonHeaders = [
            "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq",
            "baselineFreq", "freqRatio"
        ]

        query = RecommenderQuery()
        initialParams = {
            "countPrefix": "patient_",
            "queryItemIds": "-6",
            "resultCount": "3",  # Just get top 3 ranks for simplicity
            "maxRecommendedId": "0",  # Artificial constraint to focus only on test data
            "sortField": "P-Fisher",  # Specifically request derived expected vs. observed stats
        }
        query.parseParams(initialParams)

        log.debug("Query with single item not perturbed by others.")
        headers = commonHeaders + ["P-Fisher"]
        expectedValueRows = (
            [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 7.0, 0.1, 0.0100, 10.0, 3.7e-06],
            [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 20.0, 0.286, 0.0133, 21.42857, 1.2e-23],
        )
        expectedData = [
            RowItemModel(valueRow, headers) for valueRow in expectedValueRows
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedDataStats(expectedData, recommendedData,
                                             headers)

        log.debug("Query for non-unique counts.")
        overrideParams = {
            "countPrefix": "",
            "sortField": "oddsRatio",
        }
        query.parseParams(overrideParams)
        headers = commonHeaders + ["oddsRatio"]
        expectedValueRows = (
            [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 25.0, 0.35714, 0.01333, 26.7857, 107.96296],
            [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 12.0, 0.1714, 0.01, 17.1429, 33.47126],
        )
        expectedData = [
            RowItemModel(valueRow, headers) for valueRow in expectedValueRows
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedDataStats(expectedData, recommendedData,
                                             headers)

    def assertEqualRecommendedDataStats(self, expectedData, recommendedData,
                                        headers):
        """Verify that the actual score / stat values of the recommendations match.

        The two sequences must have equal length.  For each aligned pair, the
        fields listed in headers are populated on the recommended item through
        ItemAssociationRecommender.populateDerivedStats, then every header's
        value is compared to 3 decimal places.

        expectedData / recommendedData: sequences of dict-like row items.
        headers: names of the fields to populate and compare.
        """
        self.assertEqual(len(expectedData), len(recommendedData))
        for expectedItem, recommendedItem in zip(expectedData,
                                                 recommendedData):
            # Ensure the recommendedData has all fields of interest populated / calculated
            ItemAssociationRecommender.populateDerivedStats(
                recommendedItem, headers)
            for header in headers:
                expectedValue = expectedItem[header]
                recommendedValue = recommendedItem[header]
                msg = 'Dicts diff with key (%s).  Verify = %s, Sample = %s' % (
                    header, expectedValue, recommendedValue)
                # assertAlmostEqual rather than the deprecated assertAlmostEquals alias
                self.assertAlmostEqual(expectedValue, recommendedValue, 3,
                                       msg)

    def test_recommender_stats_commandline(self):
        """Run the recommender through its main(argv) entry point and verify
        the statistics found in the text it writes to stdout.

        stdout is redirected only for the duration of each main() call and is
        always restored afterwards, so the redirect cannot leak into other tests.
        """
        def runMainCapturingStdout(argv):
            """Call self.recommender.main(argv); return its stdout text wrapped in a fresh StringIO."""
            capturedOutput = StringIO()
            originalStdout = sys.stdout
            sys.stdout = capturedOutput  # Redirect stdout output to collect test results
            try:
                self.recommender.main(argv)
            finally:
                # Restore even when main() raises
                sys.stdout = originalStdout
            return StringIO(capturedOutput.getvalue())

        # Run the recommender against the mock test data above and verify expected stats calculations
        log.debug("Query with single item not perturbed by others.")
        headers = [
            "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq",
            "baselineFreq", "freqRatio", "P-Fisher"
        ]
        expectedData = \
            [
                RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0,  7.0,  0.1,    0.0100, 10.0,       3.7e-06], headers ),
                RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 20.0,  0.286,  0.0133, 21.42857,   1.2e-23], headers ),
            ]
        # NOTE(review): the trailing "-" argument presumably selects stdout as the output target - confirm
        argv = [
            "ItemRecommender.py",
            "maxRecommendedId=0&queryItemIds=-6&countPrefix=patient_&resultCount=3&sortField=P-Fisher",
            "-"
        ]
        textOutput = runMainCapturingStdout(argv)
        self.assertEqualRecommendedDataStatsTextOutput(expectedData,
                                                       textOutput, headers)

        log.debug("Query for non-unique counts.")
        headers = [
            "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq",
            "baselineFreq", "freqRatio", "oddsRatio"
        ]
        expectedData = \
            [   RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 25.0,  0.35714, 0.01333,  26.7857, 107.96296], headers ),
                RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 12.0,  0.1714,  0.01,     17.1429,  33.47126], headers ),
            ]
        argv = [
            "ItemRecommender.py",
            "maxRecommendedId=0&queryItemIds=-6&countPrefix=&resultCount=3&sortField=oddsRatio",
            "-"
        ]
        textOutput = runMainCapturingStdout(argv)
        self.assertEqualRecommendedDataStatsTextOutput(expectedData,
                                                       textOutput, headers)

    def assertEqualRecommendedDataStatsTextOutput(self, expectedData,
                                                  textOutput, headers):
        """Parse tab-delimited recommender output and verify its stat values.

        Each row read from textOutput by TabDictReader has the fields named in
        headers converted to float; the resulting rows are then checked with
        assertEqualRecommendedDataStats.

        expectedData: sequence of dict-like rows holding the expected values.
        textOutput: file-like object containing the recommender's text output.
        headers: names of the fields to parse as numbers and compare.
        """
        recommendedData = list()
        for dataRow in TabDictReader(textOutput):
            # Walk the header list rather than the row itself, so the row is not
            # being iterated while its values are replaced
            for header in headers:
                if header in dataRow:
                    # Parse into numerical values for comparison
                    dataRow[header] = float(dataRow[header])
            recommendedData.append(dataRow)
        self.assertEqualRecommendedDataStats(expectedData, recommendedData,
                                             headers)

    def test_dataCache(self):
        """Test that repeating queries with the cache turned on does not result in extra DB queries.

        The dataManager's queryCount is sampled after each recommender call:
        it is expected to change on the first cached run and to stay constant
        for every later repeat, subset query and partially-new query, while the
        returned recommendations stay equal to the corresponding baseline.
        """
        query = RecommenderQuery()
        query.countPrefix = "patient_"
        query.queryItemIds = set([-2, -5])
        # excludeItemIds, categoryIds and timeDeltaMax are left at their defaults
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        dataManager = self.recommender.dataManager

        # First query without cache
        dataManager.dataCache = None
        baselineData = self.recommender(query)
        baselineQueryCount = dataManager.queryCount

        # Redo query with cache
        dataManager.dataCache = dict()
        newData = self.recommender(query)
        newQueryCount = dataManager.queryCount
        # Ensure getting same results
        self.assertEqualRecommendedData(baselineData, newData, query)
        # Expect needed more queries since no prior cache
        self.assertNotEqual(baselineQueryCount, newQueryCount)
        baselineQueryCount = newQueryCount

        # Again, but should be no new query since have cached results last time
        newData = self.recommender(query)
        newQueryCount = dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Repeat multiple times, should still have no new query activity
        for _iRepeat in range(10):
            newData = self.recommender(query)
            newQueryCount = dataManager.queryCount
            self.assertEqualRecommendedData(baselineData, newData, query)
            self.assertEqual(baselineQueryCount, newQueryCount)

        # Query for subset should still yield no new query
        query.queryItemIds = set([-2])
        newData = self.recommender(query)
        newQueryCount = dataManager.queryCount
        baselineData = newData  # New baseline for subset
        # Expect no queries for subsets
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Repeat query for subset
        newData = self.recommender(query)
        newQueryCount = dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        # Expect no queries for subsets
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Query for partial subset, partial new
        query.queryItemIds = set([-5, -6])
        newData = self.recommender(query)
        newQueryCount = dataManager.queryCount
        baselineData = newData  # New baseline for partial subset
        # Expect no new queries for subsets, because first query should have done mass-all query
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Repeat for partial subset, no longer new.
        # The baseline is deliberately NOT reassigned here, so the repeated
        # result is compared against the previous run rather than against itself.
        newData = self.recommender(query)
        newQueryCount = dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)
예제 #14
0
class ItemRecommendationTable(BaseDynamicData):
    """Relay a recommender query from request parameters to an HTML table.

    Request parameters are parsed into a RecommenderQuery, run through an
    ItemAssociationRecommender, and the resulting header cells and data rows
    are written back into requestData under "fieldHeaders" and "dataRows".
    """

    def __init__(self):
        """Install default request parameters and create the recommender."""
        BaseDynamicData.__init__(self)

        # (parameter name, default value) pairs, assigned in this order
        parameterDefaults = (
            ("queryItemIds", ""),
            ("targetItemIds", ""),
            ("excludeItemIds", ""),
            ("excludeCategoryIds", ""),
            ("timeDeltaMax", ""),
            ("sortField", "PPV"),
            ("sortReverse", "True"),
            ("resultCount", "10"),
            ("invertQuery", ""),
            ("showCounts", ""),
            ("countPrefix", ""),
            ("aggregationMethod", "weighted"),
            # Output slots filled in by action_default
            ("fieldHeaders", ""),
            ("dataRows", ""),
        )
        for paramName, defaultValue in parameterDefaults:
            self.requestData[paramName] = defaultValue

        # Register action_default (by method name) under the "resultCount" key.
        # NOTE(review): presumably the base class invokes it when that
        # parameter is present in the request -- confirm in BaseDynamicData.
        self.addHandler("resultCount",
                        ItemRecommendationTable.action_default.__name__)

        # Recommender instance to query; its data manager uses the
        # module-level webDataCache as its cache.
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache

    def action_default(self):
        """Run the query described by requestData and store the HTML output.

        Writes the header cells to requestData["fieldHeaders"] and the
        formatted result rows to requestData["dataRows"].
        """
        query = RecommenderQuery()
        query.parseParams(self.requestData)
        displayFields = query.getDisplayFields()

        recommendedData = self.recommender(query)

        hasResults = len(recommendedData) > 0
        if hasResults:
            # Denormalize results with links to clinical item descriptions
            self.recommender.formatRecommenderResults(recommendedData)

        # Add HTML-ready values and a control field to every result row
        for resultRow in recommendedData:
            self.prepareResultRow(resultRow, displayFields)

        # Headers are labelled with the original field names, while the
        # table columns read the "Format"-suffixed versions of each field.
        headerCells, formatFieldNames = self.prepareDisplayHeaders(
            displayFields)
        self.requestData["fieldHeaders"] = headerCells

        columnNames = [
            "controls", "rank", "name", "description", "category_description"
        ] + list(formatFieldNames)

        htmlFormatter = HtmlResultsFormatter(StringIO(),
                                             valign="middle",
                                             align="center")
        htmlFormatter.formatResultDicts(recommendedData, columnNames)

        renderedRows = htmlFormatter.getOutFile().getvalue()
        self.requestData["dataRows"] = renderedRows

    def prepareDisplayHeaders(self, displayFields):
        """Build the header cell HTML and the matching formatted field names.

        Returns a 2-tuple: the concatenated <th> cells for displayFields
        (plus CORE_FIELDS when the "showCounts" request parameter is not one
        of FALSE_STRINGS), and the list of those same field names with
        "Format" appended.
        """
        includeCounts = (self.requestData["showCounts"].lower()
                         not in FALSE_STRINGS)

        headerParts = [
            '<th nowrap>', '</th><th nowrap>'.join(displayFields), '</th>'
        ]
        formatFieldNames = ['%sFormat' % name for name in displayFields]

        if includeCounts:
            # Count columns are appended after the requested display fields
            headerParts.extend(
                ['<th>', '</th><th>'.join(CORE_FIELDS), '</th>'])
            formatFieldNames.extend('%sFormat' % name for name in CORE_FIELDS)

        return (''.join(headerParts), formatFieldNames)

    def prepareResultRow(self, dataModel, displayFields):
        """Add display-ready entries to one result row, in place.

        Sets "controls", replaces commas in "name" with dashes, fills in any
        missing display fields from the contingency statistics, and stores a
        formatted string under "<field>Format" for each display and core
        field.
        """
        dataModel["controls"] = CONTROLS_TEMPLATE % dataModel
        dataModel["name"] = dataModel["name"].replace(",", "-")

        if "nAB" not in dataModel:
            # Baseline query without query items: reuse the matching totals
            # so the statistics below still have numbers to work with.
            dataModel["nAB"] = dataModel["nB"]
            dataModel["nA"] = dataModel["N"]

        contStats = ContingencyStats(dataModel["nAB"], dataModel["nA"],
                                     dataModel["nB"], dataModel["N"])
        contStats.normalize(truncateNegativeValues=False)

        for fieldName in displayFields:
            if fieldName not in dataModel:
                # Not supplied directly; take it from the derived statistics
                dataModel[fieldName] = contStats[fieldName]

            if fieldName in CORE_FIELDS:
                # Core count fields are formatted in the loop further down
                continue

            fieldValue = dataModel[fieldName]
            formatKey = "%sFormat" % fieldName
            if abs(fieldValue) < 0.01:
                # Scientific notation keeps very small values readable
                dataModel[formatKey] = "%.1e" % fieldValue
            elif fieldValue == sys.float_info.max:
                # Symbolic representation of very large value
                dataModel[formatKey] = "MaxOverflow"
            else:
                # Everything else is shown as a plain floating point value
                dataModel[formatKey] = "%.2f" % fieldValue

        for countField in CORE_FIELDS:
            if countField in BASELINE_FIELDS:
                # Baseline counts are expressed as integers
                pattern = "%d"
            elif dataModel[countField] > 10:
                # May have small virtual counts from derived scenarios, so
                # the remaining fields keep decimals: one place above 10
                pattern = "%.1f"
            else:
                pattern = "%.2f"
            dataModel["%sFormat" % countField] = pattern % dataModel[countField]
예제 #15
0
    def action_default(self):
        """Find related orders via the association recommender and render them.

        Parses requestData into a RecommenderQuery (falling back to default
        sort field and exclusion lists when none are given), runs the
        recommender, and stores the header cells in
        requestData["fieldHeaders"] and the row HTML in
        requestData["dataRows"].
        """
        # Fresh recommender instance whose data manager uses webDataCache
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache

        params = self.requestData

        query = RecommenderQuery()
        if params["sortField"] == "":
            # P-Fisher-NegLog should yield better results, but beware, it
            # takes much longer to calculate
            params["sortField"] = "P-YatesChi2-NegLog"
        query.parseParams(params)

        # Fall back on the recommender's default exclusions when none given
        if len(query.excludeItemIds) == 0:
            query.excludeItemIds = (
                self.recommender.defaultExcludedClinicalItemIds())
        if len(query.excludeCategoryIds) == 0:
            query.excludeCategoryIds = (
                self.recommender.defaultExcludedClinicalItemCategoryIds())
        # Disabled: query.fieldList.extend(["prevalence", "PPV", "RR"])

        requestedFields = params["displayFields"]
        displayFields = (requestedFields.split(",")
                         if requestedFields != "" else list())

        recommendedData = self.recommender(query)

        if len(recommendedData) > 0:
            # Denormalize results with links to clinical item descriptions
            self.recommender.formatRecommenderResults(recommendedData)

        # Headers are labelled with the original field names, while the
        # rows display the "Format"-suffixed versions of each field.
        headerCells, formatFieldNames = self.prepareDisplayHeaders(
            displayFields)
        params["fieldHeaders"] = headerCells

        # Add HTML-ready values and a control field to every result row
        for resultRow in recommendedData:
            self.prepareResultRow(resultRow, displayFields)

        # Try to organize by category
        if params["groupByCategory"]:
            recommendedData = self.recommender.organizeByCategory(
                recommendedData)

        # "name" (the code) and "category_description" are not shown as columns
        columnNames = ["controls"] + list(formatFieldNames) + ["description"]

        rowHtml = []
        previousRow = None
        for resultRow in recommendedData:
            startsNewCategory = (
                previousRow is None
                or previousRow["category_description"] !=
                resultRow["category_description"])
            # Only emit a category header on the first row of each category
            showCategory = (params["groupByCategory"] and startsNewCategory)
            if showCategory:
                rowHtml.append(CATEGORY_HEADER_TEMPLATE % resultRow)
            rowHtml.append(
                self.formatRowHTML(resultRow, columnNames, showCategory))
            previousRow = resultRow

        params["dataRows"] = "\n".join(rowHtml)
예제 #16
0
    order by ic.section, ic.name, ci.name;
    """

# Run the reference order query; each result row is unpacked below as
# (admitDxId, sectionName, guidelineName, itemId, itemName, itemDescription, itemCount)
resultsTable = DBUtil.execute(existingReferenceOrderQuery)

# Set of item IDs seen for each admit diagnosis ID
itemIdsByAdmitDxId = {}
# Distinct (admitDxId, sectionName, guidelineName) combinations encountered
admitDxIdSectionGuidelineNameTuples = set()

for (admitDxId, sectionName, guidelineName, itemId, itemName,
     itemDescription, itemCount) in resultsTable:
    # Create the per-diagnosis set on first sight, then record the item
    itemIdsByAdmitDxId.setdefault(admitDxId, set()).add(itemId)
    guidelineKey = (admitDxId, sectionName, guidelineName)
    admitDxIdSectionGuidelineNameTuples.add(guidelineKey)

recommender = ItemAssociationRecommender()

for admitDxId, itemIds in itemIdsByAdmitDxId.iteritems():
    print >> sys.stderr, admitDxId, len(itemIds)
    recQuery = RecommenderQuery()
    recQuery.excludeItemIds = recommender.defaultExcludedClinicalItemIds()
    recQuery.excludeCategoryIds = recommender.defaultExcludedClinicalItemCategoryIds(
    )
    recQuery.queryItemIds = [admitDxId]
    recQuery.timeDeltaMax = timedelta(1)
    # Within one day
    recQuery.countPrefix = "patient_"
    recQuery.limit = TOP_ITEM_COUNT

    # Top results by P-value
    recQuery.sortField = "P-YatesChi2-NegLog"