예제 #1
0
    def test_recommender(self):
        # Drive the recommender through a series of queries against the mock
        # test data and verify the ranked clinical_item_id values returned.
        # A single query object is reused for every scenario, so any setting
        # stays in effect for later scenarios until it is reassigned.

        def rowsFor(itemIds):
            # Wrap each expected ID as a single-column RowItemModel, using a
            # fresh header list for every scenario.
            columnNames = ["clinical_item_id"]
            return [RowItemModel([itemId], columnNames) for itemId in itemIds]

        def verify(expectedItemIds):
            # Build the expectation first, then run the query and compare.
            expectedRows = rowsFor(expectedItemIds)
            actualRows = self.recommender(testQuery)
            self.assertEqualRecommendedData(expectedRows, actualRows, testQuery)

        testQuery = RecommenderQuery()
        # Left untouched at this point: queryItemIds, excludeItemIds,
        # categoryIds and timeDeltaMax.  For timeDeltaMax: if set to one of
        # the constants (DELTA_ZERO, DELTA_HOUR, etc.), item associations that
        # occurred within that time delta count as co-occurrent; if left
        # blank, all items within a given patient count as co-occurrent.
        # Just get the top 3 ranks for simplicity.
        testQuery.limit = 3
        # Artificial constraint to focus only on test data.
        testQuery.maxRecommendedId = 0

        # Scenario 1: no query items at all.
        log.debug(
            "Query with no item key input, just return ranks by general likelihood then."
        )
        verify([-3, -6, -5])

        # Scenario 2: a query item that has no data behind it.
        log.debug(
            "Query with key item inputs for which no data exists.  Effecitvely ignore it then, so just return ranks by general likelihood."
        )
        testQuery.queryItemIds = {-100}
        verify([-3, -6, -5])

        # Scenario 3: add a category exclusion filter.
        log.debug("Query with category filter on recommended results.")
        testQuery.queryItemIds = {-100}
        testQuery.excludeCategoryIds = {-1, -4, -5, -6}
        verify([-6, -5])

        # Scenario 4: category exclusion plus one specifically excluded item.
        log.debug(
            "Query with category filter and specific exclusion filter on recommended results."
        )
        testQuery.queryItemIds = {-100}
        testQuery.excludeItemIds = {-6}
        testQuery.excludeCategoryIds = {-1, -4, -5, -6}
        verify([-5])

        # Scenario 5: real query items, with both exclusion filters cleared.
        log.debug(
            "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
        )
        testQuery.queryItemIds = {-2, -5, -100}
        testQuery.excludeItemIds = set()
        testQuery.excludeCategoryIds = set()
        verify([-4, -6])

        # Scenario 6: same query, but with a time delta limit; the expected
        # order of the two results flips.
        log.debug(
            "General query but set a limit on time delta worth counting item associations"
        )
        testQuery.queryItemIds = {-2, -5, -100}
        testQuery.excludeItemIds = set()
        testQuery.excludeCategoryIds = set()
        testQuery.timeDeltaMax = DELTA_HOUR
        verify([-6, -4])

        # Scenario 7: time delta limit combined with a category exclusion.
        log.debug("General query with category limit")
        testQuery.queryItemIds = {-2, -5, -100}
        testQuery.excludeItemIds = set()
        testQuery.excludeCategoryIds = {-2, -4, -5, -6}
        testQuery.timeDeltaMax = DELTA_HOUR
        verify([-4])

        # Scenario 8: time delta limit combined with specific item exclusions.
        log.debug("General query with specific exclusion")
        testQuery.queryItemIds = {-2, -5, -100}
        testQuery.excludeItemIds = {-4, -3, -2}
        testQuery.excludeCategoryIds = set()
        testQuery.timeDeltaMax = DELTA_HOUR
        verify([-6])
예제 #2
0
    # NOTE(review): this is the tail of a loop whose header lies outside this
    # excerpt; admitDxId, itemId, sectionName and guidelineName come from there.
    # Create the item ID set the first time this admit diagnosis is seen.
    if admitDxId not in itemIdsByAdmitDxId:
        itemIdsByAdmitDxId[admitDxId] = set()
    itemIdsByAdmitDxId[admitDxId].add(itemId)
    # Also record the (admit diagnosis, section, guideline) combination.
    admitDxIdSectionGuidelineNameTuples.add(
        (admitDxId, sectionName, guidelineName))

# One recommender instance, reused for every admit diagnosis below.
# NOTE(review): this script uses Python 2 syntax (`print >> stream` and
# `dict.iteritems`); it will not run unmodified on Python 3.
recommender = ItemAssociationRecommender()

# For each admit diagnosis, add the recommender's top results to its item set.
for admitDxId, itemIds in itemIdsByAdmitDxId.iteritems():
    # Progress trace to stderr: item count before adding recommendations.
    print >> sys.stderr, admitDxId, len(itemIds)
    recQuery = RecommenderQuery()
    # Start from the exclusion lists the recommender itself supplies.
    recQuery.excludeItemIds = recommender.defaultExcludedClinicalItemIds()
    recQuery.excludeCategoryIds = recommender.defaultExcludedClinicalItemCategoryIds(
    )
    # The admit diagnosis is the only query item.
    recQuery.queryItemIds = [admitDxId]
    # Within one day (assumes `timedelta` is datetime.timedelta, whose first
    # positional argument is days; the import is not visible here).
    recQuery.timeDeltaMax = timedelta(1)
    # NOTE(review): presumably selects the patient-level count columns;
    # confirm against RecommenderQuery.
    recQuery.countPrefix = "patient_"
    recQuery.limit = TOP_ITEM_COUNT

    # Top results by P-value
    recQuery.sortField = "P-YatesChi2-NegLog"
    results = recommender(recQuery)
    #recommender.formatRecommenderResults(results);
    for result in results:
        # itemIds is the set stored in itemIdsByAdmitDxId, so this updates
        # the dict's value in place; the dict's keys are not changed while
        # it is being iterated.
        itemIds.add(result["clinical_item_id"])
        #print >> sys.stderr, result["description"];

    # Progress trace to stderr: item count after adding the recommended items.
    print >> sys.stderr, admitDxId, len(itemIds)

    # Top results by PPV