Beispiel #1
0
 def build_composite_clinical_item(components, name, description,
                                   category_id):
     """
     Build a composite clinical item by driving DataManager's command-line
     entry point (medinfo/cpoe/DataManager.py) with --compositeRelated.

     The single option argument has the form
     "<comma-joined components>|<name>|<description>|<category_id>".
     """
     component_str = ','.join(map(str, components))
     log.debug('(%s, %s, %s) = (%s)' %
               (name, description, category_id, component_str))

     fields = (component_str, name, description, category_id)
     composite_arg = '%s|%s|%s|%s' % fields

     # First argv entry stands in for the script name, as on a real command line
     argv = ['medinfo/cpoe/DataManager.py', '--compositeRelated', composite_arg]
     DataManager().main(argv)
Beispiel #2
0
 def action_updateCounts(self):
     """Ask a fresh DataManager to update the clinical item counts."""
     # Update the summary counts to facilitate future rapid queries
     DataManager().updateClinicalItemCounts()
Beispiel #3
0
class TestOutcomePredictionAnalysis(DBTestCase):
    def setUp(self):
        """Seed the database with mock records and create the objects under test.

        Every fixture row uses a negative ID.  The category IDs reported back
        by DBUtil.findOrInsertItem are remembered (as strings) in
        self.clinicalItemCategoryIdStrList.
        """
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        def insertRows(tableName, columnNames, rowValues):
            # Wrap every row in a RowItemModel first, then find-or-insert them in order
            rowModels = [RowItemModel(values, columnNames) for values in rowValues]
            for rowModel in rowModels:
                (rowId, wasInserted) = DBUtil.findOrInsertItem(tableName, rowModel)

        # ---- clinical_item_category ----
        self.clinicalItemCategoryIdStrList = []
        categoryColumns = ["clinical_item_category_id", "source_table"]
        categoryModels = [
            RowItemModel(values, categoryColumns)
            for values in (
                [-1, "Labs"],
                [-2, "Imaging"],
                [-3, "Meds"],
                [-4, "Nursing"],
                [-5, "Problems"],
                [-6, "Lab Results"],
                [-7, "Admit Dx"],
            )
        ]
        for categoryModel in categoryModels:
            (categoryId, wasInserted) = DBUtil.findOrInsertItem(
                "clinical_item_category", categoryModel)
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # ---- clinical_item ----
        insertRows(
            "clinical_item",
            ["clinical_item_id", "clinical_item_category_id", "analysis_status", "name"],
            [
                [-1, -1, 1, "CBC"],
                [-2, -1, 1, "BMP"],
                [-3, -1, 1, "Hepatic Panel"],
                [-4, -1, 1, "Cardiac Enzymes"],
                [-5, -2, 1, "CXR"],
                [-6, -2, 1, "RUQ Ultrasound"],
                [-7, -2, 1, "CT Abdomen/Pelvis"],
                [-8, -2, 1, "CT PE Thorax"],
                [-9, -3, 1, "Acetaminophen"],
                [-10, -3, 1, "Carvedilol"],
                [-11, -3, 1, "Enoxaparin"],
                [-12, -3, 1, "Warfarin"],
                [-13, -3, 1, "Ceftriaxone"],
                [-14, -4, 1, "Admit"],
                [-15, -4, 1, "Discharge"],
                [-16, -4, 1, "Readmit"],

                [-22, -5, 1, "Diagnosis 2"],
                [-23, -5, 1, "Diagnosis 3"],
                [-24, -5, 1, "Diagnosis 4"],

                [-30, -6, 1, "Troponin (High)"],
                [-31, -6, 1, "BNP (High)"],
                [-32, -6, 1, "Creatinine (High)"],
                [-33, -6, 1, "ESR (High)"],

                [-21, -7, 0, "Diagnosis 1"],
            ],
        )

        # ---- patient_item ----
        # Every fixture row carries the same analyze_date
        analyzeDate = datetime(2010, 1, 1, 0)
        insertRows(
            "patient_item",
            ["patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date"],
            [
                [-52, -11111, -23, datetime(1999, 9, 1, 0), analyzeDate],
                [-51, -11111, -21, datetime(2000, 1, 1, 0), analyzeDate],

                [-1, -11111, -4, datetime(2000, 1, 1, 0), analyzeDate],
                [-2, -11111, -10, datetime(2000, 1, 1, 1), analyzeDate],
                [-3, -11111, -8, datetime(2000, 1, 1, 2), analyzeDate],
                [-5, -11111, -12, datetime(2000, 2, 1, 0), analyzeDate],

                [-60, -11111, -32, datetime(2000, 1, 1, 4), analyzeDate],  # Within query time
                [-61, -11111, -30, datetime(2000, 1, 4, 0), analyzeDate],  # Within 1 week
                [-62, -11111, -31, datetime(2000, 1, 10, 0), analyzeDate],  # Past 1 week

                [-55, -22222, -21, datetime(2000, 1, 8, 0), analyzeDate],  # Admit Dx
                [-12, -22222, -6, datetime(2000, 1, 8, 0), analyzeDate],
                [-13, -22222, -14, datetime(2000, 1, 8, 1), analyzeDate],  # Admit
                [-14, -22222, -7, datetime(2000, 1, 8, 2), analyzeDate],
                [-15, -22222, -8, datetime(2000, 1, 8, 3), analyzeDate],
                [-16, -22222, -15, datetime(2000, 1, 9, 0), analyzeDate],  # Discharge
                [-56, -22222, -21, datetime(2000, 1, 13, 0), analyzeDate],  # Admit Dx
                [-17, -22222, -9, datetime(2000, 1, 13, 0), analyzeDate],
                [-19, -22222, -14, datetime(2000, 1, 13, 1), analyzeDate],  # Admit (Readmit)
                [-20, -22222, -10, datetime(2000, 1, 13, 2), analyzeDate],
                [-21, -22222, -11, datetime(2000, 1, 13, 3), analyzeDate],
                [-22, -22222, -15, datetime(2000, 1, 18, 0), analyzeDate],  # Discharge
            ],
        )

        # ---- clinical_item_association ----
        insertRows(
            "clinical_item_association",
            [
                "clinical_item_id", "subsequent_item_id",
                "count_0", "count_3600", "count_86400", "count_604800", "count_any",
                "time_diff_sum", "time_diff_sum_squares",
            ],
            [
                # Rows pairing an item with itself
                [-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0],
                [-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0],
                [-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0],
                [-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0],
                [-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0],
                [-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0],
                [-7, -7, 70, 70, 70, 70, 70, 0.0, 0.0],
                [-8, -8, 35, 35, 35, 50, 80, 0.0, 0.0],
                [-10, -10, 45, 45, 55, 60, 90, 0.0, 0.0],
                [-12, -12, 75, 75, 75, 80, 90, 0.0, 0.0],

                [-14, -14, 100, 100, 100, 100, 100, 0.0, 0.0],
                [-15, -15, 100, 100, 100, 100, 100, 0.0, 0.0],
                [-16, -16, 30, 30, 30, 30, 30, 0.0, 0.0],

                [-30, -30, 3, 3, 3, 3, 3, 0.0, 0.0],
                [-31, -31, 4, 4, 4, 4, 4, 0.0, 0.0],
                [-32, -32, 4, 4, 4, 4, 4, 0.0, 0.0],
                [-33, -33, 5, 5, 5, 5, 5, 0.0, 0.0],

                # Rows pairing an item with a different subsequent item
                [-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0],
                [-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0],
                [-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0],
                [-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0],
                [-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0],
                [-4, -7, 27, 33, 33, 33, 63, 40.0, 5420.0],
                [-4, -10, 25, 35, 40, 45, 63, 47.0, 5420.0],
                [-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0],

                [-6, -16, 10, 10, 10, 10, 10, 0.0, 0.0],
                [-8, -16, 5, 5, 5, 5, 5, 0.0, 0.0],
                [-10, -16, 8, 8, 8, 8, 8, 0.0, 0.0],

                [-10, -30, 10, 10, 10, 10, 10, 0.0, 0.0],
                [-10, -31, 10, 10, 10, 10, 10, 0.0, 0.0],
                [-12, -30, 20, 20, 20, 20, 20, 0.0, 0.0],
                [-12, -31, 20, 20, 20, 20, 20, 0.0, 0.0],
                [-10, -32, 10, 10, 10, 10, 10, 0.0, 0.0],
                [-10, -33, 10, 10, 10, 10, 10, 0.0, 0.0],
            ],
        )

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        for cacheKey in ("analyzedPatientCount", "clinicalItemCountsUpdated"):
            self.dataManager.clearCacheData(cacheKey)

        # Instances to test on
        self.analyzer = OutcomePredictionAnalysis()
        self.preparer = PreparePatientItems()

    def tearDown(self):
        """Delete the negative-ID test rows and the recorded categories, then
        hand over to DBTestCase.tearDown."""
        log.info("Purge test records from the database")

        # Delete referencing tables before the tables they refer to
        fixedPurges = (
            "delete from clinical_item_association where clinical_item_id < 0",
            "delete from patient_item where patient_item_id < 0",
            "delete from clinical_item where clinical_item_id < 0",
        )
        for purgeSql in fixedPurges:
            DBUtil.execute(purgeSql)

        # Categories are removed by the exact IDs collected during setUp
        categoryIdCsv = ",".join(self.clinicalItemCategoryIdStrList)
        DBUtil.execute(
            "delete from clinical_item_category where clinical_item_category_id in (%s)"
            % categoryIdCsv)

        DBTestCase.tearDown(self)

    def _runMainCapturingStdout(self, mainMethod, argv, stdinFile=None):
        """Call a command-line style ``mainMethod(argv)`` with stdout captured.

        stdinFile, when given, replaces sys.stdin for the duration of the call
        so the callee can read a prepared data file from "standard input".
        The real sys.stdout / sys.stdin are restored even if the call raises,
        so a failure here cannot leave the streams redirected for later tests.

        Returns a new StringIO holding everything written to stdout.
        """
        realStdout, realStdin = sys.stdout, sys.stdin
        capturedOutput = StringIO()
        sys.stdout = capturedOutput
        if stdinFile is not None:
            sys.stdin = stdinFile
        try:
            mainMethod(argv)
        finally:
            sys.stdout, sys.stdin = realStdout, realStdin
        return StringIO(capturedOutput.getvalue())

    def test_recommenderAnalysis(self):
        """Run the recommender against the mock test data from setUp and verify
        the expected stats afterwards.

        Each scenario is checked three ways, which must all agree:
        a direct call of the analyzer with an AnalysisQuery, the analyzer's
        command-line interface, and the analyzer reading a file produced by
        PreparePatientItems through stdin.
        """
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-11111])
        analysisQuery.baseCategoryId = -7
        analysisQuery.queryTimeSpan = timedelta(0, 86400)
        analysisQuery.recommender = ItemAssociationRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.targetItemIds = set([-33, -32, -31, -30])
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        columnTemplate = (
            "patient_id", "outcome.-33", "score.-33", "outcome.-32",
            "score.-32", "outcome.-31", "score.-31", "outcome.-30", "score.-30"
        )

        # (time limit on the outcome measure in seconds, expected result row)
        scenarios = [
            # Initial run without time limits on outcome measure
            (None, [-11111, +0, 0.222, +2, 0.611, +1, 0.222, +1, 0.222]),
            # Now try with time limitation on outcome measure: 1 week
            (604800, [-11111, +0, 0.222, +2, 0.611, +0, 0.222, +1, 0.222]),
            # Again, but with much stricter time limit (negative test case): 2 days
            (172800, [-11111, 0, 0.0109, 2, 0.0600, 0, 0.0109, 0, 0.0109]),
        ]

        for (maxSeconds, expectedValues) in scenarios:
            colNames = list(columnTemplate)
            expectedResults = [RowItemModel(expectedValues, colNames)]

            # The time limit, once set, stays on the query for later scenarios,
            # and is mirrored on the command line by the "-t" option
            timeArgs = []
            if maxSeconds is not None:
                analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, maxSeconds)
                timeArgs = ["-t", str(maxSeconds)]

            # Direct call with the query object
            analysisResults = self.analyzer(analysisQuery)
            self.assertEqualStatResults(expectedResults, analysisResults, colNames)

            # Redo but run through command-line interface
            argv = [
                "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400"
            ] + timeArgs + [
                "-o", "-33,-32,-31,-30", "-m", "0", "-R",
                "ItemAssociationRecommender", '0,-11111', "-"
            ]
            textOutput = self._runMainCapturingStdout(self.analyzer.main, argv)
            self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                                  colNames)

            # Redo through prepared file intermediary
            argv = [
                "PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400"
            ] + timeArgs + [
                "-o", "-33,-32,-31,-30", '0,-11111', "-"
            ]
            preparedDataFile = self._runMainCapturingStdout(self.preparer.main, argv)

            # Read prepared data file from redirected stdin
            argv = [
                "OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R",
                "ItemAssociationRecommender"
            ] + timeArgs + ['-', "-"]
            textOutput = self._runMainCapturingStdout(
                self.analyzer.main, argv, stdinFile=preparedDataFile)
            self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                                  colNames)

    def test_tripleSequence_virtualItem(self):
        """Test outcome assessment when the target is a virtual item based on
        the presence of a triple (instead of double) sequence of items.

        Virtual item -16 is registered on the query via the item sequence
        (-15, -14); the recommender is run against the mock test data from
        setUp, first through a direct call and then through the command-line
        interface, and both must report the same stats.
        """
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-22222])
        analysisQuery.baseCategoryId = -7
        analysisQuery.queryTimeSpan = timedelta(0, 86400)
        analysisQuery.sequenceItemIdsByVirtualItemId[-16] = (-15, -14)
        analysisQuery.recommender = ItemAssociationRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.targetItemIds = set([-16])
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # Initial run without time limits on outcome measure
        colNames = ["patient_id", "outcome.-16", "score.-16"]
        expectedResults = [RowItemModel([-22222, +1, 0.14286], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-o",
            "-16=-15:-14", "-m", "0", "-R", "ItemAssociationRecommender",
            '0,-22222', "-"
        ]
        # Redirect stdout output to collect test results, and always put the
        # real stream back so a failure cannot leave stdout hijacked
        realStdout = sys.stdout
        sys.stdout = StringIO()
        try:
            self.analyzer.main(argv)
            textOutput = StringIO(sys.stdout.getvalue())
        finally:
            sys.stdout = realStdout
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)
Beispiel #4
0
    def setUp(self):
        """Seed the database with mock records and create the objects under test.

        Every fixture row uses a negative ID.  The category IDs reported back
        by DBUtil.findOrInsertItem are remembered (as strings) in
        self.clinicalItemCategoryIdStrList.
        """
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        def insertRows(tableName, columnNames, rowValues):
            # Wrap every row in a RowItemModel first, then find-or-insert them in order
            rowModels = [RowItemModel(values, columnNames) for values in rowValues]
            for rowModel in rowModels:
                (rowId, wasInserted) = DBUtil.findOrInsertItem(tableName, rowModel)

        # ---- clinical_item_category ----
        self.clinicalItemCategoryIdStrList = []
        categoryColumns = ["clinical_item_category_id", "source_table"]
        categoryModels = [
            RowItemModel(values, categoryColumns)
            for values in (
                [-1, "Labs"],
                [-2, "Imaging"],
                [-3, "Meds"],
                [-4, "Nursing"],
                [-5, "Problems"],
                [-6, "Lab Results"],
                [-7, "Admit Dx"],
            )
        ]
        for categoryModel in categoryModels:
            (categoryId, wasInserted) = DBUtil.findOrInsertItem(
                "clinical_item_category", categoryModel)
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # ---- clinical_item ----
        insertRows(
            "clinical_item",
            ["clinical_item_id", "clinical_item_category_id", "analysis_status", "name"],
            [
                [-1, -1, 1, "CBC"],
                [-2, -1, 1, "BMP"],
                [-3, -1, 1, "Hepatic Panel"],
                [-4, -1, 1, "Cardiac Enzymes"],
                [-5, -2, 1, "CXR"],
                [-6, -2, 1, "RUQ Ultrasound"],
                [-7, -2, 1, "CT Abdomen/Pelvis"],
                [-8, -2, 1, "CT PE Thorax"],
                [-9, -3, 1, "Acetaminophen"],
                [-10, -3, 1, "Carvedilol"],
                [-11, -3, 1, "Enoxaparin"],
                [-12, -3, 1, "Warfarin"],
                [-13, -3, 1, "Ceftriaxone"],
                [-14, -4, 1, "Admit"],
                [-15, -4, 1, "Discharge"],
                [-16, -4, 1, "Readmit"],

                [-22, -5, 1, "Diagnosis 2"],
                [-23, -5, 1, "Diagnosis 3"],
                [-24, -5, 1, "Diagnosis 4"],

                [-30, -6, 1, "Troponin (High)"],
                [-31, -6, 1, "BNP (High)"],
                [-32, -6, 1, "Creatinine (High)"],
                [-33, -6, 1, "ESR (High)"],

                [-21, -7, 0, "Diagnosis 1"],
            ],
        )

        # ---- patient_item ----
        # Every fixture row carries the same analyze_date
        analyzeDate = datetime(2010, 1, 1, 0)
        insertRows(
            "patient_item",
            ["patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date"],
            [
                [-52, -11111, -23, datetime(1999, 9, 1, 0), analyzeDate],
                [-51, -11111, -21, datetime(2000, 1, 1, 0), analyzeDate],

                [-1, -11111, -4, datetime(2000, 1, 1, 0), analyzeDate],
                [-2, -11111, -10, datetime(2000, 1, 1, 1), analyzeDate],
                [-3, -11111, -8, datetime(2000, 1, 1, 2), analyzeDate],
                [-5, -11111, -12, datetime(2000, 2, 1, 0), analyzeDate],

                [-60, -11111, -32, datetime(2000, 1, 1, 4), analyzeDate],  # Within query time
                [-61, -11111, -30, datetime(2000, 1, 4, 0), analyzeDate],  # Within 1 week
                [-62, -11111, -31, datetime(2000, 1, 10, 0), analyzeDate],  # Past 1 week

                [-55, -22222, -21, datetime(2000, 1, 8, 0), analyzeDate],  # Admit Dx
                [-12, -22222, -6, datetime(2000, 1, 8, 0), analyzeDate],
                [-13, -22222, -14, datetime(2000, 1, 8, 1), analyzeDate],  # Admit
                [-14, -22222, -7, datetime(2000, 1, 8, 2), analyzeDate],
                [-15, -22222, -8, datetime(2000, 1, 8, 3), analyzeDate],
                [-16, -22222, -15, datetime(2000, 1, 9, 0), analyzeDate],  # Discharge
                [-56, -22222, -21, datetime(2000, 1, 13, 0), analyzeDate],  # Admit Dx
                [-17, -22222, -9, datetime(2000, 1, 13, 0), analyzeDate],
                [-19, -22222, -14, datetime(2000, 1, 13, 1), analyzeDate],  # Admit (Readmit)
                [-20, -22222, -10, datetime(2000, 1, 13, 2), analyzeDate],
                [-21, -22222, -11, datetime(2000, 1, 13, 3), analyzeDate],
                [-22, -22222, -15, datetime(2000, 1, 18, 0), analyzeDate],  # Discharge
            ],
        )

        # ---- clinical_item_association ----
        insertRows(
            "clinical_item_association",
            [
                "clinical_item_id", "subsequent_item_id",
                "count_0", "count_3600", "count_86400", "count_604800", "count_any",
                "time_diff_sum", "time_diff_sum_squares",
            ],
            [
                # Rows pairing an item with itself
                [-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0],
                [-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0],
                [-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0],
                [-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0],
                [-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0],
                [-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0],
                [-7, -7, 70, 70, 70, 70, 70, 0.0, 0.0],
                [-8, -8, 35, 35, 35, 50, 80, 0.0, 0.0],
                [-10, -10, 45, 45, 55, 60, 90, 0.0, 0.0],
                [-12, -12, 75, 75, 75, 80, 90, 0.0, 0.0],

                [-14, -14, 100, 100, 100, 100, 100, 0.0, 0.0],
                [-15, -15, 100, 100, 100, 100, 100, 0.0, 0.0],
                [-16, -16, 30, 30, 30, 30, 30, 0.0, 0.0],

                [-30, -30, 3, 3, 3, 3, 3, 0.0, 0.0],
                [-31, -31, 4, 4, 4, 4, 4, 0.0, 0.0],
                [-32, -32, 4, 4, 4, 4, 4, 0.0, 0.0],
                [-33, -33, 5, 5, 5, 5, 5, 0.0, 0.0],

                # Rows pairing an item with a different subsequent item
                [-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0],
                [-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0],
                [-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0],
                [-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0],
                [-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0],
                [-4, -7, 27, 33, 33, 33, 63, 40.0, 5420.0],
                [-4, -10, 25, 35, 40, 45, 63, 47.0, 5420.0],
                [-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0],

                [-6, -16, 10, 10, 10, 10, 10, 0.0, 0.0],
                [-8, -16, 5, 5, 5, 5, 5, 0.0, 0.0],
                [-10, -16, 8, 8, 8, 8, 8, 0.0, 0.0],

                [-10, -30, 10, 10, 10, 10, 10, 0.0, 0.0],
                [-10, -31, 10, 10, 10, 10, 10, 0.0, 0.0],
                [-12, -30, 20, 20, 20, 20, 20, 0.0, 0.0],
                [-12, -31, 20, 20, 20, 20, 20, 0.0, 0.0],
                [-10, -32, 10, 10, 10, 10, 10, 0.0, 0.0],
                [-10, -33, 10, 10, 10, 10, 10, 0.0, 0.0],
            ],
        )

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        for cacheKey in ("analyzedPatientCount", "clinicalItemCountsUpdated"):
            self.dataManager.clearCacheData(cacheKey)

        # Instances to test on
        self.analyzer = OutcomePredictionAnalysis()
        self.preparer = PreparePatientItems()
Beispiel #5
0
class TestDataManager(DBTestCase):
    def setUp(self):
        """Load negative-ID fixture rows and create the DataManager under test.

        Runs the ClinicalItemDataLoader schema builder, inserts test
        categories, clinical items, patient items and association statistics,
        and stores the queries the test methods use to read that state back.
        """
        DBTestCase.setUp(self)
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        log.info("Populate the database with test data")

        # --- Categories: remember the inserted IDs (as strings) for tearDown ---
        self.clinicalItemCategoryIdStrList = []
        categoryCols = ["clinical_item_category_id", "source_table"]
        categoryModels = [
            RowItemModel(values, categoryCols)
            for values in (
                [-1, "Labs"],
                [-2, "Imaging"],
                [-3, "Meds"],
                [-4, "Nursing"],
                [-5, "Problems"],
                [-6, "Lab Results"],
            )
        ]
        for categoryModel in categoryModels:
            (categoryId, _isNew) = DBUtil.findOrInsertItem(
                "clinical_item_category", categoryModel)
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # --- Clinical items ---
        itemCols = [
            "clinical_item_id", "clinical_item_category_id", "name",
            "analysis_status"
        ]
        itemModels = [
            RowItemModel(values, itemCols)
            for values in (
                [-1, -1, "CBC", 1],
                # Clear analysis status, so this will be ignored unless changed
                [-2, -1, "BMP", 0],
                [-3, -1, "Hepatic Panel", 1],
                [-4, -1, "Cardiac Enzymes", 1],
                [-5, -2, "CXR", 1],
                [-6, -2, "RUQ Ultrasound", 1],
                [-7, -2, "CT Abdomen/Pelvis", 1],
                [-8, -2, "CT PE Thorax", 1],
                [-9, -3, "Acetaminophen", 1],
                [-10, -3, "Carvedilol", 1],
                [-11, -3, "Enoxaparin", 1],
                [-12, -3, "Warfarin", 1],
                [-13, -3, "Ceftriaxone", 1],
                [-14, -4, "Foley Catheter", 1],
                [-15, -4, "Strict I&O", 1],
                [-16, -4, "Fall Precautions", 1],
            )
        ]
        for itemModel in itemModels:
            (_itemId, _isNew) = DBUtil.findOrInsertItem("clinical_item",
                                                        itemModel)
        # Reads back the state of every test (negative-ID) clinical item
        self.clinicalItemQuery = """
            select
                clinical_item_id, name, analysis_status, default_recommend
            from
                clinical_item
            where
                clinical_item_id < 0
            order by
                clinical_item_id  desc
            """

        # --- Patient items: all start with the same (far future) analyze_date ---
        patientItemCols = [
            "patient_item_id", "patient_id", "clinical_item_id", "item_date",
            "analyze_date"
        ]
        analyzed = datetime(2100, 1, 1, 0)
        patientItemModels = [
            RowItemModel(values, patientItemCols)
            for values in (
                [-1, -11111, -4, datetime(2000, 1, 1, 0), analyzed],
                [-2, -11111, -10, datetime(2000, 1, 1, 0), analyzed],
                [-3, -11111, -8, datetime(2000, 1, 1, 2), analyzed],
                [-4, -11111, -10, datetime(2000, 1, 2, 0), analyzed],
                [-5, -11111, -12, datetime(2000, 2, 1, 0), analyzed],
                [-10, -22222, -7, datetime(2000, 1, 5, 0), analyzed],
                [-12, -22222, -6, datetime(2000, 1, 9, 0), analyzed],
                [-13, -22222, -11, datetime(2000, 1, 9, 0), analyzed],
                [-14, -33333, -6, datetime(2000, 2, 9, 0), analyzed],
                [-15, -33333, -2, datetime(2000, 2, 11, 0), analyzed],
                [-16, -33333, -11, datetime(2000, 2, 11, 0), analyzed],
                [-17, -33333, -11, datetime(2001, 1, 1, 0), analyzed],
            )
        ]
        for patientItemModel in patientItemModels:
            (_itemId, _isNew) = DBUtil.findOrInsertItem("patient_item",
                                                        patientItemModel)
        # Reads back every patient item that references a test clinical item
        self.patientItemQuery = """
            select
                patient_item_id, patient_id, clinical_item_id, item_date, analyze_date
            from
                patient_item
            where
                clinical_item_id < 0
            order by
                patient_id desc, item_date, patient_item_id desc
            """

        # --- Association statistics ---
        # NOTE(review): "patient_count_0" is listed twice (10th and 17th column);
        # every row below carries the same value in both positions.  Kept as-is.
        associationCols = [
            "clinical_item_id", "subsequent_item_id",
            "count_0", "count_3600", "count_86400", "count_604800", "count_any",
            "time_diff_sum", "time_diff_sum_squares",
            "patient_count_0", "patient_count_3600", "patient_count_86400",
            "patient_count_604800", "patient_count_any",
            "patient_time_diff_sum", "patient_time_diff_sum_squares",
            "patient_count_0", "encounter_count_0",
        ]
        associationModels = [
            RowItemModel(values, associationCols)
            for values in (
                [-11, -11, 3, 3, 3, 3, 4, 999.0, 9999.0, 2, 2, 2, 2, 2, 999.0, 9999.0, 2, 2],
                [-11, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0],
                [-11, -6, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
                [-11, -2, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
                [-7, -11, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0],
                [-7, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
                [-7, -6, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0],

                [-6, -11, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1],
                [-6, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0],
                [-6, -6, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2],
                [-6, -2, 0, 0, 0, 1, 1, 172800.0, 29859840000.0, 0, 0, 0, 1, 1, 172800.0, 29859840000.0, 0, 0],

                [-2, -11, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
                [-2, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
                [-2, -6, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0],
                [-2, -2, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
            )
        ]
        for associationModel in associationModels:
            (_itemId, _isNew) = DBUtil.findOrInsertItem(
                "clinical_item_association", associationModel)
        # Reads back the association statistics of every test clinical item
        self.clinicalItemAssociationQuery = """
            select
                clinical_item_id, subsequent_item_id,
                count_0,count_3600,count_86400,count_604800,count_any,
                time_diff_sum,time_diff_sum_squares,
                patient_count_0,patient_count_3600,patient_count_86400,patient_count_604800,patient_count_any,
                patient_time_diff_sum, patient_time_diff_sum_squares
            from
                clinical_item_association
            where
                clinical_item_id < 0
            order by
                clinical_item_id, subsequent_item_id
            """

        # Instance to test on; maxClinicalItemId = 0 avoids testing on "real" data
        self.analyzer = DataManager()
        self.analyzer.maxClinicalItemId = 0

    def tearDown(self):
        """Delete the test records created by setUp and the tests, then run the base tearDown."""
        log.info("Purge test records from the database")

        # Executed in this order; presumably dependent tables come first -
        # verify against the schema before reordering.
        purgeStatements = (
            "delete from clinical_item_link where clinical_item_id < 0",
            "delete from backup_link_patient_item where patient_item_id < 0",
            "delete from clinical_item_association where clinical_item_id < 0",
            "delete from patient_item where patient_id < 0",
            "delete from clinical_item where clinical_item_id < 0",
        )
        for purgeStatement in purgeStatements:
            DBUtil.execute(purgeStatement)

        # Categories are removed by the exact IDs recorded during setUp
        categoryIdCsv = ",".join(self.clinicalItemCategoryIdStrList)
        DBUtil.execute(
            "delete from clinical_item_category where clinical_item_category_id in (%s)"
            % categoryIdCsv)

        DBTestCase.tearDown(self)

    def test_deactivateAnalysis(self):
        """Deactivate items -6 and -11, then verify items, patient items and associations."""
        self.analyzer.deactivateAnalysis({-6, -11})

        # Columns: clinical_item_id, name, analysis_status, default_recommend
        wantedItemStatus = [
            [-1, "CBC", 1, 1],
            [-2, "BMP", 0, 1],
            [-3, "Hepatic Panel", 1, 1],
            [-4, "Cardiac Enzymes", 1, 1],
            [-5, "CXR", 1, 1],
            [-6, "RUQ Ultrasound", 0, 1],
            [-7, "CT Abdomen/Pelvis", 1, 1],
            [-8, "CT PE Thorax", 1, 1],
            [-9, "Acetaminophen", 1, 1],
            [-10, "Carvedilol", 1, 1],
            [-11, "Enoxaparin", 0, 1],
            [-12, "Warfarin", 1, 1],
            [-13, "Ceftriaxone", 1, 1],
            [-14, "Foley Catheter", 1, 1],
            [-15, "Strict I&O", 1, 1],
            [-16, "Fall Precautions", 1, 1],
        ]
        self.assertEqualTable(wantedItemStatus,
                              DBUtil.execute(self.clinicalItemQuery))

        # "*" stands for "some analyze date": only its presence and consistency
        # are verified, not the actual value.  None means the date was cleared.
        wantedPatientItems = [
            [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"],
            [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"],
            [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"],
            [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"],
            [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"],
            [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"],
            [-12, -22222, -6, datetime(2000, 1, 9, 0), None],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), None],
            [-14, -33333, -6, datetime(2000, 2, 9, 0), None],
            [-15, -33333, -2, datetime(2000, 2, 11, 0), "*"],
            [-16, -33333, -11, datetime(2000, 2, 11, 0), None],
            [-17, -33333, -11, datetime(2001, 1, 1, 0), None],
        ]
        self.assertEqualPatientItems(wantedPatientItems,
                                     DBUtil.execute(self.patientItemQuery))

        # Only associations not involving -6 or -11 are expected to remain
        wantedAssociations = [
            [-7, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-2, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-2, -2, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0],
        ]
        self.assertEqualTable(
            wantedAssociations,
            DBUtil.execute(self.clinicalItemAssociationQuery),
            precision=3)

    def test_deactivateAnalysisByCount(self):
        """Run deactivateAnalysisByCount (threshold 1, categories -1 and -2) and verify the database state."""
        instanceThreshold = 1
        targetCategories = [-1, -2]
        self.analyzer.deactivateAnalysisByCount(instanceThreshold,
                                                targetCategories)

        # Columns: clinical_item_id, name, analysis_status, default_recommend
        wantedItemStatus = [
            [-1, "CBC", 0, 1],
            [-2, "BMP", 0, 1],
            [-3, "Hepatic Panel", 0, 1],
            [-4, "Cardiac Enzymes", 0, 1],
            [-5, "CXR", 0, 1],
            [-6, "RUQ Ultrasound", 1, 1],
            [-7, "CT Abdomen/Pelvis", 0, 1],
            [-8, "CT PE Thorax", 0, 1],
            # From here on the items are in a different category, so they
            # should be left alone
            [-9, "Acetaminophen", 1, 1],
            [-10, "Carvedilol", 1, 1],
            [-11, "Enoxaparin", 1, 1],
            [-12, "Warfarin", 1, 1],
            [-13, "Ceftriaxone", 1, 1],
            [-14, "Foley Catheter", 1, 1],
            [-15, "Strict I&O", 1, 1],
            [-16, "Fall Precautions", 1, 1],
        ]
        self.assertEqualTable(wantedItemStatus,
                              DBUtil.execute(self.clinicalItemQuery))

        # "*" stands for "some analyze date": only its presence and consistency
        # are verified, not the actual value.  None means the date was cleared.
        wantedPatientItems = [
            [-1, -11111, -4, datetime(2000, 1, 1, 0), None],
            [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"],
            [-3, -11111, -8, datetime(2000, 1, 1, 2), None],
            [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"],
            [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"],
            [-10, -22222, -7, datetime(2000, 1, 5, 0), None],
            [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"],
            [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"],
            [-15, -33333, -2, datetime(2000, 2, 11, 0), None],
            [-16, -33333, -11, datetime(2000, 2, 11, 0), "*"],
            [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"],
        ]
        self.assertEqualPatientItems(wantedPatientItems,
                                     DBUtil.execute(self.patientItemQuery))

        # Only associations among the still-active items -6 and -11 are expected
        wantedAssociations = [
            [-11, -11, 3, 3, 3, 3, 4, 999.0, 9999.0, 2, 2, 2, 2, 2, 999.0, 9999.0],
            [-11, -6, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-6, -11, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1, 1, 2, 2, 172800.0, 29859840000.0],
            [-6, -6, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2, 2, 2, 2, 0.0, 0.0],
        ]
        self.assertEqualTable(
            wantedAssociations,
            DBUtil.execute(self.clinicalItemAssociationQuery),
            precision=3)

    def test_compositeRelated(self):
        """Create composite items through the command-line entry point and verify the results.

        Three steps are exercised: creating composite -100 from items
        (-2, -4, -8), adding item -6 to it incrementally, and creating a second
        composite -101 from (-7, -100).  After each step the clinical items,
        patient items and/or link records are compared to expected tables.
        """
        def baselineItemStatus():
            # Fresh copy of the 16 fixture items as inserted by setUp.
            # Columns: clinical_item_id, name, analysis_status, default_recommend
            return [
                [-1, "CBC", 1, 1],
                [-2, "BMP", 0, 1],
                [-3, "Hepatic Panel", 1, 1],
                [-4, "Cardiac Enzymes", 1, 1],
                [-5, "CXR", 1, 1],
                [-6, "RUQ Ultrasound", 1, 1],
                [-7, "CT Abdomen/Pelvis", 1, 1],
                [-8, "CT PE Thorax", 1, 1],
                [-9, "Acetaminophen", 1, 1],
                [-10, "Carvedilol", 1, 1],
                [-11, "Enoxaparin", 1, 1],
                [-12, "Warfarin", 1, 1],
                [-13, "Ceftriaxone", 1, 1],
                [-14, "Foley Catheter", 1, 1],
                [-15, "Strict I&O", 1, 1],
                [-16, "Fall Precautions", 1, 1],
            ]

        # Check for tracking link records
        linkQuery = """
            select
                clinical_item_id,  linked_item_id
            from
                clinical_item_link
            where
                clinical_item_id < 0
            order by
                clinical_item_id desc, linked_item_id desc
            """

        def verifyItemStatus(wantedRows):
            self.assertEqualTable(wantedRows,
                                  DBUtil.execute(self.clinicalItemQuery))

        def verifyPatientItems(wantedRows):
            self.assertEqualPatientItems(wantedRows,
                                         DBUtil.execute(self.patientItemQuery))

        def verifyLinks(wantedRows):
            self.assertEqualTable(wantedRows, DBUtil.execute(linkQuery))

        # ---- Step 1: simulate command-line execution of composite creation ----
        # Presumably equivalent to calling
        # compositeRelated((-2,-4,-8), "NewComposite", "New Composite Item", -1, -100)
        self.analyzer.main([
            "medinfo/cpoe/DataManager.py", "-c",
            "-2,-4,-8|NewComposite|New Composite Item|-1|-100"
        ])

        # The new item ID is a sentinel test value (-100); the composite is
        # expected to be removed from the default recommend list.
        verifyItemStatus(baselineItemStatus() + [
            [-100, "NewComposite", 1, 0],
        ])

        # "*" stands for "some analyze date": only its presence and consistency
        # are verified.  None in the first column is a primary key whose
        # specific value is unimportant.
        verifyPatientItems([
            [None, -11111, -100, datetime(2000, 1, 1, 0), None],
            [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"],
            [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"],
            [None, -11111, -100, datetime(2000, 1, 1, 2), None],
            [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"],
            [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"],
            [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"],
            [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"],
            [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"],
            [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"],
            [None, -33333, -100, datetime(2000, 2, 11, 0), None],
            [-15, -33333, -2, datetime(2000, 2, 11, 0), "*"],
            [-16, -33333, -11, datetime(2000, 2, 11, 0), "*"],
            [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"],
        ])

        verifyLinks([[-100, linkedId] for linkedId in (-2, -4, -8)])

        # ---- Step 2: incremental update of the composite ----
        log.debug("Test incremental update via command-line")
        # Presumably equivalent to generatePatientItemsForCompositeId((-6,), -100)
        self.analyzer.main(["medinfo/cpoe/DataManager.py", "-g", "-6|-100"])

        verifyPatientItems([
            [None, -11111, -100, datetime(2000, 1, 1, 0), None],
            [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"],
            [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"],
            [None, -11111, -100, datetime(2000, 1, 1, 2), None],
            [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"],
            [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"],
            [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"],
            [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"],
            [None, -22222, -100, datetime(2000, 1, 9, 0), None],
            [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"],
            [None, -33333, -100, datetime(2000, 2, 9, 0), None],
            [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"],
            [None, -33333, -100, datetime(2000, 2, 11, 0), None],
            [-15, -33333, -2, datetime(2000, 2, 11, 0), "*"],
            [-16, -33333, -11, datetime(2000, 2, 11, 0), "*"],
            [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"],
        ])

        verifyLinks([[-100, linkedId] for linkedId in (-2, -4, -6, -8)])

        # ---- Step 3: a composite that includes another composite ----
        log.debug("Test inherited update")
        # Presumably equivalent to calling
        # compositeRelated((-7,-100), "InheritingComposite", "Inheriting Composite Item", -1, -101)
        self.analyzer.main([
            "medinfo/cpoe/DataManager.py", "-c",
            "-7,-100|InheritingComposite|Inheriting Composite Item|-1|-101"
        ])

        verifyItemStatus(baselineItemStatus() + [
            [-100, "NewComposite", 1, 0],
            [-101, "InheritingComposite", 1, 0],
        ])

        verifyPatientItems([
            [None, -11111, -101, datetime(2000, 1, 1, 0), None],
            [None, -11111, -100, datetime(2000, 1, 1, 0), None],
            [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"],
            [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"],
            [None, -11111, -101, datetime(2000, 1, 1, 2), None],
            [None, -11111, -100, datetime(2000, 1, 1, 2), None],
            [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"],
            [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"],
            [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"],
            [None, -22222, -101, datetime(2000, 1, 5, 0), None],
            [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"],
            [None, -22222, -101, datetime(2000, 1, 9, 0), None],
            [None, -22222, -100, datetime(2000, 1, 9, 0), None],
            [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"],
            [None, -33333, -101, datetime(2000, 2, 9, 0), None],
            [None, -33333, -100, datetime(2000, 2, 9, 0), None],
            [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"],
            [None, -33333, -101, datetime(2000, 2, 11, 0), None],
            [None, -33333, -100, datetime(2000, 2, 11, 0), None],
            [-15, -33333, -2, datetime(2000, 2, 11, 0), "*"],
            [-16, -33333, -11, datetime(2000, 2, 11, 0), "*"],
            [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"],
        ])

        verifyLinks([
            [-100, -2],
            [-100, -4],
            [-100, -6],
            [-100, -8],
            [-101, -7],
            [-101, -100],
        ])

    def test_mergeRelated(self):
        """Merge items -7 and -2 into -6, then verify items, patient items, associations and backups."""
        self.analyzer.mergeRelated(-6, (-7, -2))

        # Columns: clinical_item_id, name, analysis_status, default_recommend.
        # The merge target -6 carries the combined name; -7 becomes inactive.
        wantedItemStatus = [
            [-1, "CBC", 1, 1],
            [-2, "BMP", 0, 1],
            [-3, "Hepatic Panel", 1, 1],
            [-4, "Cardiac Enzymes", 1, 1],
            [-5, "CXR", 1, 1],
            [-6, "RUQ Ultrasound+BMP+CT Abdomen/Pelvis", 1, 1],
            [-7, "CT Abdomen/Pelvis", 0, 1],
            [-8, "CT PE Thorax", 1, 1],
            [-9, "Acetaminophen", 1, 1],
            [-10, "Carvedilol", 1, 1],
            [-11, "Enoxaparin", 1, 1],
            [-12, "Warfarin", 1, 1],
            [-13, "Ceftriaxone", 1, 1],
            [-14, "Foley Catheter", 1, 1],
            [-15, "Strict I&O", 1, 1],
            [-16, "Fall Precautions", 1, 1],
        ]
        self.assertEqualTable(wantedItemStatus,
                              DBUtil.execute(self.clinicalItemQuery))

        # "*" stands for "some analyze date": only its presence and consistency
        # are verified, not the actual value.
        wantedPatientItems = [
            [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"],
            [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"],
            [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"],
            [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"],
            [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"],
            [-10, -22222, -6, datetime(2000, 1, 5, 0), None],  # Reassigned to -6
            [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"],
            [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"],
            [-15, -33333, -6, datetime(2000, 2, 11, 0), None],  # Reassigned to -6
            [-16, -33333, -11, datetime(2000, 2, 11, 0), "*"],
            [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"],
        ]
        self.assertEqualPatientItems(wantedPatientItems,
                                     DBUtil.execute(self.patientItemQuery))

        # Associations touching the merged-away items -7 and -2 are expected to be gone
        wantedAssociations = [
            [-11, -11, 3, 3, 3, 3, 4, 999.0, 9999.0, 2, 2, 2, 2, 2, 999.0, 9999.0],
            [-11, -6, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0],

            [-6, -11, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1, 1, 2, 2, 172800.0, 29859840000.0],
            [-6, -6, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2, 2, 2, 2, 0.0, 0.0],
        ]
        self.assertEqualTable(
            wantedAssociations,
            DBUtil.execute(self.clinicalItemAssociationQuery),
            precision=3)

        # Check for backup of lost data: the original item of each reassigned row
        backupQuery = """
            select
                patient_item_id, clinical_item_id
            from
                backup_link_patient_item
            where
                clinical_item_id < 0
            order by
                patient_item_id desc, clinical_item_id
            """
        wantedBackups = [
            [-10, -7],
            [-15, -2],
        ]
        self.assertEqualTable(wantedBackups, DBUtil.execute(backupQuery))

    def test_unifyRedundant(self):
        """Unify redundant items (-7, -2) under -7, then verify items, patient items and associations."""
        self.analyzer.unifyRedundant(-7, (-7, -2))

        # Columns: clinical_item_id, name, analysis_status, default_recommend.
        # The target -7 is expected to carry the combined name.
        wantedItemStatus = [
            [-1, "CBC", 1, 1],
            [-2, "BMP", 0, 1],
            [-3, "Hepatic Panel", 1, 1],
            [-4, "Cardiac Enzymes", 1, 1],
            [-5, "CXR", 1, 1],
            [-6, "RUQ Ultrasound", 1, 1],
            [-7, "CT Abdomen/Pelvis+BMP", 1, 1],
            [-8, "CT PE Thorax", 1, 1],
            [-9, "Acetaminophen", 1, 1],
            [-10, "Carvedilol", 1, 1],
            [-11, "Enoxaparin", 1, 1],
            [-12, "Warfarin", 1, 1],
            [-13, "Ceftriaxone", 1, 1],
            [-14, "Foley Catheter", 1, 1],
            [-15, "Strict I&O", 1, 1],
            [-16, "Fall Precautions", 1, 1],
        ]
        self.assertEqualTable(wantedItemStatus,
                              DBUtil.execute(self.clinicalItemQuery))

        # "*" stands for "some analyze date": only its presence and consistency
        # are verified, not the actual value.  Only row -15 (item -2) is
        # expected to have its analyze date cleared.
        wantedPatientItems = [
            [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"],
            [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"],
            [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"],
            [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"],
            [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"],
            [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"],
            [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"],
            [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"],
            [-15, -33333, -2, datetime(2000, 2, 11, 0), None],
            [-16, -33333, -11, datetime(2000, 2, 11, 0), "*"],
            [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"],
        ]
        self.assertEqualPatientItems(wantedPatientItems,
                                     DBUtil.execute(self.patientItemQuery))

        # Associations involving -2 are expected to be gone; the rest is unchanged
        wantedAssociations = [
            [-11, -11, 3, 3, 3, 3, 4, 999.0, 9999.0, 2, 2, 2, 2, 2, 999.0, 9999.0],
            [-11, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-11, -6, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-7, -11, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0],
            [-7, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0],
            [-7, -6, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0],

            [-6, -11, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1, 1, 2, 2, 172800.0, 29859840000.0],
            [-6, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-6, -6, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2, 2, 2, 2, 0.0, 0.0],
        ]
        self.assertEqualTable(
            wantedAssociations,
            DBUtil.execute(self.clinicalItemAssociationQuery),
            precision=3)

    def assertEqualPatientItems(self, expectedPatientItems, patientItems):
        """Compare patient item tables while ignoring concrete analyze dates and new primary keys.

        The expected rows are patched in place before the comparison:

        * every non-None analyze_date (last column, e.g. the "*" placeholder)
          is replaced by the first non-None analyze_date found in
          ``patientItems``, so only presence and consistency are checked;
        * a None patient_item_id (first column) is replaced by the ID of the
          actual row at the same position.

        If the actual rows hold no analyze date at all, the placeholders are
        left untouched.
        """
        # First analyze_date actually present in the results, if any
        observedAnalyzeDate = next(
            (foundRow[-1] for foundRow in patientItems
             if foundRow[-1] is not None),
            None)

        if observedAnalyzeDate is not None:
            for wantedRow in expectedPatientItems:
                if wantedRow[-1] is not None:
                    wantedRow[-1] = observedAnalyzeDate

        # Adopt the actual primary key wherever the expectation leaves it open
        for (wantedRow, foundRow) in zip(expectedPatientItems, patientItems):
            if wantedRow[0] is None:
                wantedRow[0] = foundRow[0]

        self.assertEqualTable(expectedPatientItems, patientItems)

    def test_updateClinicalItemCounts(self):
        """Run updateClinicalItemCounts and verify the counts stored on each test clinical item."""
        self.analyzer.updateClinicalItemCounts()

        # Note: patient_count is selected twice, so each expected row repeats it
        countQuery = """
            select
                clinical_item_id, name, analysis_status, item_count, patient_count, patient_count, encounter_count
            from
                clinical_item
            where
                clinical_item_id < 0
            order by
                clinical_item_id  desc
            """

        # (clinical_item_id, name, analysis_status) of every fixture item
        fixtureItems = [
            (-1, "CBC", 1),
            (-2, "BMP", 0),
            (-3, "Hepatic Panel", 1),
            (-4, "Cardiac Enzymes", 1),
            (-5, "CXR", 1),
            (-6, "RUQ Ultrasound", 1),
            (-7, "CT Abdomen/Pelvis", 1),
            (-8, "CT PE Thorax", 1),
            (-9, "Acetaminophen", 1),
            (-10, "Carvedilol", 1),
            (-11, "Enoxaparin", 1),
            (-12, "Warfarin", 1),
            (-13, "Ceftriaxone", 1),
            (-14, "Foley Catheter", 1),
            (-15, "Strict I&O", 1),
            (-16, "Fall Precautions", 1),
        ]
        # [item_count, patient_count, patient_count, encounter_count] for the
        # items with known values
        knownCounts = {
            -2: [1, 1, 1, 1],
            -6: [2, 2, 2, 2],
            -7: [1, 1, 1, 1],
            -11: [3, 2, 2, 2],  # Two instances occur for the same patient
        }
        # Expect counts to default to zero if no values known
        wantedCounts = [
            [itemId, itemName, status] + knownCounts.get(itemId, [0, 0, 0, 0])
            for (itemId, itemName, status) in fixtureItems
        ]

        # Query the test DB to see what is actually stored there
        storedCounts = DBUtil.execute(countQuery)
        self.assertEqualTable(wantedCounts, storedCounts)

    def test_resetAssociationModel(self):
        """Verify that resetAssociationModel clears derived data but keeps patient items.

        After the reset, the association rows, analyzed patient items, the
        'analyzedPatientCount' cache entry and the summed item counts are all
        expected to be zero, while the patient_item row count is unchanged.
        """
        def fetchScalar(sql):
            # First column of the first row of the query result
            return DBUtil.execute(sql)[0][0]

        def snapshotCounts():
            # Queries are issued in a fixed order:
            # associations, patient items, analyzed patient items, cache entries, item count sum
            return (
                fetchScalar("select count(*) from clinical_item_association"),
                fetchScalar("select count(*) from patient_item"),
                fetchScalar(
                    "select count(*) from patient_item where analyze_date is not null"
                ),
                fetchScalar(
                    "select count(*) from data_cache where data_key in ('analyzedPatientCount')"
                ),
                fetchScalar("select sum(item_count) from clinical_item"),
            )

        # Generate clinical item counts based on patient item data
        self.analyzer.updateClinicalItemCounts()

        (associationsBefore, patientItemsBefore, analyzedBefore,
         cachedBefore, itemCountSumBefore) = snapshotCounts()

        self.assertTrue(associationsBefore > 0)
        self.assertTrue(patientItemsBefore > 0)
        self.assertTrue(analyzedBefore > 0)
        # The cache entry count is queried but deliberately not asserted before the reset
        self.assertTrue(itemCountSumBefore > 0)

        self.analyzer.resetAssociationModel()

        (associationsAfter, patientItemsAfter, analyzedAfter,
         cachedAfter, itemCountSumAfter) = snapshotCounts()

        self.assertEqual(0, associationsAfter)
        self.assertEqual(patientItemsBefore, patientItemsAfter)
        self.assertEqual(0, analyzedAfter)
        self.assertEqual(0, cachedAfter)
        self.assertEqual(0, itemCountSumAfter)
Beispiel #6
0
    def setUp(self):
        """Insert the mock categories, clinical items, patient items and associations.

        Also stores the queries used by the tests to read the data back and
        creates the DataManager instance under test.
        """
        DBTestCase.setUp(self)
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        log.info("Populate the database with test data")

        # --- Categories: remember the resulting ids (as strings) ---
        categoryHeaders = ["clinical_item_category_id", "source_table"]
        categoryRows = [
            [-1, "Labs"],
            [-2, "Imaging"],
            [-3, "Meds"],
            [-4, "Nursing"],
            [-5, "Problems"],
            [-6, "Lab Results"],
        ]
        self.clinicalItemCategoryIdStrList = list()
        for values in categoryRows:
            categoryId, _isNew = DBUtil.findOrInsertItem(
                "clinical_item_category", RowItemModel(values, categoryHeaders))
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # --- Clinical items ---
        itemHeaders = [
            "clinical_item_id", "clinical_item_category_id", "name",
            "analysis_status"
        ]
        itemRows = [
            [-1, -1, "CBC", 1],
            # Clear analysis status, so this will be ignored unless changed
            [-2, -1, "BMP", 0],
            [-3, -1, "Hepatic Panel", 1],
            [-4, -1, "Cardiac Enzymes", 1],
            [-5, -2, "CXR", 1],
            [-6, -2, "RUQ Ultrasound", 1],
            [-7, -2, "CT Abdomen/Pelvis", 1],
            [-8, -2, "CT PE Thorax", 1],
            [-9, -3, "Acetaminophen", 1],
            [-10, -3, "Carvedilol", 1],
            [-11, -3, "Enoxaparin", 1],
            [-12, -3, "Warfarin", 1],
            [-13, -3, "Ceftriaxone", 1],
            [-14, -4, "Foley Catheter", 1],
            [-15, -4, "Strict I&O", 1],
            [-16, -4, "Fall Precautions", 1],
        ]
        for values in itemRows:
            DBUtil.findOrInsertItem("clinical_item",
                                    RowItemModel(values, itemHeaders))
        self.clinicalItemQuery = \
            """
            select
                clinical_item_id, name, analysis_status, default_recommend
            from
                clinical_item
            where
                clinical_item_id < 0
            order by
                clinical_item_id  desc
            """

        # --- Patient items: every row carries the same analyze_date ---
        patientItemHeaders = [
            "patient_item_id", "patient_id", "clinical_item_id", "item_date",
            "analyze_date"
        ]
        analyzedOn = datetime(2100, 1, 1, 0)
        patientItemRows = [
            [-1, -11111, -4, datetime(2000, 1, 1, 0), analyzedOn],
            [-2, -11111, -10, datetime(2000, 1, 1, 0), analyzedOn],
            [-3, -11111, -8, datetime(2000, 1, 1, 2), analyzedOn],
            [-4, -11111, -10, datetime(2000, 1, 2, 0), analyzedOn],
            [-5, -11111, -12, datetime(2000, 2, 1, 0), analyzedOn],
            [-10, -22222, -7, datetime(2000, 1, 5, 0), analyzedOn],
            [-12, -22222, -6, datetime(2000, 1, 9, 0), analyzedOn],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), analyzedOn],
            [-14, -33333, -6, datetime(2000, 2, 9, 0), analyzedOn],
            [-15, -33333, -2, datetime(2000, 2, 11, 0), analyzedOn],
            [-16, -33333, -11, datetime(2000, 2, 11, 0), analyzedOn],
            [-17, -33333, -11, datetime(2001, 1, 1, 0), analyzedOn],
        ]
        for values in patientItemRows:
            DBUtil.findOrInsertItem("patient_item",
                                    RowItemModel(values, patientItemHeaders))
        self.patientItemQuery = \
            """
            select
                patient_item_id, patient_id, clinical_item_id, item_date, analyze_date
            from
                patient_item
            where
                clinical_item_id < 0
            order by
                patient_id desc, item_date, patient_item_id desc
            """

        # --- Item associations ---
        # NOTE(review): "patient_count_0" is listed twice below (10th and 17th
        # position); the rows carry the same value in both positions - confirm intended.
        associationHeaders = [
            "clinical_item_id", "subsequent_item_id",
            "count_0", "count_3600", "count_86400", "count_604800", "count_any",
            "time_diff_sum", "time_diff_sum_squares",
            "patient_count_0", "patient_count_3600", "patient_count_86400", "patient_count_604800", "patient_count_any",
            "patient_time_diff_sum", "patient_time_diff_sum_squares",
            "patient_count_0", "encounter_count_0",
        ]
        associationRows = [
            [-11, -11, 3, 3, 3, 3, 4, 999.0, 9999.0, 2, 2, 2, 2, 2, 999.0, 9999.0, 2, 2],
            [-11, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0],
            [-11, -6, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
            [-11, -2, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
            [-7, -11, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0],
            [-7, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
            [-7, -6, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0],

            [-6, -11, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1],
            [-6, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0],
            [-6, -6, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2],
            [-6, -2, 0, 0, 0, 1, 1, 172800.0, 29859840000.0, 0, 0, 0, 1, 1, 172800.0, 29859840000.0, 0, 0],

            [-2, -11, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
            [-2, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
            [-2, -6, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0],
            [-2, -2, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1],
        ]
        for values in associationRows:
            DBUtil.findOrInsertItem("clinical_item_association",
                                    RowItemModel(values, associationHeaders))
        self.clinicalItemAssociationQuery = \
            """
            select
                clinical_item_id, subsequent_item_id,
                count_0,count_3600,count_86400,count_604800,count_any,
                time_diff_sum,time_diff_sum_squares,
                patient_count_0,patient_count_3600,patient_count_86400,patient_count_604800,patient_count_any,
                patient_time_diff_sum, patient_time_diff_sum_squares
            from
                clinical_item_association
            where
                clinical_item_id < 0
            order by
                clinical_item_id, subsequent_item_id
            """

        # Instance to test on
        manager = DataManager()
        manager.maxClinicalItemId = 0
        self.analyzer = manager
Beispiel #7
0
class TestItemRecommender(DBTestCase):
    def setUp(self):
        """Insert the mock data for the recommender tests and create the recommender.

        Association rows are specified with patient_count_* values only; the
        matching count_* values are derived from them before insertion.
        """
        DBTestCase.setUp(self)
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        log.info("Populate the database with test data")

        # --- Categories: remember the resulting ids (as strings) for tearDown ---
        categoryHeaders = ["clinical_item_category_id", "source_table"]
        categoryRows = [
            [-1, "Labs"],
            [-2, "Imaging"],
            [-3, "Meds"],
            [-4, "Nursing"],
            [-5, "Problems"],
            [-6, "Lab Results"],
        ]
        self.clinicalItemCategoryIdStrList = list()
        for values in categoryRows:
            categoryId, _isNew = DBUtil.findOrInsertItem(
                "clinical_item_category", RowItemModel(values, categoryHeaders))
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # --- Clinical items ---
        itemHeaders = ["clinical_item_id", "clinical_item_category_id", "name"]
        itemRows = [
            [-1, -1, "CBC"],
            [-2, -1, "BMP"],
            [-3, -1, "Hepatic Panel"],
            [-4, -1, "Cardiac Enzymes"],
            [-5, -2, "CXR"],
            [-6, -2, "RUQ Ultrasound"],
            [-7, -2, "CT Abdomen/Pelvis"],
            [-8, -2, "CT PE Thorax"],
            [-9, -3, "Acetaminophen"],
            [-10, -3, "Carvedilol"],
            [-11, -3, "Enoxaparin"],
            [-12, -3, "Warfarin"],
            [-13, -3, "Ceftriaxone"],
            [-14, -4, "Foley Catheter"],
            [-15, -4, "Strict I&O"],
            [-16, -4, "Fall Precautions"],
        ]
        for values in itemRows:
            DBUtil.findOrInsertItem("clinical_item",
                                    RowItemModel(values, itemHeaders))

        # --- Patient items: every row carries the same analyze_date ---
        patientItemHeaders = [
            "patient_item_id", "patient_id", "clinical_item_id", "item_date",
            "analyze_date"
        ]
        analyzedOn = datetime(2010, 1, 1, 0)
        patientItemRows = [
            [-1, -11111, -4, datetime(2000, 1, 1, 0), analyzedOn],
            [-2, -11111, -10, datetime(2000, 1, 1, 0), analyzedOn],
            [-3, -11111, -8, datetime(2000, 1, 1, 2), analyzedOn],
            [-4, -11111, -10, datetime(2000, 1, 2, 0), analyzedOn],
            [-5, -11111, -12, datetime(2000, 2, 1, 0), analyzedOn],
            [-10, -22222, -7, datetime(2000, 1, 5, 0), analyzedOn],
            [-12, -22222, -6, datetime(2000, 1, 9, 0), analyzedOn],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), analyzedOn],
            [-14, -33333, -6, datetime(2000, 2, 9, 0), analyzedOn],
            [-15, -33333, -2, datetime(2000, 2, 11, 0), analyzedOn],
        ]
        for values in patientItemRows:
            DBUtil.findOrInsertItem("patient_item",
                                    RowItemModel(values, patientItemHeaders))

        # --- Item associations ---
        associationHeaders = [
            "clinical_item_id", "subsequent_item_id",
            "patient_count_0", "patient_count_3600", "patient_count_86400", "patient_count_604800", "patient_count_any",
            "time_diff_sum", "time_diff_sum_squares",
        ]
        associationRows = [
            [-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0],
            [-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0],
            [-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0],
            [-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0],
            [-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0],
            [-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0],

            # Zero count associations, probably shouldn't even be here. If so, ignore them anyway
            [-2, -3, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0],
            [-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0],
            [-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0],
            [-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0],
            [-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0],
            [-4, -7, 23, 33, 33, 33, 63, 40.0, 5420.0],
            [-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0],

            [-6, -2, 7, 7, 7, 7, 7, 1.0, 1.0],
            [-6, -4, 20, 20, 20, 20, 20, 1.0, 1.0],
        ]
        patientPrefix = "patient_count_"
        intervalSuffixes = [
            header[len(patientPrefix):] for header in associationHeaders
            if header.startswith(patientPrefix)
        ]
        for values in associationRows:
            association = RowItemModel(values, associationHeaders)
            # Add the non patient_count variations: copy each value, adding 5
            # when it is >5 and not for the zero time interval
            for suffix in intervalSuffixes:
                patientValue = association[patientPrefix + suffix]
                if suffix != "0" and patientValue > 5:
                    association["count_%s" % suffix] = patientValue + 5
                else:
                    association["count_%s" % suffix] = patientValue
            DBUtil.findOrInsertItem("clinical_item_association", association)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        for cacheKey in ("analyzedPatientCount", "clinicalItemCountsUpdated"):
            self.dataManager.clearCacheData(cacheKey)

        # Instance to test on
        self.recommender = ItemAssociationRecommender()

    def tearDown(self):
        """Delete the test records inserted by setUp, then run the base class tearDown."""
        log.info("Purge test records from the database")

        # Fixed-condition deletes first, in this order: associations, patient items, clinical items
        for statement in (
                "delete from clinical_item_association where clinical_item_id < 0",
                "delete from patient_item where patient_item_id < 0",
                "delete from clinical_item where clinical_item_id < 0",
        ):
            DBUtil.execute(statement)

        # Categories are removed by the ids recorded during setUp
        categoryIdCsv = ",".join(self.clinicalItemCategoryIdStrList)
        DBUtil.execute(
            "delete from clinical_item_category where clinical_item_category_id in (%s)"
            % categoryIdCsv)

        DBTestCase.tearDown(self)

    def test_recommender(self):
        """Run the recommender against the mock data under a series of query settings.

        A single query object is reused and modified between steps; after each
        modification the ranked clinical_item_id values are checked.
        """
        query = RecommenderQuery()
        # queryItemIds, excludeItemIds and timeDeltaMax are left at their
        # defaults here and set explicitly by the later steps.
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        idHeaders = ["clinical_item_id"]

        def assertRanking(expectedIds):
            # Build the expected rows, run the current query, compare rankings
            expectedData = [
                RowItemModel([itemId], idHeaders) for itemId in expectedIds
            ]
            recommendedData = self.recommender(query)
            self.assertEqualRecommendedData(expectedData, recommendedData,
                                            query)

        log.debug(
            "Query with no item key input, just return ranks by general likelihood then."
        )
        assertRanking([-3, -6, -5])

        log.debug(
            "Query with key item inputs for which no data exists.  Effecitvely ignore it then, so just return ranks by general likelihood."
        )
        query.queryItemIds = {-100}
        assertRanking([-3, -6, -5])

        log.debug("Query with category filter on recommended results.")
        query.queryItemIds = {-100}
        query.excludeCategoryIds = {-1, -4, -5, -6}
        assertRanking([-6, -5])

        log.debug(
            "Query with category filter and specific exclusion filter on recommended results."
        )
        query.queryItemIds = {-100}
        query.excludeItemIds = {-6}
        query.excludeCategoryIds = {-1, -4, -5, -6}
        assertRanking([-5])

        log.debug(
            "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
        )
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        assertRanking([-4, -6])

        log.debug(
            "General query but set a limit on time delta worth counting item associations"
        )
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        query.timeDeltaMax = DELTA_HOUR
        assertRanking([-6, -4])

        log.debug("General query with category limit")
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = set()
        query.excludeCategoryIds = {-2, -4, -5, -6}
        query.timeDeltaMax = DELTA_HOUR
        assertRanking([-4])

        log.debug("General query with specific exclusion")
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = {-4, -3, -2}
        query.excludeCategoryIds = set()
        query.timeDeltaMax = DELTA_HOUR
        assertRanking([-6])

    def test_recommender_aggregation(self):
        """Check recommendation scores under each score aggregation method.

        The same query is run with the default aggregation method and then
        with "unweighted", "SerialBayes" and "NaiveBayes", followed by a run
        with a freqRatio field filter added.
        """
        query = RecommenderQuery()
        query.countPrefix = "patient_"
        query.queryItemIds = {-2, -5}
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        headers = ["clinical_item_id", "conditionalFreq", "freqRatio"]

        # (aggregation method, expected rows); None keeps the query's default method
        scenarios = [
            (None, [
                [-4, 0.3, 22.5],
                [-6, 0.16667, 7.142857],
            ]),
            ("unweighted", [
                [-4, 0.32857, 24.64286],
                [-6, 0.16667, 7.142857],
            ]),
            ("SerialBayes", [
                [-4, 0.89157, 66.867471],
                [-6, 0.16667, 7.142857],
            ]),
            ("NaiveBayes", [
                # Without truncating negative values
                # (with truncation the first row would be [-4, 0.8, 58.59707])
                [-4, 3.75, 281.25],
                [-6, 0.16667, 7.142857],
            ]),
        ]
        for method, rows in scenarios:
            if method is not None:
                query.aggregationMethod = method
            expectedData = [RowItemModel(row, headers) for row in rows]
            recommendedData = self.recommender(query)
            self.assertEqualRecommendedData(expectedData, recommendedData,
                                            query)

        # Apply value filter (aggregation method is still "NaiveBayes" here)
        query.fieldFilters["freqRatio>"] = 10.0
        expectedData = [RowItemModel([-6, 0.16667, 7.142857], headers)]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

    def assertEqualRecommendedData(self, expectedData, recommendedData, query):
        """Check recommended items against expected items, position by position.

        Don't necessarily care about every number that comes out of the
        recommendations, but do care about consistency in rankings and the
        relative order by query.sortField.

        For each expected/recommended pair:
        - derived statistics named by the expected item's keys are populated
          on the recommended item,
        - clinical_item_id must match,
        - every key specified in the expected item must match (floats are
          compared to 5 decimal places),
        - the recommended item's query.sortField value must not exceed the
          previous item's value (descending order).
        Finally, both sequences must have the same length.
        """
        lastScore = None
        for expectedItem, recommendedItem in zip(expectedData,
                                                 recommendedData):
            # Ensure derived statistics are populated to enable comparisons
            ItemAssociationRecommender.populateDerivedStats(
                recommendedItem, expectedItem.keys())

            self.assertEqualDict(expectedItem, recommendedItem,
                                 ["clinical_item_id"])
            # If specified, then verify the specific values.
            # keys() rather than the Python 2-only iterkeys(), matching the
            # call above, so this runs on both Python 2 and 3.
            for key in expectedItem.keys():
                if isinstance(expectedItem[key], float):
                    # assertAlmostEqual: the assertAlmostEquals alias is
                    # deprecated and no longer exists in Python 3.12+
                    self.assertAlmostEqual(expectedItem[key],
                                           recommendedItem[key], 5)
                else:
                    self.assertEqual(expectedItem[key], recommendedItem[key])
            if lastScore is not None:
                # Verify descending order of scores
                self.assertTrue(recommendedItem[query.sortField] <= lastScore)
            lastScore = recommendedItem[query.sortField]

        # zip() stops at the shorter sequence, so check the lengths explicitly
        self.assertEqual(len(expectedData), len(recommendedData))

    def test_recommender_stats(self):
        """Run the recommender on the mock data and verify the calculated statistics.

        First with patient-level counts sorted by P-Fisher, then with the
        non-unique counts (empty countPrefix) sorted by oddsRatio.
        """
        commonHeaders = [
            "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq",
            "baselineFreq", "freqRatio"
        ]

        query = RecommenderQuery()
        initialParams = {
            "countPrefix": "patient_",
            "queryItemIds": "-6",
            "resultCount": "3",  # Just get top 3 ranks for simplicity
            "maxRecommendedId": "0",  # Artificial constraint to focus only on test data
            "sortField": "P-Fisher",  # Specifically request derived expected vs. observed stats
        }
        query.parseParams(initialParams)

        log.debug("Query with single item not perturbed by others.")
        headers = commonHeaders + ["P-Fisher"]
        expectedRows = [
            [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 7.0, 0.1, 0.0100, 10.0, 3.7e-06],
            [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 20.0, 0.286, 0.0133, 21.42857, 1.2e-23],
        ]
        expectedData = [RowItemModel(row, headers) for row in expectedRows]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedDataStats(expectedData, recommendedData,
                                             headers)

        log.debug("Query for non-unique counts.")
        updatedParams = {
            "countPrefix": "",
            "sortField": "oddsRatio",
        }
        query.parseParams(updatedParams)
        headers = commonHeaders + ["oddsRatio"]
        expectedRows = [
            [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 25.0, 0.35714, 0.01333, 26.7857, 107.96296],
            [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 12.0, 0.1714, 0.01, 17.1429, 33.47126],
        ]
        expectedData = [RowItemModel(row, headers) for row in expectedRows]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedDataStats(expectedData, recommendedData,
                                             headers)

    def assertEqualRecommendedDataStats(self, expectedData, recommendedData,
                                        headers):
        """Verify that the recommended items carry the expected score / stat values.

        Unlike a ranking-only check, the actual values are compared here:
        both sequences must have the same length and, for every name in
        headers, the expected and recommended values must agree to 3 decimal
        places.  Derived statistics named in headers are populated on each
        recommended item before the comparison.
        """
        self.assertEqual(len(expectedData), len(recommendedData))
        for expectedItem, recommendedItem in zip(expectedData,
                                                 recommendedData):
            # Ensure the recommendedData has all fields of interest populated / calculated
            ItemAssociationRecommender.populateDerivedStats(
                recommendedItem, headers)
            for header in headers:
                expectedValue = expectedItem[header]
                recommendedValue = recommendedItem[header]
                msg = 'Dicts diff with key (%s).  Verify = %s, Sample = %s' % (
                    header, expectedValue, recommendedValue)
                # assertAlmostEqual: the assertAlmostEquals alias is
                # deprecated and no longer exists in Python 3.12+
                self.assertAlmostEqual(expectedValue, recommendedValue, 3,
                                       msg)

    def test_recommender_stats_commandline(self):
        """Run the recommender through its main() entry point and verify the printed statistics.

        Output written to stdout by main() is captured and parsed.  The real
        sys.stdout is always restored afterwards, so a failure here cannot
        leave stdout redirected for the tests that run later.
        """
        def runCommandLine(queryString):
            """Call main() with the query string, returning its stdout text as a readable stream."""
            realStdout = sys.stdout
            # Redirect stdout output to collect test results
            sys.stdout = StringIO()
            try:
                argv = ["ItemRecommender.py", queryString, "-"]
                self.recommender.main(argv)
                return StringIO(sys.stdout.getvalue())
            finally:
                sys.stdout = realStdout

        log.debug("Query with single item not perturbed by others.")
        headers = [
            "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq",
            "baselineFreq", "freqRatio", "P-Fisher"
        ]
        expectedData = \
            [
                RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0,  7.0,  0.1,    0.0100, 10.0,       3.7e-06], headers ),
                RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 20.0,  0.286,  0.0133, 21.42857,   1.2e-23], headers ),
            ]
        textOutput = runCommandLine(
            "maxRecommendedId=0&queryItemIds=-6&countPrefix=patient_&resultCount=3&sortField=P-Fisher"
        )
        self.assertEqualRecommendedDataStatsTextOutput(expectedData,
                                                       textOutput, headers)

        log.debug("Query for non-unique counts.")
        headers = [
            "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq",
            "baselineFreq", "freqRatio", "oddsRatio"
        ]
        expectedData = \
            [   RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 25.0,  0.35714, 0.01333,  26.7857, 107.96296], headers ),
                RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 12.0,  0.1714,  0.01,     17.1429,  33.47126], headers ),
            ]
        textOutput = runCommandLine(
            "maxRecommendedId=0&queryItemIds=-6&countPrefix=&resultCount=3&sortField=oddsRatio"
        )
        self.assertEqualRecommendedDataStatsTextOutput(expectedData,
                                                       textOutput, headers)

    def assertEqualRecommendedDataStatsTextOutput(self, expectedData,
                                                  textOutput, headers):
        """Parse textual recommender output and compare its stats against the expected rows.

        Rows are read from textOutput with TabDictReader. Every column named in
        headers is converted to a float, and the parsed rows are then handed to
        assertEqualRecommendedDataStats, so actual score / stat values are verified.
        """
        parsedRows = []
        for parsedRow in TabDictReader(textOutput):
            # Parse the columns of interest into numerical values for comparison
            for column in headers:
                if column in parsedRow:
                    parsedRow[column] = float(parsedRow[column])
            parsedRows.append(parsedRow)
        self.assertEqualRecommendedDataStats(expectedData, parsedRows,
                                             headers)

    def test_dataCache(self):
        """Check that repeating queries with the data cache enabled triggers no extra DB queries.

        The recommender's dataManager.queryCount is sampled after every call.
        Once the cache has been populated, repeated, subset and partially
        overlapping queries are all expected to leave that count unchanged
        while still returning the same recommendations.
        """
        query = RecommenderQuery()
        query.countPrefix = "patient_"
        query.queryItemIds = set([-2, -5])
        # excludeItemIds, categoryIds and timeDeltaMax are not set for this test
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        def runQuery():
            """Run the current query; return (recommendations, query count observed afterwards)."""
            results = self.recommender(query)
            return results, self.recommender.dataManager.queryCount

        # First query without cache
        self.recommender.dataManager.dataCache = None
        baselineData, baselineQueryCount = runQuery()

        # Redo query with cache
        self.recommender.dataManager.dataCache = dict()
        newData, newQueryCount = runQuery()
        # Ensure getting same results
        self.assertEqualRecommendedData(baselineData, newData, query)
        # Expect needed more queries since no prior cache
        self.assertNotEqual(baselineQueryCount, newQueryCount)
        baselineQueryCount = newQueryCount

        # Again, but should be no new query since have cached results last time
        newData, newQueryCount = runQuery()
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Repeat multiple times, should still have no new query activity
        for _ in range(10):
            newData, newQueryCount = runQuery()
            self.assertEqualRecommendedData(baselineData, newData, query)
            self.assertEqual(baselineQueryCount, newQueryCount)

        # Query for subset should still yield no new query
        query.queryItemIds = set([-2])
        newData, newQueryCount = runQuery()
        baselineData = newData  # New baseline for subset
        # Expect no queries for subsets
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Repeat query for subset
        newData, newQueryCount = runQuery()
        self.assertEqualRecommendedData(baselineData, newData, query)
        # Expect no queries for subsets
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Query for partial subset, partial new
        query.queryItemIds = set([-5, -6])
        newData, newQueryCount = runQuery()
        baselineData = newData  # New baseline for partial subset
        # Expect no new queries for subsets, because first query should have done mass-all query
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Repeat for partial subset, no longer new.  Compare against the baseline
        # captured just above; resetting the baseline to the fresh result here
        # would make the data comparison trivially true.
        newData, newQueryCount = runQuery()
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)
Beispiel #8
0
    def setUp(self):
        """Populate the test database with mock categories, items, patient items and association counts.

        Also clears two DataManager cache entries and creates the
        ItemAssociationRecommender instance the tests operate on.
        """
        DBTestCase.setUp(self)
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        log.info("Populate the database with test data")

        # --- clinical_item_category: keep the resulting IDs (as strings) on the instance
        self.clinicalItemCategoryIdStrList = []
        categoryColumns = ["clinical_item_category_id", "source_table"]
        categoryRows = [
            [-1, "Labs"],
            [-2, "Imaging"],
            [-3, "Meds"],
            [-4, "Nursing"],
            [-5, "Problems"],
            [-6, "Lab Results"],
        ]
        categoryModels = [RowItemModel(row, categoryColumns) for row in categoryRows]
        for categoryModel in categoryModels:
            categoryId, _isNew = DBUtil.findOrInsertItem(
                "clinical_item_category", categoryModel)
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # --- clinical_item
        itemColumns = ["clinical_item_id", "clinical_item_category_id", "name"]
        itemRows = [
            [-1, -1, "CBC"],
            [-2, -1, "BMP"],
            [-3, -1, "Hepatic Panel"],
            [-4, -1, "Cardiac Enzymes"],
            [-5, -2, "CXR"],
            [-6, -2, "RUQ Ultrasound"],
            [-7, -2, "CT Abdomen/Pelvis"],
            [-8, -2, "CT PE Thorax"],
            [-9, -3, "Acetaminophen"],
            [-10, -3, "Carvedilol"],
            [-11, -3, "Enoxaparin"],
            [-12, -3, "Warfarin"],
            [-13, -3, "Ceftriaxone"],
            [-14, -4, "Foley Catheter"],
            [-15, -4, "Strict I&O"],
            [-16, -4, "Fall Precautions"],
        ]
        itemModels = [RowItemModel(row, itemColumns) for row in itemRows]
        for itemModel in itemModels:
            DBUtil.findOrInsertItem("clinical_item", itemModel)

        # --- patient_item
        patientItemColumns = [
            "patient_item_id", "patient_id", "clinical_item_id", "item_date",
            "analyze_date"
        ]
        analyzeDate = datetime(2010, 1, 1, 0)
        patientItemRows = [
            [-1,  -11111, -4,  datetime(2000, 1, 1, 0), analyzeDate],
            [-2,  -11111, -10, datetime(2000, 1, 1, 0), analyzeDate],
            [-3,  -11111, -8,  datetime(2000, 1, 1, 2), analyzeDate],
            [-4,  -11111, -10, datetime(2000, 1, 2, 0), analyzeDate],
            [-5,  -11111, -12, datetime(2000, 2, 1, 0), analyzeDate],
            [-10, -22222, -7,  datetime(2000, 1, 5, 0), analyzeDate],
            [-12, -22222, -6,  datetime(2000, 1, 9, 0), analyzeDate],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), analyzeDate],
            [-14, -33333, -6,  datetime(2000, 2, 9, 0), analyzeDate],
            [-15, -33333, -2,  datetime(2000, 2, 11, 0), analyzeDate],
        ]
        patientItemModels = [
            RowItemModel(row, patientItemColumns) for row in patientItemRows
        ]
        for patientItemModel in patientItemModels:
            DBUtil.findOrInsertItem("patient_item", patientItemModel)

        # --- clinical_item_association (patient_count_* columns given explicitly)
        associationColumns = [
            "clinical_item_id", "subsequent_item_id",
            "patient_count_0", "patient_count_3600", "patient_count_86400",
            "patient_count_604800", "patient_count_any",
            "time_diff_sum", "time_diff_sum_squares",
        ]
        associationRows = [
            [-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0],
            [-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0],
            [-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0],
            [-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0],
            [-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0],
            [-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0],

            # Zero count associations, probably shouldn't even be here. If so, ignore them anyway
            [-2, -3, 0, 0, 0, 0, 0, 0.0, 0.0],
            [-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0],
            [-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0],
            [-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0],
            [-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0],
            [-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0],
            [-4, -7, 23, 33, 33, 33, 63, 40.0, 5420.0],
            [-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0],

            [-6, -2, 7, 7, 7, 7, 7, 1.0, 1.0],
            [-6, -4, 20, 20, 20, 20, 20, 1.0, 1.0],
        ]
        associationModels = [
            RowItemModel(row, associationColumns) for row in associationRows
        ]
        patientCountPrefix = "patient_count_"
        for associationModel in associationModels:
            # Derive the non-patient "count_*" variations: copy each
            # patient_count_* value, adding 5 when it is >5 and not for the
            # zero time interval
            for column in associationColumns:
                if not column.startswith(patientCountPrefix):
                    continue
                timeStr = column[len(patientCountPrefix):]
                countValue = associationModel[column]
                if timeStr != "0" and countValue > 5:
                    countValue += 5
                associationModel["count_%s" % timeStr] = countValue
            DBUtil.findOrInsertItem("clinical_item_association",
                                    associationModel)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        for cacheKey in ("analyzedPatientCount", "clinicalItemCountsUpdated"):
            self.dataManager.clearCacheData(cacheKey)

        self.recommender = ItemAssociationRecommender()
    def setUp(self):
        """Populate the test database with mock categories, items, patient items and association counts.

        Also clears two DataManager cache entries and creates the
        RecommendationRankingTrendAnalysis instance the tests operate on.
        """
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        # --- clinical_item_category: keep the resulting IDs (as strings) on the instance
        self.clinicalItemCategoryIdStrList = []
        categoryColumns = ["clinical_item_category_id", "source_table"]
        categoryRows = [
            [-1, "Labs"],
            [-2, "Imaging"],
            [-3, "Meds"],
            [-4, "Nursing"],
            [-5, "Problems"],
            [-6, "Lab Results"],
        ]
        categoryModels = [RowItemModel(row, categoryColumns) for row in categoryRows]
        for categoryModel in categoryModels:
            categoryId, _isNew = DBUtil.findOrInsertItem(
                "clinical_item_category", categoryModel)
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # --- clinical_item
        itemColumns = ["clinical_item_id", "clinical_item_category_id", "name"]
        itemRows = [
            [-1, -1, "CBC"],
            [-2, -1, "BMP"],
            [-3, -1, "Hepatic Panel"],
            [-4, -1, "Cardiac Enzymes"],
            [-5, -2, "CXR"],
            [-6, -2, "RUQ Ultrasound"],
            [-7, -2, "CT Abdomen/Pelvis"],
            [-8, -2, "CT PE Thorax"],
            [-9, -3, "Acetaminophen"],
            [-10, -3, "Carvedilol"],
            [-11, -3, "Enoxaparin"],
            [-12, -3, "Warfarin"],
            [-13, -3, "Ceftriaxone"],
            [-14, -4, "Foley Catheter"],
            [-15, -4, "Strict I&O"],
            [-16, -4, "Fall Precautions"],
        ]
        itemModels = [RowItemModel(row, itemColumns) for row in itemRows]
        for itemModel in itemModels:
            DBUtil.findOrInsertItem("clinical_item", itemModel)

        # --- patient_item
        patientItemColumns = [
            "patient_item_id", "patient_id", "clinical_item_id", "item_date",
            "analyze_date"
        ]
        analyzeDate = datetime(2010, 1, 1, 0)
        patientItemRows = [
            [-1,  -11111, -4,  datetime(2000, 1, 1, 0), analyzeDate],
            [-2,  -11111, -10, datetime(2000, 1, 1, 1), analyzeDate],
            [-3,  -11111, -8,  datetime(2000, 1, 1, 2), analyzeDate],
            [-4,  -11111, -10, datetime(2000, 1, 2, 0), analyzeDate],
            [-5,  -11111, -12, datetime(2000, 2, 1, 0), analyzeDate],
            [-10, -22222, -7,  datetime(2000, 1, 5, 0), analyzeDate],
            [-12, -22222, -6,  datetime(2000, 1, 9, 0), analyzeDate],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), analyzeDate],
            [-14, -33333, -6,  datetime(2000, 2, 9, 0), analyzeDate],
            [-15, -33333, -2,  datetime(2000, 2, 11, 0), analyzeDate],
        ]
        patientItemModels = [
            RowItemModel(row, patientItemColumns) for row in patientItemRows
        ]
        for patientItemModel in patientItemModels:
            DBUtil.findOrInsertItem("patient_item", patientItemModel)

        # --- clinical_item_association
        associationColumns = [
            "clinical_item_id", "subsequent_item_id",
            "count_0", "count_3600", "count_86400", "count_604800", "count_any",
            "time_diff_sum", "time_diff_sum_squares",
        ]
        associationRows = [
            # Rows where the item is paired with itself
            [-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0],
            [-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0],
            [-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0],
            [-4, -4, 240, 240, 240, 240, 240, 0.0, 0.0],
            [-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0],
            [-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0],
            [-7, -7, 35, 35, 35, 50, 80, 0.0, 0.0],
            [-8, -8, 35, 35, 35, 50, 80, 0.0, 0.0],
            [-10, -10, 45, 45, 55, 60, 90, 0.0, 0.0],
            [-12, -12, 75, 75, 75, 80, 90, 0.0, 0.0],

            # Rows pairing two different items
            [-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0],
            [-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0],
            [-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0],
            [-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0],
            [-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0],
            [-4, -7, 27, 33, 33, 33, 83, 40.0, 5420.0],
            [-4, -8, 1, 2, 3, 4, 5, 40.0, 5420.0],
            [-4, -10, 25, 35, 40, 45, 73, 47.0, 5420.0],
            [-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0],
            [-10, -8, 2, 4, 6, 8, 10, 47.0, 5420.0],
            [-10, -12, 12, 14, 16, 18, 20, 47.0, 5420.0],
        ]
        associationModels = [
            RowItemModel(row, associationColumns) for row in associationRows
        ]
        for associationModel in associationModels:
            DBUtil.findOrInsertItem("clinical_item_association",
                                    associationModel)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        for cacheKey in ("analyzedPatientCount", "clinicalItemCountsUpdated"):
            self.dataManager.clearCacheData(cacheKey)

        # Instance to test on
        self.analyzer = RecommendationRankingTrendAnalysis()
class TestRecommendationRankingTrendAnalysis(DBTestCase):
    """Database-backed tests for RecommendationRankingTrendAnalysis using mock (negative ID) records."""

    def setUp(self):
        """Populate the test database with mock categories, items, patient items and association counts."""
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        # --- clinical_item_category: remember the IDs so tearDown can delete exactly these
        self.clinicalItemCategoryIdStrList = []
        categoryColumns = ["clinical_item_category_id", "source_table"]
        categoryRows = [
            [-1, "Labs"],
            [-2, "Imaging"],
            [-3, "Meds"],
            [-4, "Nursing"],
            [-5, "Problems"],
            [-6, "Lab Results"],
        ]
        categoryModels = [RowItemModel(row, categoryColumns) for row in categoryRows]
        for categoryModel in categoryModels:
            categoryId, _isNew = DBUtil.findOrInsertItem(
                "clinical_item_category", categoryModel)
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # --- clinical_item
        itemColumns = ["clinical_item_id", "clinical_item_category_id", "name"]
        itemRows = [
            [-1, -1, "CBC"],
            [-2, -1, "BMP"],
            [-3, -1, "Hepatic Panel"],
            [-4, -1, "Cardiac Enzymes"],
            [-5, -2, "CXR"],
            [-6, -2, "RUQ Ultrasound"],
            [-7, -2, "CT Abdomen/Pelvis"],
            [-8, -2, "CT PE Thorax"],
            [-9, -3, "Acetaminophen"],
            [-10, -3, "Carvedilol"],
            [-11, -3, "Enoxaparin"],
            [-12, -3, "Warfarin"],
            [-13, -3, "Ceftriaxone"],
            [-14, -4, "Foley Catheter"],
            [-15, -4, "Strict I&O"],
            [-16, -4, "Fall Precautions"],
        ]
        itemModels = [RowItemModel(row, itemColumns) for row in itemRows]
        for itemModel in itemModels:
            DBUtil.findOrInsertItem("clinical_item", itemModel)

        # --- patient_item
        patientItemColumns = [
            "patient_item_id", "patient_id", "clinical_item_id", "item_date",
            "analyze_date"
        ]
        analyzeDate = datetime(2010, 1, 1, 0)
        patientItemRows = [
            [-1,  -11111, -4,  datetime(2000, 1, 1, 0), analyzeDate],
            [-2,  -11111, -10, datetime(2000, 1, 1, 1), analyzeDate],
            [-3,  -11111, -8,  datetime(2000, 1, 1, 2), analyzeDate],
            [-4,  -11111, -10, datetime(2000, 1, 2, 0), analyzeDate],
            [-5,  -11111, -12, datetime(2000, 2, 1, 0), analyzeDate],
            [-10, -22222, -7,  datetime(2000, 1, 5, 0), analyzeDate],
            [-12, -22222, -6,  datetime(2000, 1, 9, 0), analyzeDate],
            [-13, -22222, -11, datetime(2000, 1, 9, 0), analyzeDate],
            [-14, -33333, -6,  datetime(2000, 2, 9, 0), analyzeDate],
            [-15, -33333, -2,  datetime(2000, 2, 11, 0), analyzeDate],
        ]
        patientItemModels = [
            RowItemModel(row, patientItemColumns) for row in patientItemRows
        ]
        for patientItemModel in patientItemModels:
            DBUtil.findOrInsertItem("patient_item", patientItemModel)

        # --- clinical_item_association
        associationColumns = [
            "clinical_item_id", "subsequent_item_id",
            "count_0", "count_3600", "count_86400", "count_604800", "count_any",
            "time_diff_sum", "time_diff_sum_squares",
        ]
        associationRows = [
            # Rows where the item is paired with itself
            [-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0],
            [-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0],
            [-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0],
            [-4, -4, 240, 240, 240, 240, 240, 0.0, 0.0],
            [-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0],
            [-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0],
            [-7, -7, 35, 35, 35, 50, 80, 0.0, 0.0],
            [-8, -8, 35, 35, 35, 50, 80, 0.0, 0.0],
            [-10, -10, 45, 45, 55, 60, 90, 0.0, 0.0],
            [-12, -12, 75, 75, 75, 80, 90, 0.0, 0.0],

            # Rows pairing two different items
            [-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0],
            [-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0],
            [-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0],
            [-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0],
            [-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0],
            [-4, -7, 27, 33, 33, 33, 83, 40.0, 5420.0],
            [-4, -8, 1, 2, 3, 4, 5, 40.0, 5420.0],
            [-4, -10, 25, 35, 40, 45, 73, 47.0, 5420.0],
            [-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0],
            [-10, -8, 2, 4, 6, 8, 10, 47.0, 5420.0],
            [-10, -12, 12, 14, 16, 18, 20, 47.0, 5420.0],
        ]
        associationModels = [
            RowItemModel(row, associationColumns) for row in associationRows
        ]
        for associationModel in associationModels:
            DBUtil.findOrInsertItem("clinical_item_association",
                                    associationModel)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        for cacheKey in ("analyzedPatientCount", "clinicalItemCountsUpdated"):
            self.dataManager.clearCacheData(cacheKey)

        # Instance to test on
        self.analyzer = RecommendationRankingTrendAnalysis()

    def tearDown(self):
        """Delete the negative-ID test records and the categories recorded during setUp."""
        log.info("Purge test records from the database")

        negativeIdPurges = (
            "delete from clinical_item_association where clinical_item_id < 0",
            "delete from patient_item where patient_item_id < 0",
            "delete from clinical_item where clinical_item_id < 0",
        )
        for purgeStatement in negativeIdPurges:
            DBUtil.execute(purgeStatement)

        # Categories are removed by the exact IDs collected in setUp
        categoryIdCsv = ",".join(self.clinicalItemCategoryIdStrList)
        DBUtil.execute(
            "delete from clinical_item_category where clinical_item_category_id in (%s)"
            % categoryIdCsv)

        DBTestCase.tearDown(self)

    def test_recommenderAnalysis(self):
        """Run the ranking analysis for one mock patient under several recommenders and check the results.

        Only ranks are pinned down; scores are matched with SENTINEL_ANY_FLOAT.
        """
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-11111])
        analysisQuery.recommender = BaselineFrequencyRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # Default exclusions and timeDeltaMax are not configured on the base query here.

        # Result tuples below appear to correspond to the columns:
        #   patient_id, clinical_item_id, iItem, iRecItem, recRank, recScore

        def checkAnalysis(expectedRows):
            """Run the analyzer on the current query and compare its output table to expectedRows."""
            actualRows = self.analyzer(analysisQuery)
            self.assertEqualTable(expectedRows, actualRows, 3)

        # Start with default recommender
        # Don't care about specific scores, as long as ranks are correct
        checkAnalysis([
            (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),  # 0.170
            (-11111, -10, 1, 1, 4, SENTINEL_ANY_FLOAT),  # 0.032
            (-11111, -8, 2, 2, 5, SENTINEL_ANY_FLOAT),  # 0.025
            (-11111, -12, 4, 3, 2, SENTINEL_ANY_FLOAT),  # 0.053
        ])

        # Now try targeted recommender
        analysisQuery.recommender = ItemAssociationRecommender()
        checkAnalysis([
            (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),  # 0.167
            (-11111, -10, 1, 1, 2, SENTINEL_ANY_FLOAT),  # 0.304
            (-11111, -8, 2, 2, 5, SENTINEL_ANY_FLOAT),  # 0.190
            (-11111, -12, 4, 3, 1, SENTINEL_ANY_FLOAT),  # 0.444
        ])

        # Repeat, but put a limit on maximum number of query items and recommendations we want analyzed
        analysisQuery.queryItemMax = 2
        checkAnalysis([
            (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),  # 0.167
            (-11111, -10, 1, 1, 2, SENTINEL_ANY_FLOAT),  # 0.304
        ])
Beispiel #11
0
    def setUp(self):
        """Populate the test database with mock categories, items, patient items and item links.

        Also creates the DecayingWindows and DataManager instances used by the tests.
        """
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        # --- clinical_item_category: keep the resulting IDs (as strings) on the instance
        self.clinicalItemCategoryIdStrList = []
        categoryColumns = ["clinical_item_category_id", "source_table"]
        categoryRows = [
            [-1, "Labs"],
            [-2, "Imaging"],
            [-3, "Meds"],
            [-4, "Nursing"],
            [-5, "Problems"],
            [-6, "Lab Results"],
        ]
        categoryModels = [RowItemModel(row, categoryColumns) for row in categoryRows]
        for categoryModel in categoryModels:
            categoryId, _isNew = DBUtil.findOrInsertItem(
                "clinical_item_category", categoryModel)
            self.clinicalItemCategoryIdStrList.append(str(categoryId))

        # --- clinical_item (with explicit analysis_status)
        itemColumns = [
            "clinical_item_id", "clinical_item_category_id", "name",
            "analysis_status"
        ]
        itemRows = [
            [-1, -1, "CBC", 1],
            # Clear analysis status, so this will be ignored unless changed
            [-2, -1, "BMP", 0],
            [-3, -1, "Hepatic Panel", 1],
            [-4, -1, "Cardiac Enzymes", 1],
            [-5, -2, "CXR", 1],
            [-6, -2, "RUQ Ultrasound", 1],
            [-7, -2, "CT Abdomen/Pelvis", 1],
            [-8, -2, "CT PE Thorax", 1],
            [-9, -3, "Acetaminophen", 1],
            [-10, -3, "Carvedilol", 1],
            [-11, -3, "Enoxaparin", 1],
            [-12, -3, "Warfarin", 1],
            [-13, -3, "Ceftriaxone", 1],
            [-14, -4, "Foley Catheter", 1],
            [-15, -4, "Strict I&O", 1],
            [-16, -4, "Fall Precautions", 1],
        ]
        itemModels = [RowItemModel(row, itemColumns) for row in itemRows]
        for itemModel in itemModels:
            DBUtil.findOrInsertItem("clinical_item", itemModel)

        # --- patient_item (grouped by encounter)
        patientItemColumns = [
            "patient_item_id", "encounter_id", "patient_id",
            "clinical_item_id", "item_date"
        ]
        patientItemRows = [
            [-1,  -111, -11111, -4,  datetime(2000, 1, 1, 0)],
            [-2,  -111, -11111, -10, datetime(2000, 1, 1, 0)],
            [-3,  -111, -11111, -8,  datetime(2000, 1, 1, 2)],
            [-4,  -112, -11111, -10, datetime(2000, 1, 2, 0)],
            [-5,  -112, -11111, -12, datetime(2000, 2, 1, 0)],
            [-10, -222, -22222, -7,  datetime(2000, 1, 5, 0)],
            [-12, -222, -22222, -6,  datetime(2000, 1, 9, 0)],
            [-13, -222, -22222, -11, datetime(2000, 1, 9, 0)],
            [-95, -222, -22222, -9,  datetime(2000, 1, 10, 0)],
            # In first window delta unit only
            [-94, -333, -33333, -8,  datetime(2000, 1, 10, 0)],
            [-14, -333, -33333, -6,  datetime(2000, 2, 9, 0)],
            # Will set clinical_item_link inheritances to this item to only record certain associations
            [-15, -333, -33333, -2,  datetime(2000, 2, 11, 0)],
            [-16, -333, -33333, -11, datetime(2000, 2, 11, 0)],
        ]
        patientItemModels = [
            RowItemModel(row, patientItemColumns) for row in patientItemRows
        ]
        for patientItemModel in patientItemModels:
            DBUtil.findOrInsertItem("patient_item", patientItemModel)

        # --- clinical_item_link
        # Don't have direct, but instead demonstrate inherited relationship from 6 to 2 will still be recognized
        linkColumns = ["clinical_item_id", "linked_item_id"]
        linkRows = [
            [-6, -4],
            [-4, -2],
        ]
        linkModels = [RowItemModel(row, linkColumns) for row in linkRows]
        for linkModel in linkModels:
            DBUtil.findOrInsertItem("clinical_item_link", linkModel)

        # DecayingWindows instance to test on, *** remember to change database to medinfo_copy
        self.decayAnalyzer = DecayingWindows()
        self.dataManager = DataManager()
Beispiel #12
0
class TestDecayingWindows(DBTestCase):
    """Database-backed tests for DecayingWindows and DataManager.resetAssociationModel.

    All fixture rows use negative IDs; tearDown purges them again with
    ``< 0`` filters (and an explicit ID list for the categories).
    """

    # Suffixes of the per-window count columns read from clinical_item_association.
    # NOTE(review): the numeric suffixes look like window sizes in seconds
    # (hour, day, week, 30 / 90 / 365 days) - confirm against the schema.
    COUNT_WINDOW_SUFFIXES = (
        "0", "3600", "86400", "604800",
        "2592000", "7776000", "31536000",
        "any",
    )

    @classmethod
    def _associationQuery(cls, countPrefix):
        """Build the query for the test rows of clinical_item_association.

        countPrefix selects the column family: "count" yields count_0,
        count_3600, ...; "patient_count" yields patient_count_0, ...
        Rows come back ordered by (clinical_item_id, subsequent_item_id).
        """
        countColumns = ", ".join(
            "%s_%s" % (countPrefix, suffix)
            for suffix in cls.COUNT_WINDOW_SUFFIXES)
        return \
            """
            select
                clinical_item_id, subsequent_item_id,
                %s
            from
                clinical_item_association
            where
                clinical_item_id < 0
            order by
                clinical_item_id, subsequent_item_id
            """ % countColumns

    @staticmethod
    def _expectedInitialAssociationStats(decay):
        """Return the association table expected after the first training pass.

        The same 16 rows are expected by several tests; every non-zero cell
        is either ``decay`` or ``1 + decay``, so the table is parameterized
        on that single weight instead of being pasted once per test.
        """
        d = decay       # Cells that hold exactly the decay weight
        c = 1 + decay   # Cells that hold one full count plus the decay weight
        return \
            [
                [-11, -11,   c,   c,   c,   c,   c, 0, 0,   c],    # Note that decaying windows approach will not try to update counts for time periods longer than the delta period
                [-11,  -9, 0.0, 0.0,   d,   d,   d, 0, 0,   d],
                [-11,  -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],    # 8 not in same delta as other items so co-occurrence not getting counted. Consider future upgrade: don't train on all time ever, but train on two deltas at a time with a sliding window so the overlap ranges are caught. Open question for the buffer based algorithm: it won't be recording analyze_dates as it goes, so could end up with duplicate counts of items each month?
                [-11,  -6,   d,   d,   d,   d,   d, 0, 0,   d],
                [ -9, -11, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -9,  -9,   d,   d,   d,   d,   d, 0, 0,   d],
                [ -9,  -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -9,  -6, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -8, -11, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],    # 8 not in same delta as other items so co-occurrence not getting counted.
                [ -8,  -9, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],    # 8 not in same delta as other items so co-occurrence not getting counted.
                [ -8,  -8,   d,   d,   d,   d,   d, 0, 0,   d],
                [ -8,  -6, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],    # 8 not in same delta as other items so co-occurrence not getting counted.
                [ -6, -11,   d,   d,   d,   c,   c, 0, 0,   c],
                [ -6,  -9, 0.0, 0.0,   d,   d,   d, 0, 0,   d],
                [ -6,  -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],    # 8 not in same delta as other items so co-occurrence not getting counted.
                [ -6,  -6,   c,   c,   c,   c,   c, 0, 0,   c],
            ]

    def _insertTestRows(self, tableName, headers, rows):
        """Find-or-insert each row into tableName and return the resulting IDs.

        Each row is a list of values lined up with headers; it is wrapped in
        a RowItemModel before being handed to DBUtil.findOrInsertItem.
        """
        itemIds = list()
        for row in rows:
            (itemId, isNew) = DBUtil.findOrInsertItem(
                tableName, RowItemModel(row, headers))
            itemIds.append(itemId)
        return itemIds

    def setUp(self):
        """Prepare state for test cases"""
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        # Remember the category IDs as strings so tearDown can delete exactly these rows
        categoryIds = self._insertTestRows(
            "clinical_item_category",
            ["clinical_item_category_id", "source_table"],
            [
                [-1, "Labs"],
                [-2, "Imaging"],
                [-3, "Meds"],
                [-4, "Nursing"],
                [-5, "Problems"],
                [-6, "Lab Results"],
            ])
        self.clinicalItemCategoryIdStrList = [
            str(categoryId) for categoryId in categoryIds
        ]

        self._insertTestRows(
            "clinical_item",
            [
                "clinical_item_id", "clinical_item_category_id", "name",
                "analysis_status"
            ],
            [
                [-1, -1, "CBC", 1],
                [-2, -1, "BMP", 0],  # Clear analysis status, so this will be ignored unless changed
                [-3, -1, "Hepatic Panel", 1],
                [-4, -1, "Cardiac Enzymes", 1],
                [-5, -2, "CXR", 1],
                [-6, -2, "RUQ Ultrasound", 1],
                [-7, -2, "CT Abdomen/Pelvis", 1],
                [-8, -2, "CT PE Thorax", 1],
                [-9, -3, "Acetaminophen", 1],
                [-10, -3, "Carvedilol", 1],
                [-11, -3, "Enoxaparin", 1],
                [-12, -3, "Warfarin", 1],
                [-13, -3, "Ceftriaxone", 1],
                [-14, -4, "Foley Catheter", 1],
                [-15, -4, "Strict I&O", 1],
                [-16, -4, "Fall Precautions", 1],
            ])

        self._insertTestRows(
            "patient_item",
            [
                "patient_item_id", "encounter_id", "patient_id",
                "clinical_item_id", "item_date"
            ],
            [
                [-1,  -111, -11111, -4,  datetime(2000, 1, 1, 0)],
                [-2,  -111, -11111, -10, datetime(2000, 1, 1, 0)],
                [-3,  -111, -11111, -8,  datetime(2000, 1, 1, 2)],
                [-4,  -112, -11111, -10, datetime(2000, 1, 2, 0)],
                [-5,  -112, -11111, -12, datetime(2000, 2, 1, 0)],
                [-10, -222, -22222, -7,  datetime(2000, 1, 5, 0)],
                [-12, -222, -22222, -6,  datetime(2000, 1, 9, 0)],
                [-13, -222, -22222, -11, datetime(2000, 1, 9, 0)],
                [-95, -222, -22222, -9,  datetime(2000, 1, 10, 0)],
                [-94, -333, -33333, -8,  datetime(2000, 1, 10, 0)],  # In first window delta unit only
                [-14, -333, -33333, -6,  datetime(2000, 2, 9, 0)],
                [-15, -333, -33333, -2,  datetime(2000, 2, 11, 0)],  # Will set clinical_item_link inheritances to this item to only record certain associations
                [-16, -333, -33333, -11, datetime(2000, 2, 11, 0)],
            ])

        # Don't have direct, but instead demonstrate inherited relationship from 6 to 2 will still be recognized
        self._insertTestRows(
            "clinical_item_link",
            ["clinical_item_id", "linked_item_id"],
            [
                [-6, -4],
                [-4, -2],
            ])

        # DecayingWindows instance to test on, *** remember to change database to medinfo_copy
        self.decayAnalyzer = DecayingWindows()
        self.dataManager = DataManager()

    def tearDown(self):
        """Restore state from any setUp or test steps"""
        log.info("Purge test records from the database")

        DBUtil.execute(
            "delete from clinical_item_link where clinical_item_id < 0")
        DBUtil.execute(
            "delete from clinical_item_association where clinical_item_id < 0")
        DBUtil.execute("delete from patient_item where patient_item_id < 0")
        DBUtil.execute("delete from clinical_item where clinical_item_id < 0")
        DBUtil.execute(
            "delete from clinical_item_category where clinical_item_category_id in (%s)"
            % ",".join(self.clinicalItemCategoryIdStrList))

        # Purge temporary buffer files. May not match exact name if modified for other purpose
        for filename in os.listdir("."):
            if filename.startswith(TEMP_FILENAME):
                os.remove(filename)

        DBTestCase.tearDown(self)

    def test_decayingWindowsFromBuffer(self):
        """Train with an outputFile set and check item-level counts, twice.

        The first run sets an explicit decay of 0.9; after resetting the
        model the second run sets only windowLength and the expected values
        correspond to a weight of 0.75.
        """
        associationQuery = self._associationQuery("count")

        decayAnalysisOptions = DecayAnalysisOptions()
        decayAnalysisOptions.startD = datetime(2000, 1, 9)
        decayAnalysisOptions.endD = datetime(2000, 2, 11)
        # windowLength deliberately left unset for this run; only decay is given
        decayAnalysisOptions.decay = 0.9
        decayAnalysisOptions.delta = timedelta(weeks=4)
        decayAnalysisOptions.patientIds = [-22222, -33333]
        decayAnalysisOptions.outputFile = TEMP_FILENAME

        self.decayAnalyzer.decayAnalyzePatientItems(decayAnalysisOptions)

        expectedAssociationStats = self._expectedInitialAssociationStats(0.9)
        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        # Every test item starts at 0; only items -6, -8, -9 and -11 pick up counts
        expectedItemBaseCountById = dict.fromkeys(range(-16, 0), 0)
        expectedItemBaseCountById.update(
            {-6: 1.9, -8: 0.9, -9: 0.9, -11: 1.9})
        itemBaseCountById = self.dataManager.loadClinicalItemBaseCountByItemId(
        )
        self.assertEqualDict(expectedItemBaseCountById, itemBaseCountById)

        ######## Reset the model data and rerun with different decay parameters
        self.dataManager.resetAssociationModel()

        decayAnalysisOptions = DecayAnalysisOptions()
        decayAnalysisOptions.startD = datetime(2000, 1, 9)
        decayAnalysisOptions.endD = datetime(2000, 2, 11)
        decayAnalysisOptions.windowLength = 4
        # Just specify window length, then should calculate decay parameter
        # (decay itself is deliberately left unset for this run)
        decayAnalysisOptions.delta = timedelta(weeks=4)
        decayAnalysisOptions.patientIds = [-22222, -33333]
        decayAnalysisOptions.outputFile = TEMP_FILENAME

        self.decayAnalyzer.decayAnalyzePatientItems(decayAnalysisOptions)

        expectedAssociationStats = self._expectedInitialAssociationStats(0.75)
        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        expectedItemBaseCountById = dict.fromkeys(range(-16, 0), 0)
        expectedItemBaseCountById.update(
            {-6: 1.75, -8: 0.75, -9: 0.75, -11: 1.75})
        # Don't use cache, otherwise will get prior results
        itemBaseCountById = self.dataManager.loadClinicalItemBaseCountByItemId(
            acceptCache=False)
        self.assertEqualDict(expectedItemBaseCountById, itemBaseCountById)

    def test_decayingWindows(self):
        """Train twice without resetting and check patient-level counts."""
        # Muthu's function to test DecayingWindows module

        associationQuery = self._associationQuery("patient_count")

        decayAnalysisOptions = DecayAnalysisOptions()
        decayAnalysisOptions.startD = datetime(2000, 1, 9)
        decayAnalysisOptions.endD = datetime(2000, 2, 11)
        decayAnalysisOptions.windowLength = 10
        decayAnalysisOptions.decay = 0.9
        decayAnalysisOptions.delta = timedelta(weeks=4)
        decayAnalysisOptions.patientIds = [-22222, -33333]

        self.decayAnalyzer.decayAnalyzePatientItems(decayAnalysisOptions)

        expectedAssociationStats = self._expectedInitialAssociationStats(0.9)
        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        # The association table is intentionally NOT cleared here.
        # Add another training period then should get a second decay multiplier for older data?
        # Weird in that incrementally building on prior data that is getting decayed, even though new training data actually occurred before chronologic time of data
        decayAnalysisOptions = DecayAnalysisOptions()
        decayAnalysisOptions.startD = datetime(2000, 1, 1)
        decayAnalysisOptions.endD = datetime(2000, 2, 12)
        decayAnalysisOptions.windowLength = 10
        decayAnalysisOptions.decay = 0.9
        decayAnalysisOptions.delta = timedelta(weeks=4)
        decayAnalysisOptions.patientIds = [-22222, -33333]

        self.decayAnalyzer.decayAnalyzePatientItems(decayAnalysisOptions)

        # Plain int IDs: the former Python 2 long literals (-11L) are a syntax
        # error on Python 3 and compare equal to ints on Python 2 anyway.
        expectedAssociationStats = \
            [
                [-11, -11, 1.539, 1.539, 1.539, 1.539, 1.539, 0.0, 0.0, 1.539],
                [-11,  -9, 0.0, 0.0, 0.729, 0.729, 0.729, 0.0, 0.0, 0.729],
                [-11,  -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-11,  -7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-11,  -6, 0.729, 0.729, 0.729, 0.729, 0.729, 0.0, 0.0, 0.729],
                [ -9, -11, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [ -9,  -9, 0.729, 0.729, 0.729, 0.729, 0.729, 0.0, 0.0, 0.729],
                [ -9,  -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [ -9,  -7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [ -9,  -6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [ -8, -11, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [ -8,  -9, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [ -8,  -8, 0.729, 0.729, 0.729, 0.729, 0.729, 0.0, 0.0, 0.729],
                [ -8,  -6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [ -7, -11, 0.0, 0.0, 0.0, 0.9, 0.9, 0.0, 0.0, 0.9],
                [ -7,  -9, 0.0, 0.0, 0.0, 0.9, 0.9, 0.0, 0.0, 0.9],
                [ -7,  -7, 0.9, 0.9, 0.9, 0.9, 0.9, 0.0, 0.0, 0.9],
                [ -7,  -6, 0.0, 0.0, 0.0, 0.9, 0.9, 0.0, 0.0, 0.9],
                [ -6, -11, 0.729, 0.729, 0.729, 1.539, 1.539, 0.0, 0.0, 1.539],
                [ -6,  -9, 0.0, 0.0, 0.729, 0.729, 0.729, 0.0, 0.0, 0.729],
                [ -6,  -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [ -6,  -7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [ -6,  -6, 1.539, 1.539, 1.539, 1.539, 1.539, 0.0, 0.0, 1.539],
            ]

        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

    def test_resetModel(self):
        """Check that resetAssociationModel empties the associations and nulls analyze_date."""
        associationQuery = self._associationQuery("patient_count")

        associationQueryDate = \
            """
            select
                patient_item_id, analyze_date
            from
                patient_item
            where
                patient_item_id < 0
            order by
                patient_item_id
            """

        # fill up the association table with something
        # NOTE(review): delta is not set here, unlike the other tests;
        # presumably the DecayAnalysisOptions default applies - confirm.
        decayAnalysisOptions = DecayAnalysisOptions()
        decayAnalysisOptions.startD = datetime(2000, 1, 9)
        decayAnalysisOptions.endD = datetime(2000, 2, 11)
        decayAnalysisOptions.windowLength = 10
        decayAnalysisOptions.decay = 0.9
        decayAnalysisOptions.patientIds = [-22222, -33333]
        self.decayAnalyzer.decayAnalyzePatientItems(decayAnalysisOptions)

        # then clear the table
        self.dataManager.resetAssociationModel()

        expectedAssociationStats = []
        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        # Every test patient_item should have analyze_date set as NULL,
        # listed in ascending patient_item_id order to match the query
        expectedAssociationStatsDate = \
            [
                [-95, None], [-94, None], [-16, None], [-15, None],
                [-14, None], [-13, None], [-12, None], [-10, None],
                [-5, None], [-4, None], [-3, None], [-2, None],
                [-1, None],
            ]
        associationStatsDate = DBUtil.execute(associationQueryDate)
        self.assertEqualTable(expectedAssociationStatsDate,
                              associationStatsDate)
Beispiel #13
0
 def __init__(self):
     """Create the default database connection factory and a DataManager."""
     self.connFactory = DBUtil.ConnectionFactory()
     # Default connection source
     self.dataManager = DataManager()