def build_composite_clinical_item(components, name, description, category_id):
    """Create a composite clinical item from existing component items.

    Simple wrapper around medinfo/cpoe/DataManager.py: encodes the components
    and metadata into the '--compositeRelated' pipe-delimited argument format
    and delegates to DataManager.main().

    :param components: iterable of component clinical_item_ids to merge
    :param name: short name for the new composite item
    :param description: human-readable description of the composite item
    :param category_id: clinical_item_category_id the composite is filed under
    """
    # Fix: the original comprehension bound the loop variable as 'id',
    # shadowing the builtin; also no need to materialize a list for join().
    component_str = ','.join(str(component_id) for component_id in components)
    log.debug('(%s, %s, %s) = (%s)' % (name, description, category_id,
                                       component_str))
    # DataManager expects "ids|name|description|categoryId"
    composite_arg = '%s|%s|%s|%s' % (component_str, name, description,
                                     category_id)
    dm = DataManager()
    dm.main([
        'medinfo/cpoe/DataManager.py', '--compositeRelated', composite_arg
    ])
def action_updateCounts(self):
    """Refresh the cached clinical item summary counts.

    Keeping the summary counts current lets subsequent queries answer
    quickly instead of re-aggregating on demand.
    """
    DataManager().updateClinicalItemCounts()
class TestOutcomePredictionAnalysis(DBTestCase):
    """Integration tests for OutcomePredictionAnalysis against mock DB fixtures.

    All fixture rows use negative primary keys so tearDown can purge them
    with simple "< 0" filters (and an explicit ID list for categories)
    without touching any real data in the database.
    """

    def setUp(self):
        """Prepare state for test cases"""
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        # Clinical item categories; generated IDs are tracked so tearDown
        # can delete exactly these rows.
        self.clinicalItemCategoryIdStrList = list()
        headers = ["clinical_item_category_id", "source_table"]
        dataModels = \
            [
                RowItemModel([-1, "Labs"], headers),
                RowItemModel([-2, "Imaging"], headers),
                RowItemModel([-3, "Meds"], headers),
                RowItemModel([-4, "Nursing"], headers),
                RowItemModel([-5, "Problems"], headers),
                RowItemModel([-6, "Lab Results"], headers),
                RowItemModel([-7, "Admit Dx"], headers),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_category", dataModel)
            self.clinicalItemCategoryIdStrList.append(str(dataItemId))

        # Clinical items. analysis_status=1 means included in analysis;
        # item -21 ("Diagnosis 1", Admit Dx category) is flagged 0.
        headers = ["clinical_item_id", "clinical_item_category_id", "analysis_status", "name"]
        dataModels = \
            [
                RowItemModel([-1, -1, 1, "CBC"], headers),
                RowItemModel([-2, -1, 1, "BMP"], headers),
                RowItemModel([-3, -1, 1, "Hepatic Panel"], headers),
                RowItemModel([-4, -1, 1, "Cardiac Enzymes"], headers),
                RowItemModel([-5, -2, 1, "CXR"], headers),
                RowItemModel([-6, -2, 1, "RUQ Ultrasound"], headers),
                RowItemModel([-7, -2, 1, "CT Abdomen/Pelvis"], headers),
                RowItemModel([-8, -2, 1, "CT PE Thorax"], headers),
                RowItemModel([-9, -3, 1, "Acetaminophen"], headers),
                RowItemModel([-10, -3, 1, "Carvedilol"], headers),
                RowItemModel([-11, -3, 1, "Enoxaparin"], headers),
                RowItemModel([-12, -3, 1, "Warfarin"], headers),
                RowItemModel([-13, -3, 1, "Ceftriaxone"], headers),
                RowItemModel([-14, -4, 1, "Admit"], headers),
                RowItemModel([-15, -4, 1, "Discharge"], headers),
                RowItemModel([-16, -4, 1, "Readmit"], headers),
                RowItemModel([-22, -5, 1, "Diagnosis 2"], headers),
                RowItemModel([-23, -5, 1, "Diagnosis 3"], headers),
                RowItemModel([-24, -5, 1, "Diagnosis 4"], headers),
                RowItemModel([-30, -6, 1, "Troponin (High)"], headers),
                RowItemModel([-31, -6, 1, "BNP (High)"], headers),
                RowItemModel([-32, -6, 1, "Creatinine (High)"], headers),
                RowItemModel([-33, -6, 1, "ESR (High)"], headers),
                RowItemModel([-21, -7, 0, "Diagnosis 1"], headers),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", dataModel)

        # Patient event timelines for two mock patients (-11111 and -22222).
        headers = ["patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date"]
        dataModels = \
            [
                RowItemModel([-52, -11111, -23, datetime(1999, 9, 1, 0), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-51, -11111, -21, datetime(2000, 1, 1, 0), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-1, -11111, -4, datetime(2000, 1, 1, 0), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-2, -11111, -10, datetime(2000, 1, 1, 1), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-3, -11111, -8, datetime(2000, 1, 1, 2), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-5, -11111, -12, datetime(2000, 2, 1, 0), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-60, -11111, -32, datetime(2000, 1, 1, 4), datetime(2010, 1, 1, 0)], headers),  # Within query time
                RowItemModel([-61, -11111, -30, datetime(2000, 1, 4, 0), datetime(2010, 1, 1, 0)], headers),  # Within 1 week
                RowItemModel([-62, -11111, -31, datetime(2000, 1, 10, 0), datetime(2010, 1, 1, 0)], headers),  # Past 1 week
                RowItemModel([-55, -22222, -21, datetime(2000, 1, 8, 0), datetime(2010, 1, 1, 0)], headers),  # Admit Dx
                RowItemModel([-12, -22222, -6, datetime(2000, 1, 8, 0), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-13, -22222, -14, datetime(2000, 1, 8, 1), datetime(2010, 1, 1, 0)], headers),  # Admit
                RowItemModel([-14, -22222, -7, datetime(2000, 1, 8, 2), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-15, -22222, -8, datetime(2000, 1, 8, 3), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-16, -22222, -15, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers),  # Discharge
                RowItemModel([-56, -22222, -21, datetime(2000, 1, 13, 0), datetime(2010, 1, 1, 0)], headers),  # Admit Dx
                RowItemModel([-17, -22222, -9, datetime(2000, 1, 13, 0), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-19, -22222, -14, datetime(2000, 1, 13, 1), datetime(2010, 1, 1, 0)], headers),  # Admit (Readmit)
                RowItemModel([-20, -22222, -10, datetime(2000, 1, 13, 2), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-21, -22222, -11, datetime(2000, 1, 13, 3), datetime(2010, 1, 1, 0)], headers),
                RowItemModel([-22, -22222, -15, datetime(2000, 1, 18, 0), datetime(2010, 1, 1, 0)], headers),  # Discharge
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("patient_item", dataModel)

        # Pre-computed item co-occurrence statistics (counts bucketed by
        # time window in seconds: 0, 1 hour, 1 day, 1 week, any).
        headers = \
            [
                "clinical_item_id", "subsequent_item_id",
                "count_0", "count_3600", "count_86400", "count_604800", "count_any",
                "time_diff_sum", "time_diff_sum_squares",
            ]
        dataModels = \
            [
                RowItemModel([-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0], headers),
                RowItemModel([-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0], headers),
                RowItemModel([-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0], headers),
                RowItemModel([-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0], headers),
                RowItemModel([-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0], headers),
                RowItemModel([-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0], headers),
                RowItemModel([-7, -7, 70, 70, 70, 70, 70, 0.0, 0.0], headers),
                RowItemModel([-8, -8, 35, 35, 35, 50, 80, 0.0, 0.0], headers),
                RowItemModel([-10, -10, 45, 45, 55, 60, 90, 0.0, 0.0], headers),
                RowItemModel([-12, -12, 75, 75, 75, 80, 90, 0.0, 0.0], headers),
                RowItemModel([-14, -14, 100, 100, 100, 100, 100, 0.0, 0.0], headers),
                RowItemModel([-15, -15, 100, 100, 100, 100, 100, 0.0, 0.0], headers),
                RowItemModel([-16, -16, 30, 30, 30, 30, 30, 0.0, 0.0], headers),
                RowItemModel([-30, -30, 3, 3, 3, 3, 3, 0.0, 0.0], headers),
                RowItemModel([-31, -31, 4, 4, 4, 4, 4, 0.0, 0.0], headers),
                RowItemModel([-32, -32, 4, 4, 4, 4, 4, 0.0, 0.0], headers),
                RowItemModel([-33, -33, 5, 5, 5, 5, 5, 0.0, 0.0], headers),
                RowItemModel([-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0], headers),
                RowItemModel([-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0], headers),
                RowItemModel([-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0], headers),
                RowItemModel([-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0], headers),
                RowItemModel([-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0], headers),
                RowItemModel([-4, -7, 27, 33, 33, 33, 63, 40.0, 5420.0], headers),
                RowItemModel([-4, -10, 25, 35, 40, 45, 63, 47.0, 5420.0], headers),
                RowItemModel([-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0], headers),
                RowItemModel([-6, -16, 10, 10, 10, 10, 10, 0.0, 0.0], headers),
                RowItemModel([-8, -16, 5, 5, 5, 5, 5, 0.0, 0.0], headers),
                RowItemModel([-10, -16, 8, 8, 8, 8, 8, 0.0, 0.0], headers),
                RowItemModel([-10, -30, 10, 10, 10, 10, 10, 0.0, 0.0], headers),
                RowItemModel([-10, -31, 10, 10, 10, 10, 10, 0.0, 0.0], headers),
                RowItemModel([-12, -30, 20, 20, 20, 20, 20, 0.0, 0.0], headers),
                RowItemModel([-12, -31, 20, 20, 20, 20, 20, 0.0, 0.0], headers),
                RowItemModel([-10, -32, 10, 10, 10, 10, 10, 0.0, 0.0], headers),
                RowItemModel([-10, -33, 10, 10, 10, 10, 10, 0.0, 0.0], headers),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_association", dataModel)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        self.dataManager.clearCacheData("analyzedPatientCount")
        self.dataManager.clearCacheData("clinicalItemCountsUpdated")

        # Instance to test on
        self.analyzer = OutcomePredictionAnalysis()
        self.preparer = PreparePatientItems()

    def tearDown(self):
        """Restore state from any setUp or test steps"""
        log.info("Purge test records from the database")
        DBUtil.execute("delete from clinical_item_association where clinical_item_id < 0")
        DBUtil.execute("delete from patient_item where patient_item_id < 0")
        DBUtil.execute("delete from clinical_item where clinical_item_id < 0")
        DBUtil.execute("delete from clinical_item_category where clinical_item_category_id in (%s)" % str.join(",", self.clinicalItemCategoryIdStrList))
        DBTestCase.tearDown(self)

    def test_recommenderAnalysis(self):
        """Exercise outcome prediction three ways for each scenario:
        direct object call, command-line interface, and the
        PreparePatientItems intermediate-file pipeline — all three must
        agree with the same expected results.
        """
        # Run the recommender against the mock test data above and verify expected stats afterwards.
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-11111])
        analysisQuery.baseCategoryId = -7
        analysisQuery.queryTimeSpan = timedelta(0, 86400)
        #analysisQuery.recommender = BaselineFrequencyRecommender();
        analysisQuery.recommender = ItemAssociationRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.targetItemIds = set([-33, -32, -31, -30])
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # Initial run without time limits on outcome measure
        colNames = ["patient_id", "outcome.-33", "score.-33", "outcome.-32", "score.-32", "outcome.-31", "score.-31", "outcome.-30", "score.-30"]
        expectedResults = [RowItemModel([-11111, +0, 0.222, +2, 0.611, +1, 0.222, +1, 0.222], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()  # Redirect stdout output to collect test results
        argv = ["OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-o", "-33,-32,-31,-30", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

        # Redo through prepared file intermediary
        sys.stdout = StringIO()
        argv = ["PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400", "-o", "-33,-32,-31,-30", '0,-11111', "-"]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())

        sys.stdin = preparedDataFile  # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = ["OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R", "ItemAssociationRecommender", '-', "-"]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

        # Now try with time limitation on outcome measure
        analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, 604800)  # 1 week
        colNames = ["patient_id", "outcome.-33", "score.-33", "outcome.-32", "score.-32", "outcome.-31", "score.-31", "outcome.-30", "score.-30"]
        expectedResults = [RowItemModel([-11111, +0, 0.222, +2, 0.611, +0, 0.222, +1, 0.222], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()  # Redirect stdout output to collect test results
        argv = ["OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-t", "604800", "-o", "-33,-32,-31,-30", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

        # Redo through prepared file intermediary
        sys.stdout = StringIO()
        argv = ["PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400", "-t", "604800", "-o", "-33,-32,-31,-30", '0,-11111', "-"]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())

        sys.stdin = preparedDataFile  # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = ["OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R", "ItemAssociationRecommender", "-t", "604800", '-', "-"]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

        # Again, but with much stricter time limit (negative test case)
        analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, 172800)  # 2 day
        colNames = ["patient_id", "outcome.-33", "score.-33", "outcome.-32", "score.-32", "outcome.-31", "score.-31", "outcome.-30", "score.-30"]
        expectedResults = [RowItemModel([-11111, 0, 0.0109, 2, 0.0600, 0, 0.0109, 0, 0.0109], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()  # Redirect stdout output to collect test results
        argv = ["OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-t", "172800", "-o", "-33,-32,-31,-30", "-m", "0", "-R", "ItemAssociationRecommender", '0,-11111', "-"]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

        # Redo through prepared file intermediary
        sys.stdout = StringIO()
        argv = ["PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400", "-t", "172800", "-o", "-33,-32,-31,-30", '0,-11111', "-"]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())

        sys.stdin = preparedDataFile  # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = ["OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R", "ItemAssociationRecommender", "-t", "172800", '-', "-"]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

    def test_tripleSequence_virtualItem(self):
        # Test outcome assessment when the target is a virtual item based on the presence of a triple (instead of double) sequence of items
        # Run the recommender against the mock test data above and verify expected stats afterwards.
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-22222])
        analysisQuery.baseCategoryId = -7
        analysisQuery.queryTimeSpan = timedelta(0, 86400)
        # Virtual item -16 ("Readmit") is defined as Discharge (-15) followed by Admit (-14)
        analysisQuery.sequenceItemIdsByVirtualItemId[-16] = (-15, -14)
        #analysisQuery.recommender = BaselineFrequencyRecommender();
        analysisQuery.recommender = ItemAssociationRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.targetItemIds = set([-16])
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # Initial run without time limits on outcome measure
        colNames = ["patient_id", "outcome.-16", "score.-16"]
        expectedResults = [RowItemModel([-22222, +1, 0.14286], colNames)]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()  # Redirect stdout output to collect test results
        argv = ["OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-o", "-16=-15:-14", "-m", "0", "-R", "ItemAssociationRecommender", '0,-22222', "-"]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)
def setUp(self):
    """Prepare state for test cases"""
    # NOTE(review): this setUp duplicates TestOutcomePredictionAnalysis.setUp
    # verbatim — consider extracting a shared fixture helper to avoid drift.
    DBTestCase.setUp(self)

    log.info("Populate the database with test data")
    from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
    ClinicalItemDataLoader.build_clinical_item_psql_schemata()

    # Clinical item categories; generated IDs are tracked for teardown cleanup.
    self.clinicalItemCategoryIdStrList = list()
    headers = ["clinical_item_category_id", "source_table"]
    dataModels = \
        [
            RowItemModel([-1, "Labs"], headers),
            RowItemModel([-2, "Imaging"], headers),
            RowItemModel([-3, "Meds"], headers),
            RowItemModel([-4, "Nursing"], headers),
            RowItemModel([-5, "Problems"], headers),
            RowItemModel([-6, "Lab Results"], headers),
            RowItemModel([-7, "Admit Dx"], headers),
        ]
    for dataModel in dataModels:
        (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_category", dataModel)
        self.clinicalItemCategoryIdStrList.append(str(dataItemId))

    # Clinical items; analysis_status=1 included in analysis, -21 excluded (0).
    headers = ["clinical_item_id", "clinical_item_category_id", "analysis_status", "name"]
    dataModels = \
        [
            RowItemModel([-1, -1, 1, "CBC"], headers),
            RowItemModel([-2, -1, 1, "BMP"], headers),
            RowItemModel([-3, -1, 1, "Hepatic Panel"], headers),
            RowItemModel([-4, -1, 1, "Cardiac Enzymes"], headers),
            RowItemModel([-5, -2, 1, "CXR"], headers),
            RowItemModel([-6, -2, 1, "RUQ Ultrasound"], headers),
            RowItemModel([-7, -2, 1, "CT Abdomen/Pelvis"], headers),
            RowItemModel([-8, -2, 1, "CT PE Thorax"], headers),
            RowItemModel([-9, -3, 1, "Acetaminophen"], headers),
            RowItemModel([-10, -3, 1, "Carvedilol"], headers),
            RowItemModel([-11, -3, 1, "Enoxaparin"], headers),
            RowItemModel([-12, -3, 1, "Warfarin"], headers),
            RowItemModel([-13, -3, 1, "Ceftriaxone"], headers),
            RowItemModel([-14, -4, 1, "Admit"], headers),
            RowItemModel([-15, -4, 1, "Discharge"], headers),
            RowItemModel([-16, -4, 1, "Readmit"], headers),
            RowItemModel([-22, -5, 1, "Diagnosis 2"], headers),
            RowItemModel([-23, -5, 1, "Diagnosis 3"], headers),
            RowItemModel([-24, -5, 1, "Diagnosis 4"], headers),
            RowItemModel([-30, -6, 1, "Troponin (High)"], headers),
            RowItemModel([-31, -6, 1, "BNP (High)"], headers),
            RowItemModel([-32, -6, 1, "Creatinine (High)"], headers),
            RowItemModel([-33, -6, 1, "ESR (High)"], headers),
            RowItemModel([-21, -7, 0, "Diagnosis 1"], headers),
        ]
    for dataModel in dataModels:
        (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", dataModel)

    # Patient event timelines for the two mock patients.
    headers = ["patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date"]
    dataModels = \
        [
            RowItemModel([-52, -11111, -23, datetime(1999, 9, 1, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-51, -11111, -21, datetime(2000, 1, 1, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-1, -11111, -4, datetime(2000, 1, 1, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-2, -11111, -10, datetime(2000, 1, 1, 1), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-3, -11111, -8, datetime(2000, 1, 1, 2), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-5, -11111, -12, datetime(2000, 2, 1, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-60, -11111, -32, datetime(2000, 1, 1, 4), datetime(2010, 1, 1, 0)], headers),  # Within query time
            RowItemModel([-61, -11111, -30, datetime(2000, 1, 4, 0), datetime(2010, 1, 1, 0)], headers),  # Within 1 week
            RowItemModel([-62, -11111, -31, datetime(2000, 1, 10, 0), datetime(2010, 1, 1, 0)], headers),  # Past 1 week
            RowItemModel([-55, -22222, -21, datetime(2000, 1, 8, 0), datetime(2010, 1, 1, 0)], headers),  # Admit Dx
            RowItemModel([-12, -22222, -6, datetime(2000, 1, 8, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-13, -22222, -14, datetime(2000, 1, 8, 1), datetime(2010, 1, 1, 0)], headers),  # Admit
            RowItemModel([-14, -22222, -7, datetime(2000, 1, 8, 2), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-15, -22222, -8, datetime(2000, 1, 8, 3), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-16, -22222, -15, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers),  # Discharge
            RowItemModel([-56, -22222, -21, datetime(2000, 1, 13, 0), datetime(2010, 1, 1, 0)], headers),  # Admit Dx
            RowItemModel([-17, -22222, -9, datetime(2000, 1, 13, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-19, -22222, -14, datetime(2000, 1, 13, 1), datetime(2010, 1, 1, 0)], headers),  # Admit (Readmit)
            RowItemModel([-20, -22222, -10, datetime(2000, 1, 13, 2), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-21, -22222, -11, datetime(2000, 1, 13, 3), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-22, -22222, -15, datetime(2000, 1, 18, 0), datetime(2010, 1, 1, 0)], headers),  # Discharge
        ]
    for dataModel in dataModels:
        (dataItemId, isNew) = DBUtil.findOrInsertItem("patient_item", dataModel)

    # Pre-computed co-occurrence statistics bucketed by time window (seconds).
    headers = \
        [
            "clinical_item_id", "subsequent_item_id",
            "count_0", "count_3600", "count_86400", "count_604800", "count_any",
            "time_diff_sum", "time_diff_sum_squares",
        ]
    dataModels = \
        [
            RowItemModel([-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0], headers),
            RowItemModel([-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0], headers),
            RowItemModel([-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0], headers),
            RowItemModel([-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0], headers),
            RowItemModel([-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0], headers),
            RowItemModel([-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0], headers),
            RowItemModel([-7, -7, 70, 70, 70, 70, 70, 0.0, 0.0], headers),
            RowItemModel([-8, -8, 35, 35, 35, 50, 80, 0.0, 0.0], headers),
            RowItemModel([-10, -10, 45, 45, 55, 60, 90, 0.0, 0.0], headers),
            RowItemModel([-12, -12, 75, 75, 75, 80, 90, 0.0, 0.0], headers),
            RowItemModel([-14, -14, 100, 100, 100, 100, 100, 0.0, 0.0], headers),
            RowItemModel([-15, -15, 100, 100, 100, 100, 100, 0.0, 0.0], headers),
            RowItemModel([-16, -16, 30, 30, 30, 30, 30, 0.0, 0.0], headers),
            RowItemModel([-30, -30, 3, 3, 3, 3, 3, 0.0, 0.0], headers),
            RowItemModel([-31, -31, 4, 4, 4, 4, 4, 0.0, 0.0], headers),
            RowItemModel([-32, -32, 4, 4, 4, 4, 4, 0.0, 0.0], headers),
            RowItemModel([-33, -33, 5, 5, 5, 5, 5, 0.0, 0.0], headers),
            RowItemModel([-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0], headers),
            RowItemModel([-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0], headers),
            RowItemModel([-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0], headers),
            RowItemModel([-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0], headers),
            RowItemModel([-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0], headers),
            RowItemModel([-4, -7, 27, 33, 33, 33, 63, 40.0, 5420.0], headers),
            RowItemModel([-4, -10, 25, 35, 40, 45, 63, 47.0, 5420.0], headers),
            RowItemModel([-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0], headers),
            RowItemModel([-6, -16, 10, 10, 10, 10, 10, 0.0, 0.0], headers),
            RowItemModel([-8, -16, 5, 5, 5, 5, 5, 0.0, 0.0], headers),
            RowItemModel([-10, -16, 8, 8, 8, 8, 8, 0.0, 0.0], headers),
            RowItemModel([-10, -30, 10, 10, 10, 10, 10, 0.0, 0.0], headers),
            RowItemModel([-10, -31, 10, 10, 10, 10, 10, 0.0, 0.0], headers),
            RowItemModel([-12, -30, 20, 20, 20, 20, 20, 0.0, 0.0], headers),
            RowItemModel([-12, -31, 20, 20, 20, 20, 20, 0.0, 0.0], headers),
            RowItemModel([-10, -32, 10, 10, 10, 10, 10, 0.0, 0.0], headers),
            RowItemModel([-10, -33, 10, 10, 10, 10, 10, 0.0, 0.0], headers),
        ]
    for dataModel in dataModels:
        (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_association", dataModel)

    # Indicate that cache data needs to be updated
    self.dataManager = DataManager()
    self.dataManager.clearCacheData("analyzedPatientCount")
    self.dataManager.clearCacheData("clinicalItemCountsUpdated")

    # Instance to test on
    self.analyzer = OutcomePredictionAnalysis()
    self.preparer = PreparePatientItems()
class TestDataManager(DBTestCase): def setUp(self): """Prepare state for test cases""" DBTestCase.setUp(self) from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader ClinicalItemDataLoader.build_clinical_item_psql_schemata() log.info("Populate the database with test data") self.clinicalItemCategoryIdStrList = list() headers = ["clinical_item_category_id", "source_table"] dataModels = \ [ RowItemModel( [-1, "Labs"], headers ), RowItemModel( [-2, "Imaging"], headers ), RowItemModel( [-3, "Meds"], headers ), RowItemModel( [-4, "Nursing"], headers ), RowItemModel( [-5, "Problems"], headers ), RowItemModel( [-6, "Lab Results"], headers ), ] for dataModel in dataModels: (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_category", dataModel) self.clinicalItemCategoryIdStrList.append(str(dataItemId)) headers = [ "clinical_item_id", "clinical_item_category_id", "name", "analysis_status" ] dataModels = \ [ RowItemModel( [-1, -1, "CBC",1], headers ), RowItemModel( [-2, -1, "BMP",0], headers ), # Clear analysis status, so this will be ignored unless changed RowItemModel( [-3, -1, "Hepatic Panel",1], headers ), RowItemModel( [-4, -1, "Cardiac Enzymes",1], headers ), RowItemModel( [-5, -2, "CXR",1], headers ), RowItemModel( [-6, -2, "RUQ Ultrasound",1], headers ), RowItemModel( [-7, -2, "CT Abdomen/Pelvis",1], headers ), RowItemModel( [-8, -2, "CT PE Thorax",1], headers ), RowItemModel( [-9, -3, "Acetaminophen",1], headers ), RowItemModel( [-10, -3, "Carvedilol",1], headers ), RowItemModel( [-11, -3, "Enoxaparin",1], headers ), RowItemModel( [-12, -3, "Warfarin",1], headers ), RowItemModel( [-13, -3, "Ceftriaxone",1], headers ), RowItemModel( [-14, -4, "Foley Catheter",1], headers ), RowItemModel( [-15, -4, "Strict I&O",1], headers ), RowItemModel( [-16, -4, "Fall Precautions",1], headers ), ] for dataModel in dataModels: (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", dataModel) self.clinicalItemQuery = \ """ select 
clinical_item_id, name, analysis_status, default_recommend from clinical_item where clinical_item_id < 0 order by clinical_item_id desc """ headers = [ "patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date" ] dataModels = \ [ RowItemModel( [-1, -11111, -4, datetime(2000, 1, 1, 0), datetime(2100, 1, 1, 0)], headers ), RowItemModel( [-2, -11111, -10, datetime(2000, 1, 1, 0), datetime(2100, 1, 1, 0)], headers ), RowItemModel( [-3, -11111, -8, datetime(2000, 1, 1, 2), datetime(2100, 1, 1, 0)], headers ), RowItemModel( [-4, -11111, -10, datetime(2000, 1, 2, 0), datetime(2100, 1, 1, 0)], headers ), RowItemModel( [-5, -11111, -12, datetime(2000, 2, 1, 0), datetime(2100, 1, 1, 0)], headers ), RowItemModel( [-10, -22222, -7, datetime(2000, 1, 5, 0), datetime(2100, 1, 1, 0)], headers ), RowItemModel( [-12, -22222, -6, datetime(2000, 1, 9, 0), datetime(2100, 1, 1, 0)], headers ), RowItemModel( [-13, -22222, -11, datetime(2000, 1, 9, 0), datetime(2100, 1, 1, 0)], headers ), RowItemModel( [-14, -33333, -6, datetime(2000, 2, 9, 0), datetime(2100, 1, 1, 0)], headers ), RowItemModel( [-15, -33333, -2, datetime(2000, 2,11, 0), datetime(2100, 1, 1, 0)], headers ), RowItemModel( [-16, -33333, -11, datetime(2000, 2,11, 0), datetime(2100, 1, 1, 0)], headers ), RowItemModel( [-17, -33333, -11, datetime(2001, 1, 1, 0), datetime(2100, 1, 1, 0)], headers ), ] for dataModel in dataModels: (dataItemId, isNew) = DBUtil.findOrInsertItem("patient_item", dataModel) self.patientItemQuery = \ """ select patient_item_id, patient_id, clinical_item_id, item_date, analyze_date from patient_item where clinical_item_id < 0 order by patient_id desc, item_date, patient_item_id desc """ headers = [ "clinical_item_id","subsequent_item_id",\ "count_0","count_3600","count_86400","count_604800","count_any", "time_diff_sum","time_diff_sum_squares", "patient_count_0","patient_count_3600","patient_count_86400","patient_count_604800","patient_count_any", 
"patient_time_diff_sum","patient_time_diff_sum_squares", "patient_count_0","encounter_count_0", ] dataModels = \ [ RowItemModel( [-11,-11, 3, 3, 3, 3, 4, 999.0, 9999.0, 2, 2, 2, 2, 2, 999.0, 9999.0, 2,2], headers ), RowItemModel( [-11, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0,0], headers ), RowItemModel( [-11, -6, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1,1], headers ), RowItemModel( [-11, -2, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1,1], headers ), RowItemModel( [ -7,-11, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0,0], headers ), RowItemModel( [ -7, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1,1], headers ), RowItemModel( [ -7, -6, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0,0], headers ), RowItemModel( [ -6,-11, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1,1], headers ), RowItemModel( [ -6, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0,0], headers ), RowItemModel( [ -6, -6, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2, 2, 2, 2, 0.0, 0.0, 2,2], headers ), RowItemModel( [ -6, -2, 0, 0, 0, 1, 1, 172800.0, 29859840000.0, 0, 0, 0, 1, 1, 172800.0, 29859840000.0, 0,0], headers ), RowItemModel( [ -2,-11, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1,1], headers ), RowItemModel( [ -2, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1,1], headers ), RowItemModel( [ -2, -6, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0,0], headers ), RowItemModel( [ -2, -2, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1,1], headers ), ] for dataModel in dataModels: (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_association", dataModel) self.clinicalItemAssociationQuery = \ """ select clinical_item_id, subsequent_item_id, count_0,count_3600,count_86400,count_604800,count_any, time_diff_sum,time_diff_sum_squares, 
patient_count_0,patient_count_3600,patient_count_86400,patient_count_604800,patient_count_any, patient_time_diff_sum, patient_time_diff_sum_squares from clinical_item_association where clinical_item_id < 0 order by clinical_item_id, subsequent_item_id """ self.analyzer = DataManager() # Instance to test on self.analyzer.maxClinicalItemId = 0 # Avoid testing on "real" data def tearDown(self): """Restore state from any setUp or test steps""" log.info("Purge test records from the database") DBUtil.execute( "delete from clinical_item_link where clinical_item_id < 0") DBUtil.execute( "delete from backup_link_patient_item where patient_item_id < 0") DBUtil.execute( "delete from clinical_item_association where clinical_item_id < 0") DBUtil.execute("delete from patient_item where patient_id < 0") DBUtil.execute("delete from clinical_item where clinical_item_id < 0") DBUtil.execute( "delete from clinical_item_category where clinical_item_category_id in (%s)" % str.join(",", self.clinicalItemCategoryIdStrList)) DBTestCase.tearDown(self) def test_deactivateAnalysis(self): clinicalItemIds = set([-6, -11]) self.analyzer.deactivateAnalysis(clinicalItemIds) expectedClinicalItemStatus = \ [ [-1, "CBC",1, 1], [-2, "BMP",0, 1], [-3, "Hepatic Panel",1, 1], [-4, "Cardiac Enzymes",1, 1], [-5, "CXR",1, 1], [-6, "RUQ Ultrasound",0, 1], [-7, "CT Abdomen/Pelvis",1, 1], [-8, "CT PE Thorax",1, 1], [-9, "Acetaminophen",1, 1], [-10, "Carvedilol",1, 1], [-11, "Enoxaparin",0, 1], [-12, "Warfarin",1, 1], [-13, "Ceftriaxone",1, 1], [-14, "Foley Catheter",1, 1], [-15, "Strict I&O",1, 1], [-16, "Fall Precautions",1, 1], ] clinicalItemStatus = DBUtil.execute(self.clinicalItemQuery) self.assertEqualTable(expectedClinicalItemStatus, clinicalItemStatus) expectedPatientItems = \ [ # Use placeholder "*" for analyze date, just verify that it exists and is consistent. 
Actual value is not important [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"], [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"], [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"], [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"], [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"], [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"], [-12, -22222, -6, datetime(2000, 1, 9, 0), None], [-13, -22222, -11, datetime(2000, 1, 9, 0), None], [-14, -33333, -6, datetime(2000, 2, 9, 0), None], [-15, -33333, -2, datetime(2000, 2,11, 0), "*"], [-16, -33333, -11, datetime(2000, 2,11, 0), None], [-17, -33333, -11, datetime(2001, 1, 1, 0), None], ] patientItems = DBUtil.execute(self.patientItemQuery) self.assertEqualPatientItems(expectedPatientItems, patientItems) expectedAssociationStats = \ [ [ -7, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0], [ -2, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0], [ -2, -2, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0], ] associationStats = DBUtil.execute(self.clinicalItemAssociationQuery) self.assertEqualTable(expectedAssociationStats, associationStats, precision=3) def test_deactivateAnalysisByCount(self): thresholdInstanceCount = 1 categoryIds = [-1, -2] self.analyzer.deactivateAnalysisByCount(thresholdInstanceCount, categoryIds) expectedClinicalItemStatus = \ [ [-1, "CBC",0, 1], [-2, "BMP",0, 1], [-3, "Hepatic Panel",0, 1], [-4, "Cardiac Enzymes",0, 1], [-5, "CXR",0, 1], [-6, "RUQ Ultrasound",1, 1], [-7, "CT Abdomen/Pelvis",0, 1], [-8, "CT PE Thorax",0, 1], [-9, "Acetaminophen",1, 1], # Different category, so should be left alone [-10, "Carvedilol",1, 1], [-11, "Enoxaparin",1, 1], [-12, "Warfarin",1, 1], [-13, "Ceftriaxone",1, 1], [-14, "Foley Catheter",1, 1], [-15, "Strict I&O",1, 1], [-16, "Fall Precautions",1, 1], ] clinicalItemStatus = DBUtil.execute(self.clinicalItemQuery) self.assertEqualTable(expectedClinicalItemStatus, clinicalItemStatus) expectedPatientItems = \ [ # Use placeholder "*" for analyze date, just verify that it 
exists and is consistent. Actual value is not important [-1, -11111, -4, datetime(2000, 1, 1, 0), None], [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"], [-3, -11111, -8, datetime(2000, 1, 1, 2), None], [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"], [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"], [-10, -22222, -7, datetime(2000, 1, 5, 0), None], [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"], [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"], [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"], [-15, -33333, -2, datetime(2000, 2,11, 0), None], [-16, -33333, -11, datetime(2000, 2,11, 0), "*"], [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"], ] patientItems = DBUtil.execute(self.patientItemQuery) self.assertEqualPatientItems(expectedPatientItems, patientItems) expectedAssociationStats = \ [ [-11,-11, 3, 3, 3, 3, 4, 999.0, 9999.0, 2, 2, 2, 2, 2, 999.0, 9999.0], [-11, -6, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0], [ -6,-11, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1, 1, 2, 2, 172800.0, 29859840000.0], [ -6, -6, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2, 2, 2, 2, 0.0, 0.0], ] associationStats = DBUtil.execute(self.clinicalItemAssociationQuery) self.assertEqualTable(expectedAssociationStats, associationStats, precision=3) def test_compositeRelated(self): # Simulate command-line execution self.analyzer.main([ "medinfo/cpoe/DataManager.py", "-c", "-2,-4,-8|NewComposite|New Composite Item|-1|-100" ]) #compositeId = self.analyzer.compositeRelated( (-2,-4,-8), "NewComposite","New Composite Item", -1, -100 ); # Revise the new item ID to a sentinel test value expectedClinicalItemStatus = \ [ [-1, "CBC",1, 1], [-2, "BMP",0, 1], [-3, "Hepatic Panel",1, 1], [-4, "Cardiac Enzymes",1, 1], [-5, "CXR",1, 1], [-6, "RUQ Ultrasound",1, 1], [-7, "CT Abdomen/Pelvis",1, 1], [-8, "CT PE Thorax",1, 1], [-9, "Acetaminophen",1, 1], [-10, "Carvedilol",1, 1], [-11, "Enoxaparin",1, 1], [-12, "Warfarin",1, 1], [-13, "Ceftriaxone",1, 1], [-14, "Foley Catheter",1, 1], [-15, "Strict I&O",1, 1], [-16, 
"Fall Precautions",1, 1], [-100,"NewComposite", 1, 0], # Remove from default recommend list ] clinicalItemStatus = DBUtil.execute(self.clinicalItemQuery) self.assertEqualTable(expectedClinicalItemStatus, clinicalItemStatus) expectedPatientItems = \ [ # Use placeholder "*" for analyze date, just verify that it exists and is consistent. Actual value is not important # Likewise, use None for primary ID key whose specific value is unimportant [None,-11111,-100, datetime(2000, 1, 1, 0), None], [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"], [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"], [None,-11111,-100, datetime(2000, 1, 1, 2), None], [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"], [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"], [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"], [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"], [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"], [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"], [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"], [None,-33333,-100, datetime(2000, 2,11, 0), None], [-15, -33333, -2, datetime(2000, 2,11, 0), "*"], [-16, -33333, -11, datetime(2000, 2,11, 0), "*"], [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"], ] patientItems = DBUtil.execute(self.patientItemQuery) self.assertEqualPatientItems(expectedPatientItems, patientItems) # Check for tracking link records linkQuery = \ """ select clinical_item_id, linked_item_id from clinical_item_link where clinical_item_id < 0 order by clinical_item_id desc, linked_item_id desc """ expectedItems = \ [ [-100,-2], [-100,-4], [-100,-8], ] actualItems = DBUtil.execute(linkQuery) self.assertEqualTable(expectedItems, actualItems) log.debug("Test incremental update via command-line") self.analyzer.main(["medinfo/cpoe/DataManager.py", "-g", "-6|-100"]) #self.analyzer.generatePatientItemsForCompositeId( (-6,), -100 ); expectedPatientItems = \ [ # Use placeholder "*" for analyze date, just verify that it exists and is consistent. 
Actual value is not important # Likewise, use None for primary ID key whose specific value is unimportant [None,-11111,-100, datetime(2000, 1, 1, 0), None], [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"], [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"], [None,-11111,-100, datetime(2000, 1, 1, 2), None], [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"], [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"], [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"], [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"], [None,-22222,-100, datetime(2000, 1, 9, 0), None], [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"], [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"], [None,-33333,-100, datetime(2000, 2, 9, 0), None], [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"], [None,-33333,-100, datetime(2000, 2,11, 0), None], [-15, -33333, -2, datetime(2000, 2,11, 0), "*"], [-16, -33333, -11, datetime(2000, 2,11, 0), "*"], [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"], ] patientItems = DBUtil.execute(self.patientItemQuery) self.assertEqualPatientItems(expectedPatientItems, patientItems) # Check for tracking link records expectedItems = \ [ [-100,-2], [-100,-4], [-100,-6], [-100,-8], ] actualItems = DBUtil.execute(linkQuery) self.assertEqualTable(expectedItems, actualItems) log.debug("Test inherited update") self.analyzer.main([ "medinfo/cpoe/DataManager.py", "-c", "-7,-100|InheritingComposite|Inheriting Composite Item|-1|-101" ]) #compositeId = self.analyzer.compositeRelated( (-7,-100), "InheritingComposite","Inheriting Composite Item", -1, -101 ); # Revise the new item ID to a sentinel test value expectedClinicalItemStatus = \ [ [-1, "CBC",1, 1], [-2, "BMP",0, 1], [-3, "Hepatic Panel",1, 1], [-4, "Cardiac Enzymes",1, 1], [-5, "CXR",1, 1], [-6, "RUQ Ultrasound",1, 1], [-7, "CT Abdomen/Pelvis",1, 1], [-8, "CT PE Thorax",1, 1], [-9, "Acetaminophen",1, 1], [-10, "Carvedilol",1, 1], [-11, "Enoxaparin",1, 1], [-12, "Warfarin",1, 1], [-13, "Ceftriaxone",1, 1], [-14, "Foley Catheter",1, 1], [-15, "Strict 
I&O",1, 1], [-16, "Fall Precautions",1, 1], [-100,"NewComposite", 1, 0], [-101,"InheritingComposite", 1, 0], ] clinicalItemStatus = DBUtil.execute(self.clinicalItemQuery) self.assertEqualTable(expectedClinicalItemStatus, clinicalItemStatus) expectedPatientItems = \ [ # Use placeholder "*" for analyze date, just verify that it exists and is consistent. Actual value is not important # Likewise, use None for primary ID key whose specific value is unimportant [None,-11111,-101, datetime(2000, 1, 1, 0), None], [None,-11111,-100, datetime(2000, 1, 1, 0), None], [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"], [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"], [None,-11111,-101, datetime(2000, 1, 1, 2), None], [None,-11111,-100, datetime(2000, 1, 1, 2), None], [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"], [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"], [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"], [None,-22222,-101, datetime(2000, 1, 5, 0), None], [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"], [None,-22222,-101, datetime(2000, 1, 9, 0), None], [None,-22222,-100, datetime(2000, 1, 9, 0), None], [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"], [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"], [None,-33333,-101, datetime(2000, 2, 9, 0), None], [None,-33333,-100, datetime(2000, 2, 9, 0), None], [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"], [None,-33333,-101, datetime(2000, 2,11, 0), None], [None,-33333,-100, datetime(2000, 2,11, 0), None], [-15, -33333, -2, datetime(2000, 2,11, 0), "*"], [-16, -33333, -11, datetime(2000, 2,11, 0), "*"], [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"], ] patientItems = DBUtil.execute(self.patientItemQuery) self.assertEqualPatientItems(expectedPatientItems, patientItems) # Check for tracking link records expectedItems = \ [ [-100,-2], [-100,-4], [-100,-6], [-100,-8], [-101,-7], [-101,-100], ] actualItems = DBUtil.execute(linkQuery) self.assertEqualTable(expectedItems, actualItems) def test_mergeRelated(self): 
self.analyzer.mergeRelated(-6, (-7, -2)) expectedClinicalItemStatus = \ [ [-1, "CBC",1, 1], [-2, "BMP",0, 1], [-3, "Hepatic Panel",1, 1], [-4, "Cardiac Enzymes",1, 1], [-5, "CXR",1, 1], [-6, "RUQ Ultrasound+BMP+CT Abdomen/Pelvis",1, 1], [-7, "CT Abdomen/Pelvis",0, 1], [-8, "CT PE Thorax",1, 1], [-9, "Acetaminophen",1, 1], [-10, "Carvedilol",1, 1], [-11, "Enoxaparin",1, 1], [-12, "Warfarin",1, 1], [-13, "Ceftriaxone",1, 1], [-14, "Foley Catheter",1, 1], [-15, "Strict I&O",1, 1], [-16, "Fall Precautions",1, 1], ] clinicalItemStatus = DBUtil.execute(self.clinicalItemQuery) self.assertEqualTable(expectedClinicalItemStatus, clinicalItemStatus) expectedPatientItems = \ [ # Use placeholder "*" for analyze date, just verify that it exists and is consistent. Actual value is not important [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"], [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"], [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"], [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"], [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"], [-10, -22222, -6, datetime(2000, 1, 5, 0), None], # Reassign [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"], [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"], [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"], [-15, -33333, -6, datetime(2000, 2,11, 0), None], # Reassign [-16, -33333, -11, datetime(2000, 2,11, 0), "*"], [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"], ] patientItems = DBUtil.execute(self.patientItemQuery) self.assertEqualPatientItems(expectedPatientItems, patientItems) expectedAssociationStats = \ [ [-11,-11, 3, 3, 3, 3, 4, 999.0, 9999.0, 2, 2, 2, 2, 2, 999.0, 9999.0], [-11, -6, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0], [ -6,-11, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1, 1, 2, 2, 172800.0, 29859840000.0], [ -6, -6, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2, 2, 2, 2, 0.0, 0.0], ] associationStats = DBUtil.execute(self.clinicalItemAssociationQuery) self.assertEqualTable(expectedAssociationStats, associationStats, precision=3) # Check for 
backup of lost data backupQuery = \ """ select patient_item_id, clinical_item_id from backup_link_patient_item where clinical_item_id < 0 order by patient_item_id desc, clinical_item_id """ expectedBackupItems = \ [ [-10,-7], [-15,-2], ] backupItems = DBUtil.execute(backupQuery) self.assertEqualTable(expectedBackupItems, backupItems) def test_unifyRedundant(self): self.analyzer.unifyRedundant(-7, (-7, -2)) expectedClinicalItemStatus = \ [ [-1, "CBC",1, 1], [-2, "BMP",0, 1], [-3, "Hepatic Panel",1, 1], [-4, "Cardiac Enzymes",1, 1], [-5, "CXR",1, 1], [-6, "RUQ Ultrasound",1, 1], [-7, "CT Abdomen/Pelvis+BMP",1, 1], [-8, "CT PE Thorax",1, 1], [-9, "Acetaminophen",1, 1], [-10, "Carvedilol",1, 1], [-11, "Enoxaparin",1, 1], [-12, "Warfarin",1, 1], [-13, "Ceftriaxone",1, 1], [-14, "Foley Catheter",1, 1], [-15, "Strict I&O",1, 1], [-16, "Fall Precautions",1, 1], ] clinicalItemStatus = DBUtil.execute(self.clinicalItemQuery) self.assertEqualTable(expectedClinicalItemStatus, clinicalItemStatus) expectedPatientItems = \ [ # Use placeholder "*" for analyze date, just verify that it exists and is consistent. 
Actual value is not important [-1, -11111, -4, datetime(2000, 1, 1, 0), "*"], [-2, -11111, -10, datetime(2000, 1, 1, 0), "*"], [-3, -11111, -8, datetime(2000, 1, 1, 2), "*"], [-4, -11111, -10, datetime(2000, 1, 2, 0), "*"], [-5, -11111, -12, datetime(2000, 2, 1, 0), "*"], [-10, -22222, -7, datetime(2000, 1, 5, 0), "*"], [-12, -22222, -6, datetime(2000, 1, 9, 0), "*"], [-13, -22222, -11, datetime(2000, 1, 9, 0), "*"], [-14, -33333, -6, datetime(2000, 2, 9, 0), "*"], [-15, -33333, -2, datetime(2000, 2,11, 0), None], [-16, -33333, -11, datetime(2000, 2,11, 0), "*"], [-17, -33333, -11, datetime(2001, 1, 1, 0), "*"], ] patientItems = DBUtil.execute(self.patientItemQuery) self.assertEqualPatientItems(expectedPatientItems, patientItems) expectedAssociationStats = \ [ [-11,-11, 3, 3, 3, 3, 4, 999.0, 9999.0, 2, 2, 2, 2, 2, 999.0, 9999.0], [-11, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0], [-11, -6, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0], [ -7,-11, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0], [ -7, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0], [ -7, -6, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0], [ -6,-11, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1, 1, 2, 2, 172800.0, 29859840000.0], [ -6, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0], [ -6, -6, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2, 2, 2, 2, 0.0, 0.0], ] associationStats = DBUtil.execute(self.clinicalItemAssociationQuery) self.assertEqualTable(expectedAssociationStats, associationStats, precision=3) def assertEqualPatientItems(self, expectedPatientItems, patientItems): """Patch the expected items to look for whatever is the set analyze_date, and just adjust so expect will be present and consistent. Don't care about specific value. 
Likewise, don't care about primary key patient_item_id new values """ expectedAnalyzeDate = None for row in patientItems: analyzeDate = row[-1] if analyzeDate is not None: expectedAnalyzeDate = analyzeDate break for row in expectedPatientItems: if expectedAnalyzeDate is not None and row[-1] is not None: row[-1] = expectedAnalyzeDate for (expectedRow, actualRow) in zip(expectedPatientItems, patientItems): if expectedRow[0] is None: expectedRow[0] = actualRow[0] self.assertEqualTable(expectedPatientItems, patientItems) def test_updateClinicalItemCounts(self): self.analyzer.updateClinicalItemCounts() clinicalItemQueryClinicalCounts = \ """ select clinical_item_id, name, analysis_status, item_count, patient_count, patient_count, encounter_count from clinical_item where clinical_item_id < 0 order by clinical_item_id desc """ # Expect counts to default to zero if no values known expectedClinicalItemCounts = \ [ [-1, "CBC",1, 0, 0, 0, 0], [-2, "BMP",0, 1, 1, 1, 1], [-3, "Hepatic Panel",1, 0, 0, 0, 0], [-4, "Cardiac Enzymes",1, 0, 0, 0, 0], [-5, "CXR",1, 0, 0, 0, 0], [-6, "RUQ Ultrasound",1, 2, 2, 2, 2], [-7, "CT Abdomen/Pelvis",1, 1, 1, 1, 1], [-8, "CT PE Thorax",1, 0, 0, 0, 0], [-9, "Acetaminophen",1, 0, 0, 0, 0], [-10, "Carvedilol",1, 0, 0, 0, 0], [-11, "Enoxaparin",1, 3, 2, 2, 2], # Two instances occur for the same patient [-12, "Warfarin",1, 0, 0, 0, 0], [-13, "Ceftriaxone",1, 0, 0, 0, 0], [-14, "Foley Catheter",1, 0, 0, 0, 0], [-15, "Strict I&O",1, 0, 0, 0, 0], [-16, "Fall Precautions",1, 0, 0, 0, 0], ] clinicalItemCounts = DBUtil.execute( clinicalItemQueryClinicalCounts ) #Queries test DB to see what is stored in there self.assertEqualTable(expectedClinicalItemCounts, clinicalItemCounts) def test_resetAssociationModel(self): self.analyzer.updateClinicalItemCounts() # Generate clinical item counts based on patient item data ciaCount = DBUtil.execute( "select count(*) from clinical_item_association")[0][0] piCount = DBUtil.execute("select count(*) from 
patient_item")[0][0] piAnalyzedCount = DBUtil.execute( "select count(*) from patient_item where analyze_date is not null" )[0][0] cacheCount = DBUtil.execute( "select count(*) from data_cache where data_key in ('analyzedPatientCount')" )[0][0] itemCountSummary = DBUtil.execute( "select sum(item_count) from clinical_item")[0][0] self.assertTrue(ciaCount > 0) self.assertTrue(piCount > 0) self.assertTrue(piAnalyzedCount > 0) #self.assertTrue(cacheCount > 0); self.assertTrue(itemCountSummary > 0) self.analyzer.resetAssociationModel() ciaCount2 = DBUtil.execute( "select count(*) from clinical_item_association")[0][0] piCount2 = DBUtil.execute("select count(*) from patient_item")[0][0] piAnalyzedCount2 = DBUtil.execute( "select count(*) from patient_item where analyze_date is not null" )[0][0] cacheCount2 = DBUtil.execute( "select count(*) from data_cache where data_key in ('analyzedPatientCount')" )[0][0] itemCountSummary2 = DBUtil.execute( "select sum(item_count) from clinical_item")[0][0] self.assertEqual(0, ciaCount2) self.assertEqual(piCount, piCount2) self.assertEqual(0, piAnalyzedCount2) self.assertEqual(0, cacheCount2) self.assertEqual(0, itemCountSummary2)
def setUp(self):
    """Prepare state for test cases.

    Builds the clinical_item schema, then seeds the test database with a
    fixed fixture set (all IDs negative so tearDown can purge by sign):
    clinical item categories, clinical items, patient items, and
    pre-computed clinical_item_association co-occurrence statistics.
    Finally constructs the DataManager instance under test.
    """
    DBTestCase.setUp(self)
    from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
    ClinicalItemDataLoader.build_clinical_item_psql_schemata()
    log.info("Populate the database with test data")

    # Track inserted category IDs (as strings) so tearDown can delete them.
    self.clinicalItemCategoryIdStrList = list()

    headers = ["clinical_item_category_id", "source_table"]
    dataModels = \
        [
            RowItemModel([-1, "Labs"], headers),
            RowItemModel([-2, "Imaging"], headers),
            RowItemModel([-3, "Meds"], headers),
            RowItemModel([-4, "Nursing"], headers),
            RowItemModel([-5, "Problems"], headers),
            RowItemModel([-6, "Lab Results"], headers),
        ]
    for dataModel in dataModels:
        (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_category", dataModel)
        self.clinicalItemCategoryIdStrList.append(str(dataItemId))

    # Individual clinical items, each linked to one of the categories above.
    headers = ["clinical_item_id", "clinical_item_category_id", "name", "analysis_status"]
    dataModels = \
        [
            RowItemModel([-1, -1, "CBC", 1], headers),
            RowItemModel([-2, -1, "BMP", 0], headers),  # Clear analysis status, so this will be ignored unless changed
            RowItemModel([-3, -1, "Hepatic Panel", 1], headers),
            RowItemModel([-4, -1, "Cardiac Enzymes", 1], headers),
            RowItemModel([-5, -2, "CXR", 1], headers),
            RowItemModel([-6, -2, "RUQ Ultrasound", 1], headers),
            RowItemModel([-7, -2, "CT Abdomen/Pelvis", 1], headers),
            RowItemModel([-8, -2, "CT PE Thorax", 1], headers),
            RowItemModel([-9, -3, "Acetaminophen", 1], headers),
            RowItemModel([-10, -3, "Carvedilol", 1], headers),
            RowItemModel([-11, -3, "Enoxaparin", 1], headers),
            RowItemModel([-12, -3, "Warfarin", 1], headers),
            RowItemModel([-13, -3, "Ceftriaxone", 1], headers),
            RowItemModel([-14, -4, "Foley Catheter", 1], headers),
            RowItemModel([-15, -4, "Strict I&O", 1], headers),
            RowItemModel([-16, -4, "Fall Precautions", 1], headers),
        ]
    for dataModel in dataModels:
        (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", dataModel)

    # Canonical query the tests use to inspect clinical_item state.
    self.clinicalItemQuery = \
        """ select clinical_item_id, name, analysis_status, default_recommend from clinical_item where clinical_item_id < 0 order by clinical_item_id desc """

    # Patient item records: three synthetic patients (-11111, -22222, -33333),
    # each row pre-marked as analyzed (analyze_date set far in the future).
    headers = ["patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date"]
    dataModels = \
        [
            RowItemModel([-1, -11111, -4, datetime(2000, 1, 1, 0), datetime(2100, 1, 1, 0)], headers),
            RowItemModel([-2, -11111, -10, datetime(2000, 1, 1, 0), datetime(2100, 1, 1, 0)], headers),
            RowItemModel([-3, -11111, -8, datetime(2000, 1, 1, 2), datetime(2100, 1, 1, 0)], headers),
            RowItemModel([-4, -11111, -10, datetime(2000, 1, 2, 0), datetime(2100, 1, 1, 0)], headers),
            RowItemModel([-5, -11111, -12, datetime(2000, 2, 1, 0), datetime(2100, 1, 1, 0)], headers),
            RowItemModel([-10, -22222, -7, datetime(2000, 1, 5, 0), datetime(2100, 1, 1, 0)], headers),
            RowItemModel([-12, -22222, -6, datetime(2000, 1, 9, 0), datetime(2100, 1, 1, 0)], headers),
            RowItemModel([-13, -22222, -11, datetime(2000, 1, 9, 0), datetime(2100, 1, 1, 0)], headers),
            RowItemModel([-14, -33333, -6, datetime(2000, 2, 9, 0), datetime(2100, 1, 1, 0)], headers),
            RowItemModel([-15, -33333, -2, datetime(2000, 2, 11, 0), datetime(2100, 1, 1, 0)], headers),
            RowItemModel([-16, -33333, -11, datetime(2000, 2, 11, 0), datetime(2100, 1, 1, 0)], headers),
            RowItemModel([-17, -33333, -11, datetime(2001, 1, 1, 0), datetime(2100, 1, 1, 0)], headers),
        ]
    for dataModel in dataModels:
        (dataItemId, isNew) = DBUtil.findOrInsertItem("patient_item", dataModel)

    # Canonical query the tests use to inspect patient_item state.
    self.patientItemQuery = \
        """ select patient_item_id, patient_id, clinical_item_id, item_date, analyze_date from patient_item where clinical_item_id < 0 order by patient_id desc, item_date, patient_item_id desc """

    # Pre-computed association statistics between item pairs, by time bucket
    # (0s, 1h, 1d, 1wk, any) for both raw counts and per-patient counts.
    # NOTE(review): "patient_count_0" appears twice in this header list (once in
    # the middle group and again before "encounter_count_0") — as RowItemModel
    # keys the later entry would win; confirm against the original source.
    headers = \
        [
            "clinical_item_id", "subsequent_item_id",
            "count_0", "count_3600", "count_86400", "count_604800", "count_any",
            "time_diff_sum", "time_diff_sum_squares",
            "patient_count_0", "patient_count_3600", "patient_count_86400", "patient_count_604800", "patient_count_any",
            "patient_time_diff_sum", "patient_time_diff_sum_squares",
            "patient_count_0", "encounter_count_0",
        ]
    dataModels = \
        [
            RowItemModel([-11, -11, 3, 3, 3, 3, 4, 999.0, 9999.0, 2, 2, 2, 2, 2, 999.0, 9999.0, 2, 2], headers),
            RowItemModel([-11, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0], headers),
            RowItemModel([-11, -6, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1], headers),
            RowItemModel([-11, -2, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1], headers),
            RowItemModel([-7, -11, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0], headers),
            RowItemModel([-7, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1], headers),
            RowItemModel([-7, -6, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0, 0, 1, 1, 345600.0, 119439360000.0, 0, 0], headers),
            RowItemModel([-6, -11, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1, 1, 2, 2, 172800.0, 29859840000.0, 1, 1], headers),
            RowItemModel([-6, -7, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0], headers),
            RowItemModel([-6, -6, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2, 2, 2, 2, 0.0, 0.0, 2, 2], headers),
            RowItemModel([-6, -2, 0, 0, 0, 1, 1, 172800.0, 29859840000.0, 0, 0, 0, 1, 1, 172800.0, 29859840000.0, 0, 0], headers),
            RowItemModel([-2, -11, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1], headers),
            RowItemModel([-2, -7, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1], headers),
            RowItemModel([-2, -6, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0, 0, 0, 0, 0.0, 0.0, 0, 0], headers),
            RowItemModel([-2, -2, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1, 1, 1, 1, 0.0, 0.0, 1, 1], headers),
        ]
    for dataModel in dataModels:
        (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_association", dataModel)

    # Canonical query the tests use to inspect association statistics.
    self.clinicalItemAssociationQuery = \
        """ select clinical_item_id, subsequent_item_id, count_0,count_3600,count_86400,count_604800,count_any, time_diff_sum,time_diff_sum_squares, patient_count_0,patient_count_3600,patient_count_86400,patient_count_604800,patient_count_any, patient_time_diff_sum, patient_time_diff_sum_squares from clinical_item_association where clinical_item_id < 0 order by clinical_item_id, subsequent_item_id """

    self.analyzer = DataManager()  # Instance to test on
    self.analyzer.maxClinicalItemId = 0  # Restrict processing to (negative-ID) test data only
class TestItemRecommender(DBTestCase): def setUp(self): """Prepare state for test cases""" DBTestCase.setUp(self) from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader ClinicalItemDataLoader.build_clinical_item_psql_schemata() log.info("Populate the database with test data") self.clinicalItemCategoryIdStrList = list() headers = ["clinical_item_category_id", "source_table"] dataModels = \ [ RowItemModel( [-1, "Labs"], headers ), RowItemModel( [-2, "Imaging"], headers ), RowItemModel( [-3, "Meds"], headers ), RowItemModel( [-4, "Nursing"], headers ), RowItemModel( [-5, "Problems"], headers ), RowItemModel( [-6, "Lab Results"], headers ), ] for dataModel in dataModels: (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_category", dataModel) self.clinicalItemCategoryIdStrList.append(str(dataItemId)) headers = ["clinical_item_id", "clinical_item_category_id", "name"] dataModels = \ [ RowItemModel( [-1, -1, "CBC"], headers ), RowItemModel( [-2, -1, "BMP"], headers ), RowItemModel( [-3, -1, "Hepatic Panel"], headers ), RowItemModel( [-4, -1, "Cardiac Enzymes"], headers ), RowItemModel( [-5, -2, "CXR"], headers ), RowItemModel( [-6, -2, "RUQ Ultrasound"], headers ), RowItemModel( [-7, -2, "CT Abdomen/Pelvis"], headers ), RowItemModel( [-8, -2, "CT PE Thorax"], headers ), RowItemModel( [-9, -3, "Acetaminophen"], headers ), RowItemModel( [-10, -3, "Carvedilol"], headers ), RowItemModel( [-11, -3, "Enoxaparin"], headers ), RowItemModel( [-12, -3, "Warfarin"], headers ), RowItemModel( [-13, -3, "Ceftriaxone"], headers ), RowItemModel( [-14, -4, "Foley Catheter"], headers ), RowItemModel( [-15, -4, "Strict I&O"], headers ), RowItemModel( [-16, -4, "Fall Precautions"], headers ), ] for dataModel in dataModels: (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", dataModel) headers = [ "patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date" ] dataModels = \ [ RowItemModel( [-1, -11111, -4, datetime(2000, 1, 1, 
0), datetime(2010, 1, 1, 0)], headers ), RowItemModel( [-2, -11111, -10, datetime(2000, 1, 1, 0), datetime(2010, 1, 1, 0)], headers ), RowItemModel( [-3, -11111, -8, datetime(2000, 1, 1, 2), datetime(2010, 1, 1, 0)], headers ), RowItemModel( [-4, -11111, -10, datetime(2000, 1, 2, 0), datetime(2010, 1, 1, 0)], headers ), RowItemModel( [-5, -11111, -12, datetime(2000, 2, 1, 0), datetime(2010, 1, 1, 0)], headers ), RowItemModel( [-10, -22222, -7, datetime(2000, 1, 5, 0), datetime(2010, 1, 1, 0)], headers ), RowItemModel( [-12, -22222, -6, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers ), RowItemModel( [-13, -22222, -11, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers ), RowItemModel( [-14, -33333, -6, datetime(2000, 2, 9, 0), datetime(2010, 1, 1, 0)], headers ), RowItemModel( [-15, -33333, -2, datetime(2000, 2,11, 0), datetime(2010, 1, 1, 0)], headers ), ] for dataModel in dataModels: (dataItemId, isNew) = DBUtil.findOrInsertItem("patient_item", dataModel) headers = \ [ "clinical_item_id","subsequent_item_id", "patient_count_0","patient_count_3600","patient_count_86400","patient_count_604800","patient_count_any", "time_diff_sum", "time_diff_sum_squares", ] dataModels = \ [ RowItemModel( [ -1, -1, 30, 30, 30, 30, 30, 0.0, 0.0], headers ), RowItemModel( [ -2, -2, 30, 30, 30, 30, 30, 0.0, 0.0], headers ), RowItemModel( [ -3, -3, 95, 95, 97, 97, 97, 0.0, 0.0], headers ), RowItemModel( [ -4, -4, 40, 40, 40, 40, 40, 0.0, 0.0], headers ), RowItemModel( [ -5, -5, 40, 40, 50, 50, 50, 0.0, 0.0], headers ), RowItemModel( [ -6, -6, 70, 70, 70, 70, 70, 0.0, 0.0], headers ), RowItemModel( [ -2, -3, 0, 0, 0, 0, 0, 0.0, 0.0], headers ), # Zero count associations, probably shouldn't even be here. 
If so, ignore them anyway RowItemModel( [ -2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0], headers ), RowItemModel( [ -2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0], headers ), RowItemModel( [ -3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0], headers ), RowItemModel( [ -4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0], headers ), RowItemModel( [ -4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0], headers ), RowItemModel( [ -4, -7, 23, 33, 33, 33, 63, 40.0, 5420.0], headers ), RowItemModel( [ -5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0], headers ), RowItemModel( [ -6, -2, 7, 7, 7, 7, 7, 1.0, 1.0], headers ), RowItemModel( [ -6, -4, 20, 20, 20, 20, 20, 1.0, 1.0], headers ), ] for dataModel in dataModels: # Add non patient_count variations (Adding 5 to values that are >5 and not for the zero time interval) for header in headers: if header.startswith("patient_count_"): timeStr = header[len("patient_count_"):] dataModel["count_%s" % timeStr] = dataModel[header] # Copy over value if timeStr != "0" and dataModel[header] > 5: dataModel["count_%s" % timeStr] += 5 (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_association", dataModel) # Indicate that cache data needs to be updated self.dataManager = DataManager() self.dataManager.clearCacheData("analyzedPatientCount") self.dataManager.clearCacheData("clinicalItemCountsUpdated") self.recommender = ItemAssociationRecommender() # Instance to test on def tearDown(self): """Restore state from any setUp or test steps""" log.info("Purge test records from the database") DBUtil.execute( "delete from clinical_item_association where clinical_item_id < 0") DBUtil.execute("delete from patient_item where patient_item_id < 0") DBUtil.execute("delete from clinical_item where clinical_item_id < 0") DBUtil.execute( "delete from clinical_item_category where clinical_item_category_id in (%s)" % str.join(",", self.clinicalItemCategoryIdStrList)) DBTestCase.tearDown(self) def test_recommender(self): # Run the recommender against the mock test data above and verify 
expected stats afterwards. query = RecommenderQuery() #query.queryItemIds = set(); #query.excludeItemIds = set(); #query.categoryIds = set(); #query.timeDeltaMax = None; # If set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.), will count item associations that occurred within that time delta as co-occurrent. If left blank, will just consider all items within a given patient as co-occurrent. query.limit = 3 # Just get top 3 ranks for simplicity query.maxRecommendedId = 0 # Artificial constraint to focus only on test data log.debug( "Query with no item key input, just return ranks by general likelihood then." ) headers = ["clinical_item_id"] expectedData = \ [ RowItemModel( [-3], headers ), RowItemModel( [-6], headers ), RowItemModel( [-5], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug( "Query with key item inputs for which no data exists. Effecitvely ignore it then, so just return ranks by general likelihood." ) query.queryItemIds = set([-100]) headers = ["clinical_item_id"] expectedData = \ [ RowItemModel( [-3], headers ), RowItemModel( [-6], headers ), RowItemModel( [-5], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug("Query with category filter on recommended results.") query.queryItemIds = set([-100]) query.excludeCategoryIds = set([-1, -4, -5, -6]) headers = ["clinical_item_id"] expectedData = \ [ RowItemModel( [-6], headers ), RowItemModel( [-5], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug( "Query with category filter and specific exclusion filter on recommended results." 
) query.queryItemIds = set([-100]) query.excludeItemIds = set([-6]) query.excludeCategoryIds = set([-1, -4, -5, -6]) headers = ["clinical_item_id"] expectedData = \ [ RowItemModel( [-5], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug( "General query with a couple of input clinical items + one with no association data (should effectively be ignored)." ) query.queryItemIds = set([-2, -5, -100]) query.excludeItemIds = set() query.excludeCategoryIds = set() headers = ["clinical_item_id"] expectedData = \ [ RowItemModel( [-4], headers ), RowItemModel( [-6], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug( "General query but set a limit on time delta worth counting item associations" ) query.queryItemIds = set([-2, -5, -100]) query.excludeItemIds = set() query.excludeCategoryIds = set() query.timeDeltaMax = DELTA_HOUR headers = ["clinical_item_id"] expectedData = \ [ RowItemModel( [-6], headers ), RowItemModel( [-4], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug("General query with category limit") query.queryItemIds = set([-2, -5, -100]) query.excludeItemIds = set() query.excludeCategoryIds = set([-2, -4, -5, -6]) query.timeDeltaMax = DELTA_HOUR headers = ["clinical_item_id"] expectedData = \ [ RowItemModel( [-4], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, recommendedData, query) log.debug("General query with specific exclusion") query.queryItemIds = set([-2, -5, -100]) query.excludeItemIds = set([-4, -3, -2]) query.excludeCategoryIds = set() query.timeDeltaMax = DELTA_HOUR headers = ["clinical_item_id"] expectedData = \ [ RowItemModel( [-6], headers ), ] recommendedData = self.recommender(query) self.assertEqualRecommendedData(expectedData, 
                                        recommendedData, query)

    def test_recommender_aggregation(self):
        """Exercise the recommender's score aggregation methods.

        Runs the same query under the default weighted method, then
        "unweighted", "SerialBayes", and "NaiveBayes", verifying the
        top-ranked items and their derived statistics for each; finally
        applies a field value filter and verifies items are dropped.
        """
        # Test different scoring aggregation methods
        query = RecommenderQuery()
        query.countPrefix = "patient_"
        query.queryItemIds = set([-2, -5])
        #query.excludeItemIds = set();
        #query.categoryIds = set();
        #query.timeDeltaMax = None;   # If set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.), will count item associations that occurred within that time delta as co-occurrent. If left blank, will just consider all items within a given patient as co-occurrent.
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        headers = ["clinical_item_id", "conditionalFreq", "freqRatio"]

        # Default weighted aggregation method
        expectedData = \
            [   RowItemModel( [-4, 0.3, 22.5], headers ),
                RowItemModel( [-6, 0.16667, 7.142857], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        # Change to unweighted aggregation method
        query.aggregationMethod = "unweighted"
        expectedData = \
            [   RowItemModel( [-4, 0.32857, 24.64286], headers ),
                RowItemModel( [-6, 0.16667, 7.142857], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        # Change to Serial Bayes aggregation method
        query.aggregationMethod = "SerialBayes"
        expectedData = \
            [   RowItemModel( [-4, 0.89157, 66.867471], headers ),
                RowItemModel( [-6, 0.16667, 7.142857], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        # Naive Bayes aggregation
        query.aggregationMethod = "NaiveBayes"
        expectedData = \
            [   RowItemModel( [-4, 3.75, 281.25], headers ),  # Without truncating negative values
                #RowItemModel( [-4, 0.8, 58.59707], headers ),  # With truncating negative values
                RowItemModel( [-6, 0.16667, 7.142857], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        # Apply value filter
        query.fieldFilters["freqRatio>"] = 10.0
        expectedData = \
            [   RowItemModel( [-6, 0.16667, 7.142857], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

    def assertEqualRecommendedData(self, expectedData, recommendedData, query):
        """Run assertEqualGeneral on the key components of the contents of the
        recommendation data.  Don't necessarily care about the specific numbers
        that come out of the recommendations, but do care about consistency in
        rankings and relative order by the query.sortField.
        """
        lastScore = None
        for expectedItem, recommendedItem in zip(expectedData, recommendedData):
            # Ensure derived statistics are populated to enable comparisons
            ItemAssociationRecommender.populateDerivedStats( recommendedItem, expectedItem.keys())
            self.assertEqualDict(expectedItem, recommendedItem, ["clinical_item_id"])
            for key in expectedItem.iterkeys():  # If specified, then verify a specific values
                if isinstance(expectedItem[key], float):
                    # Compare floats to 5 decimal places to tolerate rounding differences
                    self.assertAlmostEquals(expectedItem[key], recommendedItem[key], 5)
                else:
                    self.assertEqual(expectedItem[key], recommendedItem[key])
            if lastScore is not None:
                self.assertTrue(recommendedItem[query.sortField] <= lastScore)  # Verify descending order of scores
            lastScore = recommendedItem[query.sortField]
        # Both collections must be the same length, not just a matching prefix
        self.assertEqual(len(expectedData), len(recommendedData))

    def test_recommender_stats(self):
        # Run the recommender against the mock test data above and verify expected stats calculations
        query = RecommenderQuery()
        query.parseParams \
        (   {   "countPrefix": "patient_",
                "queryItemIds": "-6",
                "resultCount": "3",  # Just get top 3 ranks for simplicity
                "maxRecommendedId": "0",  # Artificial constraint to focus only on test data
                "sortField": "P-Fisher",  # Specifically request derived expected vs. observed stats
            }
        )

        log.debug("Query with single item not perturbed by others.")
        headers = [ "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq", "baselineFreq", "freqRatio", "P-Fisher" ]
        expectedData = \
            [   RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 7.0, 0.1, 0.0100, 10.0, 3.7e-06], headers ),
                RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 20.0, 0.286, 0.0133, 21.42857, 1.2e-23], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedDataStats(expectedData, recommendedData, headers)

        log.debug("Query for non-unique counts.")
        query.parseParams \
        (   {   "countPrefix": "",
                "sortField": "oddsRatio",
            }
        )
        headers = [ "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq", "baselineFreq", "freqRatio", "oddsRatio" ]
        expectedData = \
            [   RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 25.0, 0.35714, 0.01333, 26.7857, 107.96296], headers ),
                RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 12.0, 0.1714, 0.01, 17.1429, 33.47126], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedDataStats(expectedData, recommendedData, headers)

    def assertEqualRecommendedDataStats(self, expectedData, recommendedData, headers):
        """Run assertEqualGeneral on the key components of the contents of the
        recommendation data.  In this case, we do want to verify actual
        score / stat values match.
        """
        self.assertEqual(len(expectedData), len(recommendedData))
        for expectedItem, recommendedItem in zip(expectedData, recommendedData):
            # Ensure the recommendedData has all fields of interest populated / calculated
            ItemAssociationRecommender.populateDerivedStats( recommendedItem, headers)
            for header in headers:
                expectedValue = expectedItem[header]
                recommendedValue = recommendedItem[header]
                msg = 'Dicts diff with key (%s). Verify = %s, Sample = %s' % (header, expectedValue, recommendedValue)
                # Compare to 3 decimal places to tolerate floating point rounding
                self.assertAlmostEquals(expectedValue, recommendedValue, 3, msg)

    def test_recommender_stats_commandline(self):
        # Run the recommender against the mock test data above and verify expected stats calculations
        log.debug("Query with single item not perturbed by others.")
        headers = [ "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq", "baselineFreq", "freqRatio", "P-Fisher" ]
        expectedData = \
            [   RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 7.0, 0.1, 0.0100, 10.0, 3.7e-06], headers ),
                RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 20.0, 0.286, 0.0133, 21.42857, 1.2e-23], headers ),
            ]

        sys.stdout = StringIO()  # Redirect stdout output to collect test results
        argv = [ "ItemRecommender.py", "maxRecommendedId=0&queryItemIds=-6&countPrefix=patient_&resultCount=3&sortField=P-Fisher", "-" ]
        self.recommender.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualRecommendedDataStatsTextOutput(expectedData, textOutput, headers)

        log.debug("Query for non-unique counts.")
        headers = [ "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq", "baselineFreq", "freqRatio", "oddsRatio" ]
        expectedData = \
            [   RowItemModel( [-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 25.0, 0.35714, 0.01333, 26.7857, 107.96296], headers ),
                RowItemModel( [-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 12.0, 0.1714, 0.01, 17.1429, 33.47126], headers ),
            ]

        sys.stdout = StringIO()  # Redirect stdout output to collect test results
        argv = [ "ItemRecommender.py", "maxRecommendedId=0&queryItemIds=-6&countPrefix=&resultCount=3&sortField=oddsRatio", "-" ]
        self.recommender.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualRecommendedDataStatsTextOutput(expectedData, textOutput, headers)

    def assertEqualRecommendedDataStatsTextOutput(self, expectedData, textOutput, headers):
        """Run assertEqualGeneral on the key components of the contents of the
        recommendation data.  In this case, we do want to verify actual
        score / stat values match, after parsing them back out of the
        tab-delimited text output.
        """
        recommendedData = list()
        for dataRow in TabDictReader(textOutput):
            for key, value in dataRow.iteritems():
                if key in headers:
                    dataRow[key] = float(value)  # Parse into numerical values for comparison
            recommendedData.append(dataRow)
        self.assertEqualRecommendedDataStats(expectedData, recommendedData, headers)

    def test_dataCache(self):
        """Verify the DataManager query cache.

        Repeating queries with the cache turned on must not issue extra DB
        queries; subset queries should also be served from the cache because
        the first cached query does a mass-all lookup.
        """
        # Test that repeating queries with cache turned on will not result in extra DB queries
        query = RecommenderQuery()
        query.countPrefix = "patient_"
        query.queryItemIds = set([-2, -5])
        #query.excludeItemIds = set();
        #query.categoryIds = set();
        #query.timeDeltaMax = None;   # If set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.), will count item associations that occurred within that time delta as co-occurrent. If left blank, will just consider all items within a given patient as co-occurrent.
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        headers = ["clinical_item_id", "conditionalFreq", "freqRatio"]

        # First query without cache
        self.recommender.dataManager.dataCache = None
        baselineData = self.recommender(query)
        baselineQueryCount = self.recommender.dataManager.queryCount

        # Redo query with cache
        self.recommender.dataManager.dataCache = dict()
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)  # Ensure getting same results
        self.assertNotEqual(baselineQueryCount, newQueryCount)  # Expect needed more queries since no prior cache
        baselineQueryCount = newQueryCount

        # Again, but should be no new query since have cached results last time
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Repeat multiple times, should still have no new query activity
        # prog = ProgressDots(10,1,"repeats");
        for iRepeat in xrange(10):
            newData = self.recommender(query)
            newQueryCount = self.recommender.dataManager.queryCount
            self.assertEqualRecommendedData(baselineData, newData, query)
            self.assertEqual(baselineQueryCount, newQueryCount)
            # prog.update();
        # prog.printStatus();

        # Query for subset should still yield no new query
        query.queryItemIds = set([-2])
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        baselineData = newData  # New baseline for subset
        self.assertEqual(baselineQueryCount, newQueryCount)  # Expect no queries for subsets

        # Repeat query for subset
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)  # Expect no queries for subsets

        # Query for partial subset, partial new
        query.queryItemIds = set([-5, -6])
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        baselineData = newData  # New baseline for subset
        self.assertEqual(baselineQueryCount, newQueryCount)  # Expect now new queries for subsets, because first query should have done mass-all query

        # Repeat for partial subset, no longer new
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        baselineData = newData  # New baseline for subset
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)
    def setUp(self):
        """Prepare state for test cases.

        Builds the clinical item schemata, then seeds categories, items,
        patient items, and association counts (patient_count_* columns, with
        count_* variations derived in a post-processing loop below).
        """
        DBTestCase.setUp(self)
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        log.info("Populate the database with test data")

        # Negative IDs are used throughout so test rows cannot collide with real data
        self.clinicalItemCategoryIdStrList = list()
        headers = ["clinical_item_category_id", "source_table"]
        dataModels = \
            [   RowItemModel( [-1, "Labs"], headers ),
                RowItemModel( [-2, "Imaging"], headers ),
                RowItemModel( [-3, "Meds"], headers ),
                RowItemModel( [-4, "Nursing"], headers ),
                RowItemModel( [-5, "Problems"], headers ),
                RowItemModel( [-6, "Lab Results"], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_category", dataModel)
            self.clinicalItemCategoryIdStrList.append(str(dataItemId))

        headers = ["clinical_item_id", "clinical_item_category_id", "name"]
        dataModels = \
            [   RowItemModel( [-1, -1, "CBC"], headers ),
                RowItemModel( [-2, -1, "BMP"], headers ),
                RowItemModel( [-3, -1, "Hepatic Panel"], headers ),
                RowItemModel( [-4, -1, "Cardiac Enzymes"], headers ),
                RowItemModel( [-5, -2, "CXR"], headers ),
                RowItemModel( [-6, -2, "RUQ Ultrasound"], headers ),
                RowItemModel( [-7, -2, "CT Abdomen/Pelvis"], headers ),
                RowItemModel( [-8, -2, "CT PE Thorax"], headers ),
                RowItemModel( [-9, -3, "Acetaminophen"], headers ),
                RowItemModel( [-10, -3, "Carvedilol"], headers ),
                RowItemModel( [-11, -3, "Enoxaparin"], headers ),
                RowItemModel( [-12, -3, "Warfarin"], headers ),
                RowItemModel( [-13, -3, "Ceftriaxone"], headers ),
                RowItemModel( [-14, -4, "Foley Catheter"], headers ),
                RowItemModel( [-15, -4, "Strict I&O"], headers ),
                RowItemModel( [-16, -4, "Fall Precautions"], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", dataModel)

        headers = [ "patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date" ]
        dataModels = \
            [   RowItemModel( [-1, -11111, -4, datetime(2000, 1, 1, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-2, -11111, -10, datetime(2000, 1, 1, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-3, -11111, -8, datetime(2000, 1, 1, 2), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-4, -11111, -10, datetime(2000, 1, 2, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-5, -11111, -12, datetime(2000, 2, 1, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-10, -22222, -7, datetime(2000, 1, 5, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-12, -22222, -6, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-13, -22222, -11, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-14, -33333, -6, datetime(2000, 2, 9, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-15, -33333, -2, datetime(2000, 2,11, 0), datetime(2010, 1, 1, 0)], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("patient_item", dataModel)

        headers = \
            [   "clinical_item_id","subsequent_item_id",
                "patient_count_0","patient_count_3600","patient_count_86400","patient_count_604800","patient_count_any",
                "time_diff_sum", "time_diff_sum_squares",
            ]
        dataModels = \
            [   RowItemModel( [ -1, -1, 30, 30, 30, 30, 30, 0.0, 0.0], headers ),
                RowItemModel( [ -2, -2, 30, 30, 30, 30, 30, 0.0, 0.0], headers ),
                RowItemModel( [ -3, -3, 95, 95, 97, 97, 97, 0.0, 0.0], headers ),
                RowItemModel( [ -4, -4, 40, 40, 40, 40, 40, 0.0, 0.0], headers ),
                RowItemModel( [ -5, -5, 40, 40, 50, 50, 50, 0.0, 0.0], headers ),
                RowItemModel( [ -6, -6, 70, 70, 70, 70, 70, 0.0, 0.0], headers ),

                RowItemModel( [ -2, -3, 0, 0, 0, 0, 0, 0.0, 0.0], headers ),  # Zero count associations, probably shouldn't even be here. If so, ignore them anyway
                RowItemModel( [ -2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0], headers ),
                RowItemModel( [ -2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0], headers ),
                RowItemModel( [ -3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0], headers ),
                RowItemModel( [ -4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0], headers ),
                RowItemModel( [ -4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0], headers ),
                RowItemModel( [ -4, -7, 23, 33, 33, 33, 63, 40.0, 5420.0], headers ),
                RowItemModel( [ -5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0], headers ),
                RowItemModel( [ -6, -2, 7, 7, 7, 7, 7, 1.0, 1.0], headers ),
                RowItemModel( [ -6, -4, 20, 20, 20, 20, 20, 1.0, 1.0], headers ),
            ]
        for dataModel in dataModels:
            # Add non patient_count variations (Adding 5 to values that are >5 and not for the zero time interval)
            for header in headers:
                if header.startswith("patient_count_"):
                    timeStr = header[len("patient_count_"):]
                    dataModel["count_%s" % timeStr] = dataModel[header]  # Copy over value
                    if timeStr != "0" and dataModel[header] > 5:
                        dataModel["count_%s" % timeStr] += 5
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_association", dataModel)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        self.dataManager.clearCacheData("analyzedPatientCount")
        self.dataManager.clearCacheData("clinicalItemCountsUpdated")

        # Instance to test on
        self.recommender = ItemAssociationRecommender()
    def setUp(self):
        """Prepare state for test cases.

        Builds the clinical item schemata, then seeds categories, items,
        patient items, and pre-computed association counts (count_* columns)
        used by the recommendation ranking trend analysis under test.
        """
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        # Negative IDs are used throughout so test rows cannot collide with real data
        self.clinicalItemCategoryIdStrList = list()
        headers = ["clinical_item_category_id", "source_table"]
        dataModels = \
            [   RowItemModel( [-1, "Labs"], headers ),
                RowItemModel( [-2, "Imaging"], headers ),
                RowItemModel( [-3, "Meds"], headers ),
                RowItemModel( [-4, "Nursing"], headers ),
                RowItemModel( [-5, "Problems"], headers ),
                RowItemModel( [-6, "Lab Results"], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_category", dataModel)
            self.clinicalItemCategoryIdStrList.append(str(dataItemId))

        headers = ["clinical_item_id", "clinical_item_category_id", "name"]
        dataModels = \
            [   RowItemModel( [-1, -1, "CBC"], headers ),
                RowItemModel( [-2, -1, "BMP"], headers ),
                RowItemModel( [-3, -1, "Hepatic Panel"], headers ),
                RowItemModel( [-4, -1, "Cardiac Enzymes"], headers ),
                RowItemModel( [-5, -2, "CXR"], headers ),
                RowItemModel( [-6, -2, "RUQ Ultrasound"], headers ),
                RowItemModel( [-7, -2, "CT Abdomen/Pelvis"], headers ),
                RowItemModel( [-8, -2, "CT PE Thorax"], headers ),
                RowItemModel( [-9, -3, "Acetaminophen"], headers ),
                RowItemModel( [-10, -3, "Carvedilol"], headers ),
                RowItemModel( [-11, -3, "Enoxaparin"], headers ),
                RowItemModel( [-12, -3, "Warfarin"], headers ),
                RowItemModel( [-13, -3, "Ceftriaxone"], headers ),
                RowItemModel( [-14, -4, "Foley Catheter"], headers ),
                RowItemModel( [-15, -4, "Strict I&O"], headers ),
                RowItemModel( [-16, -4, "Fall Precautions"], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", dataModel)

        headers = [ "patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date" ]
        dataModels = \
            [   RowItemModel( [-1, -11111, -4, datetime(2000, 1, 1, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-2, -11111, -10, datetime(2000, 1, 1, 1), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-3, -11111, -8, datetime(2000, 1, 1, 2), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-4, -11111, -10, datetime(2000, 1, 2, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-5, -11111, -12, datetime(2000, 2, 1, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-10, -22222, -7, datetime(2000, 1, 5, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-12, -22222, -6, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-13, -22222, -11, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-14, -33333, -6, datetime(2000, 2, 9, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-15, -33333, -2, datetime(2000, 2,11, 0), datetime(2010, 1, 1, 0)], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("patient_item", dataModel)

        headers = \
            [   "clinical_item_id","subsequent_item_id",
                "count_0","count_3600","count_86400","count_604800","count_any",
                "time_diff_sum", "time_diff_sum_squares",
            ]
        dataModels = \
            [   RowItemModel( [ -1, -1, 30, 30, 30, 30, 30, 0.0, 0.0], headers ),
                RowItemModel( [ -2, -2, 30, 30, 30, 30, 30, 0.0, 0.0], headers ),
                RowItemModel( [ -3, -3, 95, 95, 97, 97, 97, 0.0, 0.0], headers ),
                RowItemModel( [ -4, -4, 240,240,240,240,240, 0.0, 0.0], headers ),
                RowItemModel( [ -5, -5, 40, 40, 50, 50, 50, 0.0, 0.0], headers ),
                RowItemModel( [ -6, -6, 70, 70, 70, 70, 70, 0.0, 0.0], headers ),
                RowItemModel( [ -7, -7, 35, 35, 35, 50, 80, 0.0, 0.0], headers ),
                RowItemModel( [ -8, -8, 35, 35, 35, 50, 80, 0.0, 0.0], headers ),
                RowItemModel( [-10,-10, 45, 45, 55, 60, 90, 0.0, 0.0], headers ),
                RowItemModel( [-12,-12, 75, 75, 75, 80, 90, 0.0, 0.0], headers ),

                RowItemModel( [ -2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0], headers ),
                RowItemModel( [ -2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0], headers ),
                RowItemModel( [ -3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0], headers ),
                RowItemModel( [ -4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0], headers ),
                RowItemModel( [ -4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0], headers ),
                RowItemModel( [ -4, -7, 27, 33, 33, 33, 83, 40.0, 5420.0], headers ),
                RowItemModel( [ -4, -8, 1, 2, 3, 4, 5, 40.0, 5420.0], headers ),
                RowItemModel( [ -4,-10, 25, 35, 40, 45, 73, 47.0, 5420.0], headers ),
                RowItemModel( [ -5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0], headers ),
                RowItemModel( [-10, -8, 2, 4, 6, 8, 10, 47.0, 5420.0], headers ),
                RowItemModel( [-10, -12, 12, 14, 16, 18, 20, 47.0, 5420.0], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_association", dataModel)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        self.dataManager.clearCacheData("analyzedPatientCount")
        self.dataManager.clearCacheData("clinicalItemCountsUpdated")

        # Instance to test on
        self.analyzer = RecommendationRankingTrendAnalysis()
class TestRecommendationRankingTrendAnalysis(DBTestCase):
    """Integration tests for RecommendationRankingTrendAnalysis against a
    mock clinical item database populated in setUp and purged in tearDown.
    """

    def setUp(self):
        """Prepare state for test cases.

        Builds the clinical item schemata, then seeds categories, items,
        patient items, and pre-computed association counts (count_* columns).
        Negative IDs are used throughout so test rows cannot collide with
        real data and can be purged wholesale in tearDown.
        """
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        self.clinicalItemCategoryIdStrList = list()
        headers = ["clinical_item_category_id", "source_table"]
        dataModels = \
            [   RowItemModel( [-1, "Labs"], headers ),
                RowItemModel( [-2, "Imaging"], headers ),
                RowItemModel( [-3, "Meds"], headers ),
                RowItemModel( [-4, "Nursing"], headers ),
                RowItemModel( [-5, "Problems"], headers ),
                RowItemModel( [-6, "Lab Results"], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_category", dataModel)
            self.clinicalItemCategoryIdStrList.append(str(dataItemId))

        headers = ["clinical_item_id", "clinical_item_category_id", "name"]
        dataModels = \
            [   RowItemModel( [-1, -1, "CBC"], headers ),
                RowItemModel( [-2, -1, "BMP"], headers ),
                RowItemModel( [-3, -1, "Hepatic Panel"], headers ),
                RowItemModel( [-4, -1, "Cardiac Enzymes"], headers ),
                RowItemModel( [-5, -2, "CXR"], headers ),
                RowItemModel( [-6, -2, "RUQ Ultrasound"], headers ),
                RowItemModel( [-7, -2, "CT Abdomen/Pelvis"], headers ),
                RowItemModel( [-8, -2, "CT PE Thorax"], headers ),
                RowItemModel( [-9, -3, "Acetaminophen"], headers ),
                RowItemModel( [-10, -3, "Carvedilol"], headers ),
                RowItemModel( [-11, -3, "Enoxaparin"], headers ),
                RowItemModel( [-12, -3, "Warfarin"], headers ),
                RowItemModel( [-13, -3, "Ceftriaxone"], headers ),
                RowItemModel( [-14, -4, "Foley Catheter"], headers ),
                RowItemModel( [-15, -4, "Strict I&O"], headers ),
                RowItemModel( [-16, -4, "Fall Precautions"], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", dataModel)

        headers = [ "patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date" ]
        dataModels = \
            [   RowItemModel( [-1, -11111, -4, datetime(2000, 1, 1, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-2, -11111, -10, datetime(2000, 1, 1, 1), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-3, -11111, -8, datetime(2000, 1, 1, 2), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-4, -11111, -10, datetime(2000, 1, 2, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-5, -11111, -12, datetime(2000, 2, 1, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-10, -22222, -7, datetime(2000, 1, 5, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-12, -22222, -6, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-13, -22222, -11, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-14, -33333, -6, datetime(2000, 2, 9, 0), datetime(2010, 1, 1, 0)], headers ),
                RowItemModel( [-15, -33333, -2, datetime(2000, 2,11, 0), datetime(2010, 1, 1, 0)], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("patient_item", dataModel)

        headers = \
            [   "clinical_item_id","subsequent_item_id",
                "count_0","count_3600","count_86400","count_604800","count_any",
                "time_diff_sum", "time_diff_sum_squares",
            ]
        dataModels = \
            [   RowItemModel( [ -1, -1, 30, 30, 30, 30, 30, 0.0, 0.0], headers ),
                RowItemModel( [ -2, -2, 30, 30, 30, 30, 30, 0.0, 0.0], headers ),
                RowItemModel( [ -3, -3, 95, 95, 97, 97, 97, 0.0, 0.0], headers ),
                RowItemModel( [ -4, -4, 240,240,240,240,240, 0.0, 0.0], headers ),
                RowItemModel( [ -5, -5, 40, 40, 50, 50, 50, 0.0, 0.0], headers ),
                RowItemModel( [ -6, -6, 70, 70, 70, 70, 70, 0.0, 0.0], headers ),
                RowItemModel( [ -7, -7, 35, 35, 35, 50, 80, 0.0, 0.0], headers ),
                RowItemModel( [ -8, -8, 35, 35, 35, 50, 80, 0.0, 0.0], headers ),
                RowItemModel( [-10,-10, 45, 45, 55, 60, 90, 0.0, 0.0], headers ),
                RowItemModel( [-12,-12, 75, 75, 75, 80, 90, 0.0, 0.0], headers ),

                RowItemModel( [ -2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0], headers ),
                RowItemModel( [ -2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0], headers ),
                RowItemModel( [ -3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0], headers ),
                RowItemModel( [ -4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0], headers ),
                RowItemModel( [ -4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0], headers ),
                RowItemModel( [ -4, -7, 27, 33, 33, 33, 83, 40.0, 5420.0], headers ),
                RowItemModel( [ -4, -8, 1, 2, 3, 4, 5, 40.0, 5420.0], headers ),
                RowItemModel( [ -4,-10, 25, 35, 40, 45, 73, 47.0, 5420.0], headers ),
                RowItemModel( [ -5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0], headers ),
                RowItemModel( [-10, -8, 2, 4, 6, 8, 10, 47.0, 5420.0], headers ),
                RowItemModel( [-10, -12, 12, 14, 16, 18, 20, 47.0, 5420.0], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_association", dataModel)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        self.dataManager.clearCacheData("analyzedPatientCount")
        self.dataManager.clearCacheData("clinicalItemCountsUpdated")

        # Instance to test on
        self.analyzer = RecommendationRankingTrendAnalysis()

    def tearDown(self):
        """Restore state from any setUp or test steps"""
        log.info("Purge test records from the database")
        DBUtil.execute("delete from clinical_item_association where clinical_item_id < 0")
        DBUtil.execute("delete from patient_item where patient_item_id < 0")
        DBUtil.execute("delete from clinical_item where clinical_item_id < 0")
        DBUtil.execute("delete from clinical_item_category where clinical_item_category_id in (%s)" % str.join(",", self.clinicalItemCategoryIdStrList))
        DBTestCase.tearDown(self)

    def test_recommenderAnalysis(self):
        # Run the recommender against the mock test data above and verify expected stats afterwards.
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-11111])
        analysisQuery.recommender = BaselineFrequencyRecommender()
        #analysisQuery.recommender = ItemAssociationRecommender();
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # Don't use items whose default is to be excluded from recommendations
        #recQuery.excludeCategoryIds = recommender.defaultExcludedClinicalItemCategoryIds(conn=conn);
        #recQuery.excludeItemIds = recommender.defaultExcludedClinicalItemIds(conn=conn);
        #recQuery.timeDeltaMax = timedelta(0, int(self.requestData["timeDeltaMax"]) );   # Time delta to use for queries, otherwise just default to all times

        # NOTE(review): colNames documents the layout of the result tuples below
        # but is not otherwise referenced in this test
        colNames = [ "patient_id", "clinical_item_id", "iItem", "iRecItem", "recRank", "recScore" ]

        # Start with default recommender
        expectedResults = \
            [   (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),  #0.170),  Don't care about specific scores, as long as ranks are correct
                (-11111,-10, 1, 1, 4, SENTINEL_ANY_FLOAT),  #0.032),
                (-11111, -8, 2, 2, 5, SENTINEL_ANY_FLOAT),  #0.025),
                (-11111,-12, 4, 3, 2, SENTINEL_ANY_FLOAT),  #0.053),
            ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualTable(expectedResults, analysisResults, 3)

        # Now try targeted recommender
        analysisQuery.recommender = ItemAssociationRecommender()
        expectedResults = \
            [   (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),  #0.167),
                (-11111,-10, 1, 1, 2, SENTINEL_ANY_FLOAT),  #0.304),
                (-11111, -8, 2, 2, 5, SENTINEL_ANY_FLOAT),  #0.190),
                (-11111,-12, 4, 3, 1, SENTINEL_ANY_FLOAT),  #0.444),
            ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualTable(expectedResults, analysisResults, 3)

        # Repeat, but put a limit on maximum number of query items and recommendations we want analyzed
        analysisQuery.queryItemMax = 2
        expectedResults = \
            [   (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),  #0.167),
                (-11111,-10, 1, 1, 2, SENTINEL_ANY_FLOAT),  #0.304),
            ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualTable(expectedResults, analysisResults, 3)
    def setUp(self):
        """Prepare state for test cases.

        Builds the clinical item schemata, then seeds categories, items
        (including one with analysis_status cleared), encounter-linked
        patient items, and clinical_item_link inheritance rows used by the
        DecayingWindows analysis under test.
        """
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        # Negative IDs are used throughout so test rows cannot collide with real data
        self.clinicalItemCategoryIdStrList = list()
        headers = ["clinical_item_category_id", "source_table"]
        dataModels = \
            [   RowItemModel( [-1, "Labs"], headers ),
                RowItemModel( [-2, "Imaging"], headers ),
                RowItemModel( [-3, "Meds"], headers ),
                RowItemModel( [-4, "Nursing"], headers ),
                RowItemModel( [-5, "Problems"], headers ),
                RowItemModel( [-6, "Lab Results"], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_category", dataModel)
            self.clinicalItemCategoryIdStrList.append(str(dataItemId))

        headers = [ "clinical_item_id", "clinical_item_category_id", "name", "analysis_status" ]
        dataModels = \
            [   RowItemModel( [-1, -1, "CBC",1], headers ),
                RowItemModel( [-2, -1, "BMP",0], headers ),  # Clear analysis status, so this will be ignored unless changed
                RowItemModel( [-3, -1, "Hepatic Panel",1], headers ),
                RowItemModel( [-4, -1, "Cardiac Enzymes",1], headers ),
                RowItemModel( [-5, -2, "CXR",1], headers ),
                RowItemModel( [-6, -2, "RUQ Ultrasound",1], headers ),
                RowItemModel( [-7, -2, "CT Abdomen/Pelvis",1], headers ),
                RowItemModel( [-8, -2, "CT PE Thorax",1], headers ),
                RowItemModel( [-9, -3, "Acetaminophen",1], headers ),
                RowItemModel( [-10, -3, "Carvedilol",1], headers ),
                RowItemModel( [-11, -3, "Enoxaparin",1], headers ),
                RowItemModel( [-12, -3, "Warfarin",1], headers ),
                RowItemModel( [-13, -3, "Ceftriaxone",1], headers ),
                RowItemModel( [-14, -4, "Foley Catheter",1], headers ),
                RowItemModel( [-15, -4, "Strict I&O",1], headers ),
                RowItemModel( [-16, -4, "Fall Precautions",1], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", dataModel)

        headers = [ "patient_item_id", "encounter_id", "patient_id", "clinical_item_id", "item_date" ]
        dataModels = \
            [   RowItemModel( [-1, -111, -11111, -4, datetime(2000, 1, 1, 0)], headers ),
                RowItemModel( [-2, -111, -11111, -10, datetime(2000, 1, 1, 0)], headers ),
                RowItemModel( [-3, -111, -11111, -8, datetime(2000, 1, 1, 2)], headers ),
                RowItemModel( [-4, -112, -11111, -10, datetime(2000, 1, 2, 0)], headers ),
                RowItemModel( [-5, -112, -11111, -12, datetime(2000, 2, 1, 0)], headers ),
                RowItemModel( [-10, -222, -22222, -7, datetime(2000, 1, 5, 0)], headers ),
                RowItemModel( [-12, -222, -22222, -6, datetime(2000, 1, 9, 0)], headers ),
                RowItemModel( [-13, -222, -22222, -11, datetime(2000, 1, 9, 0)], headers ),
                RowItemModel( [-95, -222, -22222, -9, datetime(2000, 1,10, 0)], headers ),
                RowItemModel( [-94, -333, -33333, -8, datetime(2000, 1,10, 0)], headers ),  # In first window delta unit only
                RowItemModel( [-14, -333, -33333, -6, datetime(2000, 2, 9, 0)], headers ),
                RowItemModel( [-15, -333, -33333, -2, datetime(2000, 2,11, 0)], headers ),  # Will set clinical_item_link inheritances to this item to only record certain associations
                RowItemModel( [-16, -333, -33333, -11, datetime(2000, 2,11, 0)], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("patient_item", dataModel)

        headers = ["clinical_item_id", "linked_item_id"]
        dataModels = \
            [   # Don't have direct, but instead demonstrate inherited relationship from 6 to 2 will still be recognized
                RowItemModel( [-6, -4], headers ),
                RowItemModel( [-4, -2], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_link", dataModel)

        self.decayAnalyzer = DecayingWindows()  # DecayingWindows instance to test on, *** remember to change database to medinfo_copy
        self.dataManager = DataManager()
class TestDecayingWindows(DBTestCase):
    """Integration tests for the DecayingWindows association trainer.

    Inserts a controlled clinical/patient fixture (all negative IDs), runs
    the decaying-windows analysis over it, and checks the resulting
    clinical_item_association counts against hand-computed expectations.
    Requires a live test database (set up/torn down via DBTestCase).
    """

    def setUp(self):
        """Prepare state for test cases.

        Fixture values and insert order are load-bearing: the expected
        association statistics in the test methods below are hand-computed
        from exactly this data.
        """
        DBTestCase.setUp(self)

        log.info("Populate the database with test data")
        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        # Record inserted category IDs (as strings) so tearDown can purge them
        self.clinicalItemCategoryIdStrList = list()

        headers = ["clinical_item_category_id", "source_table"]
        dataModels = \
            [
                RowItemModel( [-1, "Labs"], headers ),
                RowItemModel( [-2, "Imaging"], headers ),
                RowItemModel( [-3, "Meds"], headers ),
                RowItemModel( [-4, "Nursing"], headers ),
                RowItemModel( [-5, "Problems"], headers ),
                RowItemModel( [-6, "Lab Results"], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_category", dataModel)
            self.clinicalItemCategoryIdStrList.append(str(dataItemId))

        # Clinical items; analysis_status = 0 rows are excluded from analysis
        headers = [ "clinical_item_id", "clinical_item_category_id", "name", "analysis_status" ]
        dataModels = \
            [
                RowItemModel( [-1, -1, "CBC",1], headers ),
                RowItemModel( [-2, -1, "BMP",0], headers ), # Clear analysis status, so this will be ignored unless changed
                RowItemModel( [-3, -1, "Hepatic Panel",1], headers ),
                RowItemModel( [-4, -1, "Cardiac Enzymes",1], headers ),
                RowItemModel( [-5, -2, "CXR",1], headers ),
                RowItemModel( [-6, -2, "RUQ Ultrasound",1], headers ),
                RowItemModel( [-7, -2, "CT Abdomen/Pelvis",1], headers ),
                RowItemModel( [-8, -2, "CT PE Thorax",1], headers ),
                RowItemModel( [-9, -3, "Acetaminophen",1], headers ),
                RowItemModel( [-10, -3, "Carvedilol",1], headers ),
                RowItemModel( [-11, -3, "Enoxaparin",1], headers ),
                RowItemModel( [-12, -3, "Warfarin",1], headers ),
                RowItemModel( [-13, -3, "Ceftriaxone",1], headers ),
                RowItemModel( [-14, -4, "Foley Catheter",1], headers ),
                RowItemModel( [-15, -4, "Strict I&O",1], headers ),
                RowItemModel( [-16, -4, "Fall Precautions",1], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", dataModel)

        # Patient event timeline: item dates chosen so some co-occurrences
        # fall inside one analysis delta window and others straddle windows
        headers = [ "patient_item_id", "encounter_id", "patient_id", "clinical_item_id", "item_date" ]
        dataModels = \
            [
                RowItemModel( [-1, -111, -11111, -4, datetime(2000, 1, 1, 0)], headers ),
                RowItemModel( [-2, -111, -11111, -10, datetime(2000, 1, 1, 0)], headers ),
                RowItemModel( [-3, -111, -11111, -8, datetime(2000, 1, 1, 2)], headers ),
                RowItemModel( [-4, -112, -11111, -10, datetime(2000, 1, 2, 0)], headers ),
                RowItemModel( [-5, -112, -11111, -12, datetime(2000, 2, 1, 0)], headers ),
                RowItemModel( [-10, -222, -22222, -7, datetime(2000, 1, 5, 0)], headers ),
                RowItemModel( [-12, -222, -22222, -6, datetime(2000, 1, 9, 0)], headers ),
                RowItemModel( [-13, -222, -22222, -11, datetime(2000, 1, 9, 0)], headers ),
                RowItemModel( [-95, -222, -22222, -9, datetime(2000, 1,10, 0)], headers ),
                RowItemModel( [-94, -333, -33333, -8, datetime(2000, 1,10, 0)], headers ),  # In first window delta unit only
                RowItemModel( [-14, -333, -33333, -6, datetime(2000, 2, 9, 0)], headers ),
                RowItemModel( [-15, -333, -33333, -2, datetime(2000, 2,11, 0)], headers ),  # Will set clinical_item_link inheritances to this item to only record certain associations
                RowItemModel( [-16, -333, -33333, -11, datetime(2000, 2,11, 0)], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("patient_item", dataModel)

        # Item linkage hierarchy: -6 -> -4 -> -2.
        # Don't have direct, but instead demonstrate inherited relationship from 6 to 2 will still be recognized
        headers = ["clinical_item_id", "linked_item_id"]
        dataModels = \
            [
                RowItemModel( [-6, -4], headers ),
                RowItemModel( [-4, -2], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_link", dataModel)

        self.decayAnalyzer = DecayingWindows( )  # DecayingWindows instance to test on, *** remember to change database to medinfo_copy
        self.dataManager = DataManager()

    def tearDown(self):
        """Restore state from any setUp or test steps"""
        log.info("Purge test records from the database")
        # Delete in dependency order: links/associations first, then items,
        # then categories (only the categories this test inserted)
        DBUtil.execute("delete from clinical_item_link where clinical_item_id < 0")
        DBUtil.execute("delete from clinical_item_association where clinical_item_id < 0")
        DBUtil.execute("delete from patient_item where patient_item_id < 0")
        DBUtil.execute("delete from clinical_item where clinical_item_id < 0")
        DBUtil.execute("delete from clinical_item_category where clinical_item_category_id in (%s)" % str.join(",", self.clinicalItemCategoryIdStrList))
        # Purge temporary buffer files. May not match exact name if modified for other purpose
        for filename in os.listdir("."):
            if filename.startswith(TEMP_FILENAME):
                os.remove(filename)
        DBTestCase.tearDown(self)

    def test_decayingWindowsFromBuffer(self):
        """Run the buffered decaying-windows analysis twice: once with an
        explicit decay factor, then (after a model reset) with only a
        windowLength, from which the decay parameter should be derived.
        """
        associationQuery = \
            """
            select clinical_item_id, subsequent_item_id, count_0, count_3600, count_86400, count_604800, count_2592000, count_7776000, count_31536000, count_any
            from clinical_item_association
            where clinical_item_id < 0
            order by clinical_item_id, subsequent_item_id
            """

        decayAnalysisOptions = DecayAnalysisOptions()
        decayAnalysisOptions.startD = datetime(2000, 1, 9)
        decayAnalysisOptions.endD = datetime(2000, 2, 11)
        #decayAnalysisOptions.windowLength = 10
        decayAnalysisOptions.decay = 0.9
        decayAnalysisOptions.delta = timedelta(weeks=4)
        decayAnalysisOptions.patientIds = [-22222, -33333]
        decayAnalysisOptions.outputFile = TEMP_FILENAME

        self.decayAnalyzer.decayAnalyzePatientItems(decayAnalysisOptions)

        expectedAssociationStats = \
            [
                [-11,-11, 1.9, 1.9, 1.9, 1.9, 1.9, 0, 0, 1.9],  # Note that decaying windows approach will not try to update counts for time periods longer than the delta period
                [-11, -9, 0.0, 0.0, 0.9, 0.9, 0.9, 0, 0, 0.9],
                [-11, -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],  # 8 not in same delta as other items so co-occurence not gettign counted. Consider future upgrade. Don't train on all time ever, but train on two deltas at a time, sliding / shifting window so do catch the overlap ranges
                [-11, -6, 0.9, 0.9, 0.9, 0.9, 0.9, 0, 0, 0.9],
                [ -9,-11, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -9, -9, 0.9, 0.9, 0.9, 0.9, 0.9, 0, 0, 0.9],
                [ -9, -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -9, -6, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -8,-11, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],  # 8 not in same delta as other items so co-occurence not gettign counted.
                [ -8, -9, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],  # 8 not in same delta as other items so co-occurence not gettign counted.
                [ -8, -8, 0.9, 0.9, 0.9, 0.9, 0.9, 0, 0, 0.9],
                [ -8, -6, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],  # 8 not in same delta as other items so co-occurence not gettign counted.
                [ -6,-11, 0.9, 0.9, 0.9, 1.9, 1.9, 0, 0, 1.9],
                [ -6, -9, 0.0, 0.0, 0.9, 0.9, 0.9, 0, 0, 0.9],
                [ -6, -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],  # 8 not in same delta as other items so co-occurence not gettign counted.
                [ -6, -6, 1.9, 1.9, 1.9, 1.9, 1.9, 0, 0, 1.9],
            ]
        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

        expectedItemBaseCountById = \
            {
                -1: 0,
                -2: 0,
                -3: 0,
                -4: 0,
                -5: 0,
                -6: 1.9,
                -7: 0,
                -8: 0.9,
                -9: 0.9,
                -10: 0,
                -11: 1.9,
                -12: 0,
                -13: 0,
                -14: 0,
                -15: 0,
                -16: 0,
            }
        itemBaseCountById = self.dataManager.loadClinicalItemBaseCountByItemId( )
        self.assertEqualDict(expectedItemBaseCountById, itemBaseCountById)

        ######## Reset the model data and rerun with different decay parameters
        self.dataManager.resetAssociationModel()

        decayAnalysisOptions = DecayAnalysisOptions()
        decayAnalysisOptions.startD = datetime(2000, 1, 9)
        decayAnalysisOptions.endD = datetime(2000, 2, 11)
        decayAnalysisOptions.windowLength = 4  # Just specify window length, then should calculate decay parameter
        #decayAnalysisOptions.decay = 0.9
        decayAnalysisOptions.delta = timedelta(weeks=4)
        decayAnalysisOptions.patientIds = [-22222, -33333]
        decayAnalysisOptions.outputFile = TEMP_FILENAME

        self.decayAnalyzer.decayAnalyzePatientItems(decayAnalysisOptions)

        # Same co-occurrence structure as above, but with the derived decay
        # (0.75 per-window contribution) instead of the explicit 0.9
        expectedAssociationStats = \
            [
                [-11,-11, 1.75, 1.75, 1.75, 1.75, 1.75, 0, 0, 1.75],
                [-11, -9, 0.0, 0.0, 0.75, 0.75, 0.75, 0, 0, 0.75],
                [-11, -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [-11, -6, 0.75, 0.75, 0.75, 0.75, 0.75, 0, 0, 0.75],
                [ -9,-11, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -9, -9, 0.75, 0.75, 0.75, 0.75, 0.75, 0, 0, 0.75],
                [ -9, -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -9, -6, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -8,-11, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -8, -9, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -8, -8, 0.75, 0.75, 0.75, 0.75, 0.75, 0, 0, 0.75],
                [ -8, -6, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -6,-11, 0.75, 0.75, 0.75, 1.75, 1.75, 0, 0, 1.75],
                [ -6, -9, 0.0, 0.0, 0.75, 0.75, 0.75, 0, 0, 0.75],
                [ -6, -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -6, -6, 1.75, 1.75, 1.75, 1.75, 1.75, 0, 0, 1.75],
            ]
        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

        expectedItemBaseCountById = \
            {
                -1: 0,
                -2: 0,
                -3: 0,
                -4: 0,
                -5: 0,
                -6: 1.75,
                -7: 0,
                -8: 0.75,
                -9: 0.75,
                -10: 0,
                -11: 1.75,
                -12: 0,
                -13: 0,
                -14: 0,
                -15: 0,
                -16: 0,
            }
        itemBaseCountById = self.dataManager.loadClinicalItemBaseCountByItemId( acceptCache=False)  # Don't use cache, otherwise will get prior results
        self.assertEqualDict(expectedItemBaseCountById, itemBaseCountById)

    def test_decayingWindows(self):
        # Muthu's function to test DecayingWindows module
        associationQuery = \
            """
            select clinical_item_id, subsequent_item_id, patient_count_0, patient_count_3600, patient_count_86400, patient_count_604800, patient_count_2592000, patient_count_7776000, patient_count_31536000, patient_count_any
            from clinical_item_association
            where clinical_item_id < 0
            order by clinical_item_id, subsequent_item_id
            """

        decayAnalysisOptions = DecayAnalysisOptions()
        decayAnalysisOptions.startD = datetime(2000, 1, 9)
        decayAnalysisOptions.endD = datetime(2000, 2, 11)
        decayAnalysisOptions.windowLength = 10
        decayAnalysisOptions.decay = 0.9
        decayAnalysisOptions.delta = timedelta(weeks=4)
        decayAnalysisOptions.patientIds = [-22222, -33333]

        self.decayAnalyzer.decayAnalyzePatientItems(decayAnalysisOptions)

        expectedAssociationStats = \
            [
                [-11,-11, 1.9, 1.9, 1.9, 1.9, 1.9, 0, 0, 1.9],  # Note that decaying windows approach will not try to update counts for time periods longer than the delta period
                [-11, -9, 0.0, 0.0, 0.9, 0.9, 0.9, 0, 0, 0.9],
                [-11, -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],  # 8 not in same delta as other items so co-occurence not gettign counted. Consider future upgrade. Don't train on all time ever, but train on two deltas at a time, sliding / shifting window so do catch the overlap ranges. Problem here is buffer based algorithm, won't be recording analyze_dates as go, so will end up with duplicate counts of items each month?
                [-11, -6, 0.9, 0.9, 0.9, 0.9, 0.9, 0, 0, 0.9],
                [ -9,-11, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -9, -9, 0.9, 0.9, 0.9, 0.9, 0.9, 0, 0, 0.9],
                [ -9, -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -9, -6, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],
                [ -8,-11, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],  # 8 not in same delta as other items so co-occurence not gettign counted.
                [ -8, -9, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],  # 8 not in same delta as other items so co-occurence not gettign counted.
                [ -8, -8, 0.9, 0.9, 0.9, 0.9, 0.9, 0, 0, 0.9],
                [ -8, -6, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],  # 8 not in same delta as other items so co-occurence not gettign counted.
                [ -6,-11, 0.9, 0.9, 0.9, 1.9, 1.9, 0, 0, 1.9],
                [ -6, -9, 0.0, 0.0, 0.9, 0.9, 0.9, 0, 0, 0.9],
                [ -6, -8, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0.0],  # 8 not in same delta as other items so co-occurence not gettign counted.
                [ -6, -6, 1.9, 1.9, 1.9, 1.9, 1.9, 0, 0, 1.9],
            ]
        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

        #DBUtil.execute("delete from clinical_item_association")

        # Add another training period then should get a second decay multiplier for older data?
        # Weird in that incrementally building on prior data that is getting decayed, even though new training data actually occurred before chronologic time of data
        decayAnalysisOptions = DecayAnalysisOptions()
        decayAnalysisOptions.startD = datetime(2000, 1, 1)
        decayAnalysisOptions.endD = datetime(2000, 2, 12)
        decayAnalysisOptions.windowLength = 10
        decayAnalysisOptions.decay = 0.9
        decayAnalysisOptions.delta = timedelta(weeks=4)
        decayAnalysisOptions.patientIds = [-22222, -33333]

        self.decayAnalyzer.decayAnalyzePatientItems(decayAnalysisOptions)

        # Prior counts decayed once more (0.9 * 0.81 = 0.729 etc.) plus new
        # contributions from the extra period, which pulls in item -7 rows
        expectedAssociationStats = \
            [
                [-11L, -11L, 1.539, 1.539, 1.539, 1.539, 1.539, 0.0, 0.0, 1.539],
                [-11L, -9L, 0.0, 0.0, 0.729, 0.729, 0.729, 0.0, 0.0, 0.729],
                [-11L, -8L, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-11L, -7L, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-11L, -6L, 0.729, 0.729, 0.729, 0.729, 0.729, 0.0, 0.0, 0.729],
                [-9L, -11L, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-9L, -9L, 0.729, 0.729, 0.729, 0.729, 0.729, 0.0, 0.0, 0.729],
                [-9L, -8L, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-9L, -7L, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-9L, -6L, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-8L, -11L, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-8L, -9L, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-8L, -8L, 0.729, 0.729, 0.729, 0.729, 0.729, 0.0, 0.0, 0.729],
                [-8L, -6L, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-7L, -11L, 0.0, 0.0, 0.0, 0.9, 0.9, 0.0, 0.0, 0.9],
                [-7L, -9L, 0.0, 0.0, 0.0, 0.9, 0.9, 0.0, 0.0, 0.9],
                [-7L, -7L, 0.9, 0.9, 0.9, 0.9, 0.9, 0.0, 0.0, 0.9],
                [-7L, -6L, 0.0, 0.0, 0.0, 0.9, 0.9, 0.0, 0.0, 0.9],
                [-6L, -11L, 0.729, 0.729, 0.729, 1.539, 1.539, 0.0, 0.0, 1.539],
                [-6L, -9L, 0.0, 0.0, 0.729, 0.729, 0.729, 0.0, 0.0, 0.729],
                [-6L, -8L, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-6L, -7L, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [-6L, -6L, 1.539, 1.539, 1.539, 1.539, 1.539, 0.0, 0.0, 1.539],
            ]
        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

    def test_resetModel(self):
        """resetAssociationModel should empty the association table and null
        out the per-patient-item analyze_date markers.
        """
        associationQuery = \
            """
            select clinical_item_id, subsequent_item_id, patient_count_0, patient_count_3600, patient_count_86400, patient_count_604800, patient_count_2592000, patient_count_7776000, patient_count_31536000, patient_count_any
            from clinical_item_association
            where clinical_item_id < 0
            order by clinical_item_id, subsequent_item_id
            """
        associationQueryDate = \
            """
            select patient_item_id, analyze_date
            from patient_item
            where patient_item_id < 0
            order by patient_item_id
            """

        # fill up the association table with something
        decayAnalysisOptions = DecayAnalysisOptions()
        decayAnalysisOptions.startD = datetime(2000, 1, 9)
        decayAnalysisOptions.endD = datetime(2000, 2, 11)
        decayAnalysisOptions.windowLength = 10
        decayAnalysisOptions.decay = 0.9
        decayAnalysisOptions.patientIds = [-22222, -33333]
        self.decayAnalyzer.decayAnalyzePatientItems(decayAnalysisOptions)

        # then clear the table
        self.dataManager.resetAssociationModel()

        expectedAssociationStats = \
            [
            ]
        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats, associationStats, precision=3)

        # Set as NULL
        expectedAssociationStatsDate = \
            [
                [-95, None],
                [-94, None],
                [-16, None],
                [-15, None],
                [-14, None],
                [-13, None],
                [-12, None],
                [-10, None],
                [-5, None],
                [-4, None],
                [-3, None],
                [-2, None],
                [-1, None],
            ]
        associationStatsDate = DBUtil.execute(associationQueryDate)
        self.assertEqualTable(expectedAssociationStatsDate, associationStatsDate)
def __init__(self):
    """Initialize shared database helpers for this instance."""
    # Shared DataManager helper (cache resets, count updates, etc.)
    self.dataManager = DataManager()
    # Default connection source for subsequent database work
    self.connFactory = DBUtil.ConnectionFactory()