def test_dump_metadata(self, mock_writing):
    """Save a two-cohort collection and compare the returned metadata keys.

    ``mock_writing`` is not used in the body; it is presumably supplied by
    a ``patch`` decorator that stubs out the actual write -- TODO confirm.
    """
    subjects, _ = self.create_spark_df({"patientID": [1, 2]})
    patients_cohort = Cohort("test", "test", subjects, None)
    events, _ = self.create_spark_df(
        {"patientID": [1, 2], "category": ["test", "test"]}
    )
    events_cohort = Cohort("events", "events", subjects, events)
    collection = CohortCollection(
        {"test": patients_cohort, "events": events_cohort}
    )

    events_operation = {
        "output_type": "events",
        "name": "events",
        "output_path": "../../output/events/data",
        "population_path": "../../output/events/subjects",
    }
    patients_operation = {
        "output_type": "patients",
        "output_path": "../../output/test/subjects",
        "name": "test",
    }

    # NOTE(review): sorted() over a dict yields only its keys, so this
    # evaluates to ["operations"]. If save() also returns a dict, the
    # operation entries above are never compared -- confirm whether a
    # stricter assertion on the entries themselves is intended.
    expected_keys = sorted(
        {"operations": [events_operation, patients_operation]}
    )
    saved_keys = sorted(collection.save("../../output"))

    self.assertEqual(expected_keys, saved_keys)
def test_union_all(self, mock_Cohort):
    """``union_all`` over three overlapping collections keeps every name once."""
    # Every name maps to the same mock cohort; only the key sets differ.
    name_groups = (
        ("extract_patients", "acts"),
        ("exposures", "outcomes", "extract_patients"),
        ("diagnoses", "outcomes", "extract_patients"),
    )
    collections = [
        CohortCollection(dict.fromkeys(names, mock_Cohort))
        for names in name_groups
    ]

    merged = CohortCollection.union_all(collections)

    self.assertSetEqual(
        {"extract_patients", "acts", "outcomes", "exposures", "diagnoses"},
        merged.cohorts_names,
    )
def test_intersect(self, mock_Cohort):
    """``intersection`` keeps only the cohort names shared by both collections."""
    # Every name maps to the same mock cohort; only the key sets differ.
    left = CohortCollection(
        dict.fromkeys(("extract_patients", "acts"), mock_Cohort)
    )
    right = CohortCollection(
        dict.fromkeys(
            ("exposures", "outcomes", "extract_patients"), mock_Cohort
        )
    )

    common = left.intersection(right)

    self.assertSetEqual({"extract_patients"}, common.cohorts_names)
def test_cohort_collection_from_cohort_flow(self):
    """Apply a flow with one ``union`` operation to a two-cohort collection.

    The resulting collection is expected to hold both parent cohorts plus
    the ``outcome`` cohort named by the operation.
    """
    # Flow description passed to the function under test: one intermediate
    # "union" operation named "outcome" with two parents, plus a "cohorts"
    # list. Split across adjacent literals only to keep lines short.
    flow_description = (
        ' { "intermediate_operations": { "operation": { '
        '"type": "union", "name": "outcome", '
        '"parents": ["liberal_fractures", "hospit_fractures"] } }, '
        '"cohorts": [ "extract_patients", "exposures", '
        '"filter_patients", "outcome" ] } '
    )

    subjects, _ = self.create_spark_df({"patientID": [1, 2, 3]})
    parents = CohortCollection(
        {
            name: Cohort(name, name, subjects, None)
            for name in ("liberal_fractures", "hospit_fractures")
        }
    )

    outcome_collection = cohort_collection_from_cohort_flow(
        parents, flow_description
    )

    self.assertSetEqual(
        set(outcome_collection.cohorts.keys()),
        {"liberal_fractures", "hospit_fractures", "outcome"},
    )
def test_eq(self):
    """Exercise ``==`` / ``!=`` between collections and against another type."""
    subjects, _ = self.create_spark_df({"patientID": [1, 2]})
    patients_cohort = Cohort("test", "test", subjects, None)
    events, _ = self.create_spark_df(
        {"patientID": [1, 2], "category": ["test", "test"]}
    )
    events_cohort = Cohort("events", "events", subjects, events)

    # Same keys, same cohorts: expected equal.
    reference = CohortCollection(
        {"test": patients_cohort, "events": events_cohort}
    )
    twin = CohortCollection(
        {"test": patients_cohort, "events": events_cohort}
    )
    self.assertEqual(reference, twin)

    # Same cohorts under a different key: expected unequal.
    renamed = CohortCollection(
        {"test1": patients_cohort, "events": events_cohort}
    )
    self.assertNotEqual(reference, renamed)

    # Same keys, but the "test" cohort is built from different patient IDs.
    other_subjects, _ = self.create_spark_df({"patientID": [1, 45]})
    other_patients_cohort = Cohort("test", "test", other_subjects, None)
    different_content = CohortCollection(
        {"test": other_patients_cohort, "events": events_cohort}
    )
    self.assertNotEqual(reference, different_content)

    # An object that is not a collection at all must not compare equal.
    self.assertNotEqual(reference, other_subjects)
def test_from_json(self, mock_read_data_frame):
    """Load a collection from a metadata dict and check the cohort names.

    ``mock_read_data_frame`` stands in for the data-frame reader
    (presumably injected by a ``patch`` decorator not visible here), so
    the paths in the metadata are never actually read.
    """
    # create_spark_df returns a 2-tuple everywhere else in these tests, so
    # unpack it: the mocked reader must hand back the DataFrame itself,
    # not the (DataFrame, _) tuple.
    subjects, _ = self.create_spark_df({"patientID": [1, 2]})
    mock_read_data_frame.return_value = subjects

    metadata = {
        "class_name": "fr.polytechnique.cmap.cnam.study."
        "pioglitazone.PioglitazoneMain$",
        "start_timestamp": "2018-07-25T10:13:10Z",
        "end_timestamp": "2018-07-25T10:45:52Z",
        "operations": [
            {
                "name": "extract_patients",
                "inputs": ["DCIR", "MCO", "IR_BEN_R"],
                "output_type": "patients",
                "output_path": "/some/path/to/extract_patients/data",
            },
            {
                "name": "drug_purchases",
                "inputs": ["DCIR"],
                "output_type": "dispensations",
                "output_path": "/some/path/to/drug_purchases/data",
                "population_path": "/some/path/to/drug_purchases/patients",
            },
            {
                "name": "diagnoses",
                "inputs": ["MCO", "IR_IMB_R"],
                "output_type": "diagnosis",
                "output_path": "/some/path/to/diagnoses/data",
                "population_path": "/some/path/to/diagnoses/patients",
            },
            {
                "name": "acts",
                "inputs": ["DCIR", "MCO", "MCO_CE"],
                "output_type": "acts",
                "output_path": "/some/path/to/acts/data",
                "population_path": "/some/path/to/acts/patients",
            },
            {
                "name": "outcomes",
                "inputs": ["acts", "diagnoses"],
                "output_type": "outcomes",
                "output_path": "/some/path/to/outcomes/data",
                "population_path": "/some/path/to/outcomes/patients",
            },
            {
                "name": "exposures",
                "inputs": ["drug_purchases", "followup"],
                "output_type": "exposures",
                "output_path": "/some/path/to/exposures/data",
                "population_path": "/some/path/to/exposures/patients",
            },
        ],
    }

    result = CohortCollection.load(metadata)

    # One cohort is expected per entry in metadata["operations"].
    expected_cohorts = {
        "extract_patients",
        "drug_purchases",
        "diagnoses",
        "acts",
        "outcomes",
        "exposures",
    }
    self.assertSetEqual(expected_cohorts, result.cohorts_names)