def test__StartingPopulation():
    """StartingPopulation with label "C" should leave 6 rows from a 20-row frame.

    The input frame has 2 rows labelled "T", 6 labelled "C" and 12 holding
    the placeholder value 0 in the ``evaluation_group`` column.
    """
    frame = pd.DataFrame(
        {
            "original_population": [True] * 20,
            "eligible_population": [True] * 8 + [False] * 12,
            "evaluation_group": ["T"] * 2 + ["C"] * 6 + [0] * 12,
        }
    )
    slice_id = PopulationSliceID(date="2016-01-01")
    any_data_id = TreatmentPeriodID(
        population_slice_id=slice_id,
        time_period=pd.Period("2016-01"),
    )
    starting_step = StartingPopulation(
        eligible_from_previous_period_col="evaluation_group",
        starting_pop_label="C",
    )

    results = TreatmentPeriod(
        id=any_data_id,
        setup_steps=SetupSteps(steps=[starting_step]),
        init_data=frame,
    )

    # Diagnostic output; pytest only shows it when the test fails (or with -s).
    print(results.data)

    # The new population should match the 6 rows of the original control group.
    assert len(results.data) == 6
def test__all_SetupSteps_for_EvaluationModel_population_slices(
    fixture__population_slice_setup_steps,
    fixture__population_slice__expected_columns,
    tmpdir,
):
    """Run the full population-slice setup steps through an EvaluationModel.

    Builds a model backed by a sqlite ModelDataHandler located in ``tmpdir``,
    adds population slices for 2016-07-01 .. 2016-09-30 and checks the columns
    and row counts of the slice dated 2016-07-01.
    """
    steps_by_date = {
        pd.Timestamp("2016-01-01"): fixture__population_slice_setup_steps
    }
    slice_generator = PopulationSliceGenerator(
        setup_steps_by_date=steps_by_date,
        start=pd.Timestamp("2016-07-01"),
        end=pd.Timestamp("2016-09-30"),
    )
    handler = ModelDataHandler(
        database_type="sqlite",
        location=tmpdir,
        name="jobpath_evaluation",
    )
    evaluation_model = EvaluationModel(
        data_handler=handler,
        population_slice_generator=slice_generator,
    )
    evaluation_model.add_population_slices()

    wanted_id = PopulationSliceID(date=pd.Timestamp("2016-07-01"))
    results = evaluation_model.population_slices[wanted_id]

    assert set(results.data.columns) == set(fixture__population_slice__expected_columns)
    # Expected counts were checked by hand: total rows, then eligible rows.
    assert len(results.data) == 315654
    assert len(results.data[results.data["eligible_population"]]) == 86240
def test__all_SetupSteps__subsequent_TreatmentPeriod(
    fixture__population_slice_setup_steps,
    fixture__treatment_period_setup_steps,
    fixture__treatment_period_expected_columns,
):
    """Chain two treatment periods and check the second one's population.

    The second period (2016-08) is initialised from a copy of the first
    period's (2016-07) data. Its row count must not exceed the number of
    eligible rows in the first period minus the rows labelled "T" there.
    """
    ps = PopulationSlice(
        id=PopulationSliceID(date=pd.Timestamp("2016-07-01")),
        setup_steps=fixture__population_slice_setup_steps,
    )

    july_id = TreatmentPeriodID(
        population_slice_id=ps.id, time_period=pd.Period("2016-07")
    )
    first_tp = TreatmentPeriod(
        id=july_id,
        setup_steps=fixture__treatment_period_setup_steps,
        init_data=ps.data,
    )

    august_id = TreatmentPeriodID(
        population_slice_id=ps.id, time_period=pd.Period("2016-08")
    )
    results = TreatmentPeriod(
        id=august_id,
        setup_steps=fixture__treatment_period_setup_steps,
        init_data=first_tp.data.copy(),
    )

    assert set(results.data.columns) == set(fixture__treatment_period_expected_columns)

    # Upper bound: eligible rows of the first period less its treated rows.
    n_first_eligible = len(first_tp.data[first_tp.data["eligible_population"]])
    n_first_treated = len(first_tp.data[first_tp.data["evaluation_group"] == "T"])
    len_expected = n_first_eligible - n_first_treated

    assert len(results.data) <= len_expected
    # At least one row of the second period must be flagged as not eligible.
    assert len(results.data[results.data["eligible_population"]]) < len(results.data)
def test__EvaluationModel__add_slices(fixture__population_slice_generator,):
    """EvaluationModel.add_population_slices() should expose the 2016-07-01 slice.

    The slice is looked up by a PopulationSliceID built from a plain
    ``pd.Timestamp`` and is expected to hold 90 rows and 5 columns.
    """
    evaluation_model = EvaluationModel(
        population_slice_generator=fixture__population_slice_generator,
    )
    evaluation_model.add_population_slices()

    # The ``freq`` keyword of pd.Timestamp was deprecated in pandas 1.4 and
    # removed in 2.0. Timestamp equality and hashing do not depend on it, so a
    # freq-less timestamp addresses the same slice.
    slice_id = PopulationSliceID(pd.Timestamp("2016-07-01 00:00:00"))
    results = evaluation_model.population_slices[slice_id]

    assert results.data.shape == (90, 5,)
def test__AgeEligible__ge_min(fixture__date_of_birth_df):
    """AgeEligible with a 25-year minimum should flag 25 rows of the fixture."""
    age_step = AgeEligible(
        date_of_birth_col="date_of_birth",
        min_eligible={"years": 25},
    )
    reference_id = PopulationSliceID(date=pd.Timestamp("2016-01-01"))

    results = age_step.run(reference_id, data=fixture__date_of_birth_df)

    # Rows flagged in "age_eligible": 25 expected, each with 2 columns
    # (presumably date_of_birth and age_eligible -- depends on the fixture).
    flagged = results.loc[results["age_eligible"]]
    assert flagged.shape == (25, 2)
def test__ClaimDurationEligible__ge_min(fixture__claim_duration_df):
    """ClaimDurationEligible with a 1-year minimum should flag 28 fixture rows."""
    duration_step = ClaimDurationEligible(
        claim_start_col="clm_comm_date",
        min_eligible={"years": 1},
    )
    reference_id = PopulationSliceID(date=pd.Timestamp("2016-01-01"))

    results = duration_step.run(reference_id, data=fixture__claim_duration_df)

    # Keep only the rows flagged in "claim_duration_eligible".
    flagged = results.loc[results["claim_duration_eligible"]]
    assert flagged.shape == (28, 2)
def test__ClaimCodeEligible():
    """ClaimCodeEligible should flag exactly the two rows with codes "UA" and "UB".

    The input also contains look-alike codes such as "UA2" and "UB2"; the
    expected shape of (2, 2) implies those are not flagged.
    """
    codes = ["UA", "UB", "UC", "UD", "UE", "UA2", "UB2", "UC2", "UD2", "UE2"]
    frame = pd.DataFrame({"lr_code": codes})
    code_step = ClaimCodeEligible(code_col="lr_code", eligible_codes=["UA", "UB"])
    reference_id = PopulationSliceID(date=pd.Timestamp("2016-01-01"))

    results = code_step.run(reference_id, data=frame)

    flagged = results.loc[results["claim_code_eligible"]]
    assert flagged.shape == (2, 2)
def test__OnJobPath():
    """Test basic case using just JobPath operational data and not ISTS flag.

    Runs OnJobPath (assumed episode length of one year) over the output of
    LiveRegisterPopulation for 2016-02-01 and checks the flagged row count.
    """
    on_jobpath_step = OnJobPath(assumed_episode_length={"years": 1})
    psid = PopulationSliceID(date=pd.Timestamp("2016-02-01"))
    live_register = LiveRegisterPopulation(
        columns_by_type={"JobPath_Flag": "boolean", "JobPathHold": "boolean",}
    )

    population = live_register.run(psid)
    results = on_jobpath_step.run(data_id=psid, data=population)

    # Manually checked: 1441 people on JobPath at the start of Feb 2016.
    flagged = results.loc[results["on_jobpath"]]
    assert flagged.shape == (1441, 3)
def test__PopulationSlice(fixture__RandomPopulation, fixture__SampleFromPopulation):
    """A PopulationSlice built from the two fixture steps should be 10 x 5."""
    random_step = fixture__RandomPopulation()
    sample_step = fixture__SampleFromPopulation(0.1)
    setup_steps = SetupSteps([random_step, sample_step])

    slice_id = PopulationSliceID(date=pd.Timestamp("2016-01-01"))
    results = PopulationSlice(id=slice_id, setup_steps=setup_steps)

    assert results.data.shape == (10, 5)
def fixture__population_slice(fixture__RandomPopulation, fixture__SampleFromPopulation):
    """Return a PopulationSlice dated 2016-01-01 built from the two fixture steps.

    NOTE(review): no fixture decorator is visible on this definition here;
    presumably it is registered as a pytest fixture -- confirm.
    """
    random_step = fixture__RandomPopulation()
    sample_step = fixture__SampleFromPopulation(0.1)
    steps = SetupSteps([random_step, sample_step])

    return PopulationSlice(
        id=PopulationSliceID(date=pd.Timestamp("2016-01-01")),
        setup_steps=steps,
    )
def test__all_SetupSteps_for_PopulationSlice(
    fixture__population_slice_setup_steps, fixture__population_slice__expected_columns
):
    """Run the full population-slice setup steps directly on a PopulationSlice.

    Checks the resulting columns and the total / eligible row counts for the
    slice dated 2016-07-01.
    """
    slice_id = PopulationSliceID(date=pd.Timestamp("2016-07-01"))
    results = PopulationSlice(
        id=slice_id,
        setup_steps=fixture__population_slice_setup_steps,
    )

    assert set(results.data.columns) == set(fixture__population_slice__expected_columns)
    # Expected counts were checked by hand: total rows, then eligible rows.
    assert len(results.data) == 315654
    assert len(results.data[results.data["eligible_population"]]) == 86240
def test__JobPathStarts__jobpath_operational(fixture__population_slice_setup_steps):
    """JobPathStarts should flag 1336 rows for the January 2016 treatment period."""
    ps = PopulationSlice(
        id=PopulationSliceID(date=pd.Timestamp("2016-01-01")),
        setup_steps=fixture__population_slice_setup_steps,
    )
    january_id = TreatmentPeriodID(
        population_slice_id=ps.id, time_period=pd.Period("2016-01")
    )

    results = TreatmentPeriod(
        id=january_id,
        setup_steps=SetupSteps(steps=[JobPathStarts()]),
        init_data=ps.data,
    )

    # Apparently 1336 people started JobPath in Jan 2016.
    assert len(results.data[results.data["jobpath_starts"]]) == 1336
def test__EvaluationGroup():
    """EvaluationGroup should label 4 of the 10 input rows as "T".

    The input has 8 eligible rows (then 2 ineligible) and an alternating
    True/False treatment column; the result is expected to have 3 columns.
    """
    frame = pd.DataFrame(
        {"eligible": [True] * 8 + [False] * 2, "treatment": [True, False] * 5,}
    )
    any_data_id = TreatmentPeriodID(
        population_slice_id=PopulationSliceID(date="2016-01-01"),
        time_period=pd.Period("2016-01"),
    )
    group_step = EvaluationGroup(eligible_col="eligible", treatment_col="treatment")

    results = TreatmentPeriod(
        id=any_data_id,
        setup_steps=SetupSteps(steps=[group_step]),
        init_data=frame,
    )

    assert results.data[results.data["evaluation_group"] == "T"].shape == (4, 3)
def test__ModelDataHandler__run__new(fixture__setup_steps_by_date, fixture__population_slice_generator, tmpdir):
    """Run the slice generator once against a fresh sqlite-backed data handler.

    The handler points at ``test.db`` inside ``tmpdir``. The slice dated
    2016-07-01 is expected to hold 90 rows and 5 columns.
    """
    data_path = f"sqlite:///{tmpdir}/test.db"
    data_handler = ModelDataHandler(data_path)
    population_slice_generator = fixture__population_slice_generator

    results = {
        population_slice.id: population_slice
        for population_slice in population_slice_generator.run(data_handler)
    }

    # The ``freq`` keyword of pd.Timestamp was deprecated in pandas 1.4 and
    # removed in 2.0. Timestamp equality and hashing do not depend on it, so a
    # freq-less timestamp addresses the same slice.
    key = PopulationSliceID(date=pd.Timestamp("2016-07-01"))
    assert results[key].data.shape == (90, 5)
def test__PopulationSliceGenerator(fixture__setup_steps_by_date, fixture__population_slice_generator):
    """Run the slice generator without a data handler and check its output.

    Verifies the shape of the slice dated 2016-07-01 and that the generator's
    ``date_range`` equals quarter-start dates from 2016-01-01 to 2017-12-31.
    """
    population_slice_generator = fixture__population_slice_generator

    results = {
        population_slice.id: population_slice
        for population_slice in population_slice_generator.run()
    }

    # The ``freq`` keyword of pd.Timestamp was deprecated in pandas 1.4 and
    # removed in 2.0. Timestamp equality and hashing do not depend on it, so a
    # freq-less timestamp addresses the same slice.
    key = PopulationSliceID(date=pd.Timestamp("2016-07-01"))
    assert results[key].data.shape == (90, 5)

    expected_dates = pd.date_range(
        start=pd.Timestamp("2016-01-01"),
        end=pd.Timestamp("2017-12-31"),
        freq="QS",
    )
    assert population_slice_generator.date_range.equals(expected_dates)
def test__JobPathStartedEndedSamePeriod(fixture__population_slice_setup_steps):
    """JobPathStartedEndedSamePeriod should flag 2 rows for July 2016."""
    ps = PopulationSlice(
        id=PopulationSliceID(date=pd.Timestamp("2016-07-01")),
        setup_steps=fixture__population_slice_setup_steps,
    )
    july_id = TreatmentPeriodID(
        population_slice_id=ps.id, time_period=pd.Period("2016-07")
    )

    results = TreatmentPeriod(
        id=july_id,
        setup_steps=SetupSteps(steps=[JobPathStartedEndedSamePeriod()]),
        init_data=ps.data,
    )

    # Diagnostic output; pytest only shows it when the test fails (or with -s).
    print(results.data["jobpath_started_and_ended"].value_counts())
    print(results.data)

    # Apparently 2 people started *and* ended JobPath in July 2016.
    assert len(results.data[results.data["jobpath_started_and_ended"]]) == 2
def test__EvaluationModel__add_periods(
    fixture__population_slice_generator, fixture__treatment_period_generator
):
    """EvaluationModel should expose treatment periods after both add_* calls.

    Looks up the June 2016 period of the slice dated 2016-01-01 and expects
    48 rows and 5 columns.
    """
    evaluation_model = EvaluationModel(
        population_slice_generator=fixture__population_slice_generator,
        treatment_period_generator=fixture__treatment_period_generator,
    )
    evaluation_model.add_population_slices()
    evaluation_model.add_treatment_periods()

    # The ``freq`` keyword of pd.Timestamp was deprecated in pandas 1.4 and
    # removed in 2.0. Timestamp equality and hashing do not depend on it, so a
    # freq-less timestamp addresses the same slice.
    period_id = TreatmentPeriodID(
        population_slice_id=PopulationSliceID(pd.Timestamp("2016-01-01")),
        time_period=pd.Period("2016-06", "M"),
    )
    results = evaluation_model.treatment_periods[period_id]

    assert results.data.shape == (48, 5)
def test__ModelDataHandler__run__existing(fixture__setup_steps_by_date, fixture__population_slice_generator, tmpdir):
    """Run the slice generator twice against the same sqlite-backed handler.

    Both runs must yield a 2016-07-01 slice with the same number of rows.
    """
    data_path = f"sqlite:///{tmpdir}/test.db"
    data_handler = ModelDataHandler(data_path)
    population_slice_generator = fixture__population_slice_generator

    # First iteration should run setup_steps then write to storage.
    first_population_slices = {
        population_slice.id: population_slice
        for population_slice in population_slice_generator.run(data_handler)
    }
    # Second iteration should just read from storage.
    second_population_slices = {
        population_slice.id: population_slice
        for population_slice in population_slice_generator.run(data_handler)
    }

    # The ``freq`` keyword of pd.Timestamp was deprecated in pandas 1.4 and
    # removed in 2.0. Timestamp equality and hashing do not depend on it, so a
    # freq-less timestamp addresses the same slice.
    key = PopulationSliceID(date=pd.Timestamp("2016-07-01"))
    first_len = len(first_population_slices[key].data)
    second_len = len(second_population_slices[key].data)
    assert first_len == second_len
def test__all_SetupSteps__first_TreatmentPeriod(
    fixture__treatment_period_setup_steps,
    fixture__population_slice_setup_steps,
    fixture__treatment_period_expected_columns,
):
    """Run the full treatment-period setup steps on the first period of a slice.

    The period's row count must equal the number of eligible rows in the
    originating population slice, and some of its rows must be not eligible.
    """
    ps = PopulationSlice(
        id=PopulationSliceID(date=pd.Timestamp("2016-07-01")),
        setup_steps=fixture__population_slice_setup_steps,
    )
    july_id = TreatmentPeriodID(
        population_slice_id=ps.id, time_period=pd.Period("2016-07")
    )

    results = TreatmentPeriod(
        id=july_id,
        setup_steps=fixture__treatment_period_setup_steps,
        init_data=ps.data,
    )

    # Diagnostic output; pytest only shows it when the test fails (or with -s).
    print(f"Results columns: {results.data.columns}")

    assert set(results.data.columns) == set(fixture__treatment_period_expected_columns)
    # The period starts from exactly the eligible rows of the slice.
    assert len(results.data) == len(ps.data[ps.data["eligible_population"]])
    # At least one row must be flagged as not eligible within the period.
    assert len(results.data[results.data["eligible_population"]]) < len(results.data)
def test__all_SetupSteps_for_EvaluationModel_treatment_periods(
    fixture__population_slice_setup_steps,
    fixture__treatment_period_setup_steps,
    fixture__treatment_period_expected_columns,
    tmpdir,
):
    """Run the full slice and period setup steps through an EvaluationModel.

    Uses a sqlite ModelDataHandler located in ``tmpdir`` and checks the
    September 2016 treatment period of the slice dated 2016-07-01.
    """
    handler = ModelDataHandler(
        database_type="sqlite",
        location=tmpdir,
        name="jobpath_evaluation",
    )
    slice_generator = PopulationSliceGenerator(
        setup_steps_by_date={
            pd.Timestamp("2016-01-01"): fixture__population_slice_setup_steps
        },
        start=pd.Timestamp("2016-07-01"),
        end=pd.Timestamp("2016-09-30"),
    )
    period_generator = TreatmentPeriodGenerator(
        setup_steps_by_date={
            pd.Timestamp("2016-07-01"): fixture__treatment_period_setup_steps
        },
        end=pd.Period("2016-09"),
    )
    evaluation_model = EvaluationModel(
        data_handler=handler,
        population_slice_generator=slice_generator,
        treatment_period_generator=period_generator,
    )
    evaluation_model.add_population_slices()
    evaluation_model.add_treatment_periods()

    slice_id = PopulationSliceID(date=pd.Timestamp("2016-07-01"))
    results_id = TreatmentPeriodID(
        population_slice_id=slice_id,
        time_period=pd.Period("2016-09"),
    )
    results = evaluation_model.treatment_periods[results_id]

    assert set(results.data.columns) == set(fixture__treatment_period_expected_columns)
    # Expected counts below were checked by hand.
    assert len(results.data[results.data["eligible_population"]]) < len(results.data)
    assert len(results.data[results.data["eligible_population"]]) == 64333
    assert len(results.data[results.data["jobpath_starts"]]) == 4273