def test__all_SetupSteps__subsequent_TreatmentPeriod(
    fixture__population_slice_setup_steps,
    fixture__treatment_period_setup_steps,
    fixture__treatment_period_expected_columns,
):
    """Apply the full treatment-period setup steps to a second, follow-on period.

    A 2016-07 treatment period is built from a 2016-07-01 population slice,
    and a 2016-08 period is then built from a copy of the first period's data.
    The second period must expose the expected columns, must contain no more
    rows than the first period's eligible rows minus its "T" rows, and must
    contain at least one row that is not flagged as eligible.
    """
    population_slice = PopulationSlice(
        id=PopulationSliceID(date=pd.Timestamp("2016-07-01")),
        setup_steps=fixture__population_slice_setup_steps,
    )

    july_period = TreatmentPeriod(
        id=TreatmentPeriodID(
            population_slice_id=population_slice.id,
            time_period=pd.Period("2016-07"),
        ),
        setup_steps=fixture__treatment_period_setup_steps,
        init_data=population_slice.data,
    )
    august_period = TreatmentPeriod(
        id=TreatmentPeriodID(
            population_slice_id=population_slice.id,
            time_period=pd.Period("2016-08"),
        ),
        setup_steps=fixture__treatment_period_setup_steps,
        # Hand over a copy so the first period's data is not shared.
        init_data=july_period.data.copy(),
    )

    july_data = july_period.data
    august_data = august_period.data

    expected_columns = set(fixture__treatment_period_expected_columns)
    assert set(august_data.columns) == expected_columns

    # Upper bound on the follow-on population: eligible rows of the first
    # period, less the rows that were assigned to the "T" group.
    eligible_in_july = july_data[july_data["eligible_population"]]
    treated_in_july = july_data[july_data["evaluation_group"] == "T"]
    max_rows = len(eligible_in_july) - len(treated_in_july)
    assert len(august_data) <= max_rows

    eligible_in_august = august_data[august_data["eligible_population"]]
    assert len(eligible_in_august) < len(august_data)
def test__StartingPopulation():
    """Check that StartingPopulation keeps only the rows labelled "C".

    The input frame has 20 rows: 2 labelled "T", 6 labelled "C" and 12
    carrying the placeholder value 0 in ``evaluation_group``.
    """
    n_treatment, n_control, n_ineligible = 2, 6, 12
    n_eligible = n_treatment + n_control
    n_total = n_eligible + n_ineligible

    frame = pd.DataFrame(
        {
            "original_population": [True] * n_total,
            "eligible_population": [True] * n_eligible + [False] * n_ineligible,
            "evaluation_group": (
                ["T"] * n_treatment + ["C"] * n_control + [0] * n_ineligible
            ),
        }
    )

    # The ID values are arbitrary; a TreatmentPeriod just needs some ID.
    arbitrary_id = TreatmentPeriodID(
        population_slice_id=PopulationSliceID(date="2016-01-01"),
        time_period=pd.Period("2016-01"),
    )
    starting_population_step = StartingPopulation(
        eligible_from_previous_period_col="evaluation_group",
        starting_pop_label="C",
    )
    period = TreatmentPeriod(
        id=arbitrary_id,
        setup_steps=SetupSteps(steps=[starting_population_step]),
        init_data=frame,
    )

    print(period.data)

    # The new population should hold 6 records, corresponding to the
    # 6 records of the original control group.
    assert len(period.data) == n_control
def test__TreatmentPeriodGenerator(
    fixture__treatment_period_generator, fixture__population_slice
):
    """Run the generator over a population slice and inspect the 2016-02 period."""
    # Index every generated treatment period by its ID.
    periods_by_id = {}
    for period in fixture__treatment_period_generator.run(fixture__population_slice):
        periods_by_id[period.id] = period

    february_id = TreatmentPeriodID(
        population_slice_id=fixture__population_slice.id,
        time_period=pd.Period("2016-02"),
    )
    expected_shape = (8, 5)
    assert periods_by_id[february_id].data.shape == expected_shape
def test__JobPathStarts__jobpath_operational(fixture__population_slice_setup_steps):
    """Count the rows flagged by the JobPathStarts step for January 2016."""
    population_slice = PopulationSlice(
        id=PopulationSliceID(date=pd.Timestamp("2016-01-01")),
        setup_steps=fixture__population_slice_setup_steps,
    )
    january_id = TreatmentPeriodID(
        population_slice_id=population_slice.id,
        time_period=pd.Period("2016-01"),
    )
    period = TreatmentPeriod(
        id=january_id,
        setup_steps=SetupSteps(steps=[JobPathStarts()]),
        init_data=population_slice.data,
    )

    period_data = period.data
    starters = period_data[period_data["jobpath_starts"]]

    # Apparently 1336 people started JobPath in Jan 2016
    assert len(starters) == 1336
def test__TreatmentPeriod(
    fixture__population_slice,
    fixture__SampleFromPopulation,
):
    """Build a 2016Q1 treatment period with a single sampling step (0.9)."""
    sampling_step = fixture__SampleFromPopulation(0.9)
    steps = SetupSteps([sampling_step])

    quarter_id = TreatmentPeriodID(
        population_slice_id=fixture__population_slice.id,
        time_period=pd.Period("2016Q1"),
    )
    period = TreatmentPeriod(
        id=quarter_id,
        setup_steps=steps,
        init_data=fixture__population_slice.data,
    )

    expected_shape = (9, 5)
    assert period.data.shape == expected_shape
def test__EvaluationGroup():
    """Check the "T" rows produced by EvaluationGroup on a 10-row frame.

    8 of the 10 rows are eligible and the treatment flag alternates
    True/False; the rows labelled "T" are expected to have shape (4, 3).
    """
    eligible_flags = [True] * 8 + [False] * 2
    treatment_flags = [True, False] * 5
    frame = pd.DataFrame({"eligible": eligible_flags, "treatment": treatment_flags})

    # The ID values are arbitrary; a TreatmentPeriod just needs some ID.
    arbitrary_id = TreatmentPeriodID(
        population_slice_id=PopulationSliceID(date="2016-01-01"),
        time_period=pd.Period("2016-01"),
    )
    grouping_step = EvaluationGroup(eligible_col="eligible", treatment_col="treatment")
    period = TreatmentPeriod(
        id=arbitrary_id,
        setup_steps=SetupSteps(steps=[grouping_step]),
        init_data=frame,
    )

    period_data = period.data
    treated_rows = period_data[period_data["evaluation_group"] == "T"]
    assert treated_rows.shape == (4, 3)
def test__JobPathStartedEndedSamePeriod(fixture__population_slice_setup_steps):
    """Count the rows flagged by JobPathStartedEndedSamePeriod for July 2016."""
    population_slice = PopulationSlice(
        id=PopulationSliceID(date=pd.Timestamp("2016-07-01")),
        setup_steps=fixture__population_slice_setup_steps,
    )
    july_id = TreatmentPeriodID(
        population_slice_id=population_slice.id,
        time_period=pd.Period("2016-07"),
    )
    period = TreatmentPeriod(
        id=july_id,
        setup_steps=SetupSteps(steps=[JobPathStartedEndedSamePeriod()]),
        init_data=population_slice.data,
    )

    # Diagnostic output, visible when pytest reports a failure or runs with -s.
    print(period.data["jobpath_started_and_ended"].value_counts())
    print(period.data)

    period_data = period.data
    started_and_ended = period_data[period_data["jobpath_started_and_ended"]]

    # Apparently 2 people started *and* ended JobPath in July 2016
    assert len(started_and_ended) == 2
def test__EvaluationModel__add_periods(
    fixture__population_slice_generator, fixture__treatment_period_generator
):
    """Populate an EvaluationModel and look up one generated treatment period.

    After adding population slices and treatment periods, the period for
    2016-06 belonging to the 2016-01-01 population slice is fetched from
    ``evaluation_model.treatment_periods`` and its data shape is checked.
    """
    evaluation_model = EvaluationModel(
        population_slice_generator=fixture__population_slice_generator,
        treatment_period_generator=fixture__treatment_period_generator,
    )
    evaluation_model.add_population_slices()
    evaluation_model.add_treatment_periods()

    # `freq` is deliberately not passed to pd.Timestamp: the argument was
    # deprecated in pandas 1.4 and removed in pandas 2.0 (TypeError there).
    # Timestamp equality and hashing do not take `freq` into account, so the
    # plain timestamp identifies the same population slice.
    results = evaluation_model.treatment_periods[
        TreatmentPeriodID(
            population_slice_id=PopulationSliceID(pd.Timestamp("2016-01-01")),
            time_period=pd.Period("2016-06", "M"),
        )
    ]
    assert results.data.shape == (48, 5)
def test__all_SetupSteps__first_TreatmentPeriod(
    fixture__treatment_period_setup_steps,
    fixture__population_slice_setup_steps,
    fixture__treatment_period_expected_columns,
):
    """Apply the full treatment-period setup steps to the first period of a slice.

    The 2016-07 period is built directly from the 2016-07-01 population
    slice. It must expose the expected columns, contain exactly as many rows
    as the slice has eligible rows, and contain at least one row that is not
    flagged as eligible.
    """
    population_slice = PopulationSlice(
        id=PopulationSliceID(date=pd.Timestamp("2016-07-01")),
        setup_steps=fixture__population_slice_setup_steps,
    )
    results = TreatmentPeriod(
        id=TreatmentPeriodID(
            population_slice_id=population_slice.id,
            time_period=pd.Period("2016-07"),
        ),
        setup_steps=fixture__treatment_period_setup_steps,
        init_data=population_slice.data,
    )

    print(f"Results columns: {results.data.columns}")

    expected_columns = set(fixture__treatment_period_expected_columns)
    assert set(results.data.columns) == expected_columns

    slice_data = population_slice.data
    eligible_in_slice = slice_data[slice_data["eligible_population"]]
    assert len(results.data) == len(eligible_in_slice)

    eligible_in_period = results.data[results.data["eligible_population"]]
    assert len(eligible_in_period) < len(results.data)
def test__all_SetupSteps_for_EvaluationModel_treatment_periods(
    fixture__population_slice_setup_steps,
    fixture__treatment_period_setup_steps,
    fixture__treatment_period_expected_columns,
    tmpdir,
):
    """Run the full setup steps end-to-end through an EvaluationModel.

    The model is wired to a sqlite ModelDataHandler located in ``tmpdir``,
    population slices are generated for 2016-07-01 to 2016-09-30 and treatment
    periods up to 2016-09. The 2016-09 period of the 2016-07-01 slice is then
    checked for its columns and for hard-coded row counts.
    """
    slice_steps_by_date = {
        pd.Timestamp("2016-01-01"): fixture__population_slice_setup_steps
    }
    period_steps_by_date = {
        pd.Timestamp("2016-07-01"): fixture__treatment_period_setup_steps
    }

    handler = ModelDataHandler(
        database_type="sqlite",
        location=tmpdir,
        name="jobpath_evaluation",
    )
    slice_generator = PopulationSliceGenerator(
        setup_steps_by_date=slice_steps_by_date,
        start=pd.Timestamp("2016-07-01"),
        end=pd.Timestamp("2016-09-30"),
    )
    period_generator = TreatmentPeriodGenerator(
        setup_steps_by_date=period_steps_by_date,
        end=pd.Period("2016-09"),
    )

    model = EvaluationModel(
        data_handler=handler,
        population_slice_generator=slice_generator,
        treatment_period_generator=period_generator,
    )
    model.add_population_slices()
    model.add_treatment_periods()

    september_id = TreatmentPeriodID(
        population_slice_id=PopulationSliceID(date=pd.Timestamp("2016-07-01")),
        time_period=pd.Period("2016-09"),
    )
    september_data = model.treatment_periods[september_id].data

    expected_columns = set(fixture__treatment_period_expected_columns)
    assert set(september_data.columns) == expected_columns

    # Manually checked figures: how many people are on LR and eligible.
    eligible_rows = september_data[september_data["eligible_population"]]
    assert len(eligible_rows) < len(september_data)
    assert len(eligible_rows) == 64333

    jobpath_starters = september_data[september_data["jobpath_starts"]]
    assert len(jobpath_starters) == 4273