def test__all_SetupSteps_for_EvaluationModel_population_slices(
    fixture__population_slice_setup_steps,
    fixture__population_slice__expected_columns,
    tmpdir,
):
    """Run every population-slice setup step through an EvaluationModel.

    A generator covering 2016-07-01 to 2016-09-30 is wired to a sqlite-backed
    data handler under ``tmpdir``; the slice keyed by 2016-07-01 is then
    checked for its column set and for hard-coded row counts.
    """
    steps_by_date = {
        pd.Timestamp("2016-01-01"): fixture__population_slice_setup_steps
    }
    slice_generator = PopulationSliceGenerator(
        setup_steps_by_date=steps_by_date,
        start=pd.Timestamp("2016-07-01"),
        end=pd.Timestamp("2016-09-30"),
    )
    handler = ModelDataHandler(
        database_type="sqlite",
        location=tmpdir,
        name="jobpath_evaluation",
    )
    model = EvaluationModel(
        data_handler=handler,
        population_slice_generator=slice_generator,
    )
    model.add_population_slices()

    first_slice = model.population_slices[
        PopulationSliceID(date=pd.Timestamp("2016-07-01"))
    ]
    expected_columns = set(fixture__population_slice__expected_columns)
    assert set(first_slice.data.columns) == expected_columns

    # Counts below were verified by hand: everyone on LR, then the eligible subset.
    assert len(first_slice.data) == 315654
    eligible_rows = first_slice.data[first_slice.data["eligible_population"]]
    assert len(eligible_rows) == 86240
def test__ModelDataHandler__init(tmpdir):
    """Check that a freshly constructed ModelDataHandler exposes an engine.

    The handler's engine name is compared with the name of an engine created
    directly from the equivalent sqlite URL under ``tmpdir``.
    """
    handler = ModelDataHandler(database_type="sqlite", location=tmpdir, name="test")
    reference_engine = sa.create_engine(f"sqlite:///{tmpdir}/test.db")
    assert handler.engine.name == reference_engine.name
def test__ModelDataHandler__read(fixture__population_slice, tmpdir):
    """Write a population slice, read it back, and compare shapes."""
    source_slice = fixture__population_slice
    handler = ModelDataHandler(f"sqlite:///{tmpdir}/test.db")

    # Persist without the DataFrame index so the round trip keeps the shape.
    handler.write(
        data_type=source_slice.class_name,
        data_id=source_slice.id,
        data=source_slice.data,
        index=False,
    )
    display(source_slice.data)

    round_tripped = handler.read(
        data_type=source_slice.class_name,
        data_id=source_slice.id,
    )
    display(round_tripped)

    assert round_tripped.shape == source_slice.data.shape
def test__ModelDataHandler__write__new(fixture__population_slice, tmpdir):
    """Save a not-yet-stored population slice and verify what lands in the table.

    -- The slice's data is written through data_handler.write().
    -- The "PopulationSlice" table is then read straight from the database.
    -- Rows matching the slice's date are kept; the "data_id_date" and "index"
       bookkeeping columns are dropped.
    -- What remains should have the same shape as the slice's own data.
    """
    db_url = f"sqlite:///{tmpdir}/test.db"
    source_slice = fixture__population_slice
    handler = ModelDataHandler(db_url)
    handler.write(
        data_type=source_slice.class_name,
        data_id=source_slice.id,
        data=source_slice.data,
    )

    stored = pd.read_sql("PopulationSlice", con=sa.create_engine(db_url))
    # The stored id column is matched against the ISO date string of the slice id.
    slice_date = str(source_slice.id.date.date())
    matching_rows = stored.loc[stored["data_id_date"] == slice_date]
    results = matching_rows.drop(columns=["data_id_date", "index"])

    display(results)
    display(source_slice.data)
    assert results.shape == source_slice.data.shape
def test__ModelDataHandler__run__new(fixture__setup_steps_by_date,
                                     fixture__population_slice_generator,
                                     tmpdir):
    """Run the population slice generator against an empty data store.

    Every slice yielded by ``population_slice_generator.run(data_handler)`` is
    collected by its id, and the slice keyed by 2016-07-01 is expected to hold
    data of shape (90, 5).
    """
    data_path = f"sqlite:///{tmpdir}/test.db"
    data_handler = ModelDataHandler(data_path)
    population_slice_generator = fixture__population_slice_generator

    results = {
        population_slice.id: population_slice
        for population_slice in population_slice_generator.run(data_handler)
    }

    # The key is built without `freq`: that Timestamp argument was deprecated in
    # pandas 1.4 and removed in 2.0, and the other tests in this file build the
    # same key from a plain Timestamp.
    key = PopulationSliceID(date=pd.Timestamp("2016-07-01"))
    assert results[key].data.shape == (90, 5)
def test__ModelDataHandler__run__existing(fixture__setup_steps_by_date,
                                          fixture__population_slice_generator,
                                          tmpdir):
    """Run the generator twice against the same store and compare the results.

    The slice keyed by 2016-07-01 must have the same number of rows after the
    second run as after the first.
    """
    data_path = f"sqlite:///{tmpdir}/test.db"
    data_handler = ModelDataHandler(data_path)
    population_slice_generator = fixture__population_slice_generator

    # First iteration should run setup_steps then write to storage
    first_population_slices = {
        population_slice.id: population_slice
        for population_slice in population_slice_generator.run(data_handler)
    }
    # Second iteration should just read from storage
    second_population_slices = {
        population_slice.id: population_slice
        for population_slice in population_slice_generator.run(data_handler)
    }

    # The key is built without `freq`: that Timestamp argument was deprecated in
    # pandas 1.4 and removed in 2.0, and the other tests in this file build the
    # same key from a plain Timestamp.
    key = PopulationSliceID(date=pd.Timestamp("2016-07-01"))
    assert len(first_population_slices[key].data) == len(
        second_population_slices[key].data
    )
def test__all_SetupSteps_for_EvaluationModel_treatment_periods(
    fixture__population_slice_setup_steps,
    fixture__treatment_period_setup_steps,
    fixture__treatment_period_expected_columns,
    tmpdir,
):
    """Run population-slice and treatment-period setup through EvaluationModel.

    After both generators have been applied, the treatment period for the
    2016-07-01 slice and period 2016-09 is checked for its column set and for
    hard-coded row counts.
    """
    handler = ModelDataHandler(
        database_type="sqlite",
        location=tmpdir,
        name="jobpath_evaluation",
    )
    slice_generator = PopulationSliceGenerator(
        setup_steps_by_date={
            pd.Timestamp("2016-01-01"): fixture__population_slice_setup_steps
        },
        start=pd.Timestamp("2016-07-01"),
        end=pd.Timestamp("2016-09-30"),
    )
    period_generator = TreatmentPeriodGenerator(
        setup_steps_by_date={
            pd.Timestamp("2016-07-01"): fixture__treatment_period_setup_steps
        },
        end=pd.Period("2016-09"),
    )
    model = EvaluationModel(
        data_handler=handler,
        population_slice_generator=slice_generator,
        treatment_period_generator=period_generator,
    )
    model.add_population_slices()
    model.add_treatment_periods()

    slice_id = PopulationSliceID(date=pd.Timestamp("2016-07-01"))
    period_id = TreatmentPeriodID(
        population_slice_id=slice_id,
        time_period=pd.Period("2016-09"),
    )
    period = model.treatment_periods[period_id]

    expected_columns = set(fixture__treatment_period_expected_columns)
    assert set(period.data.columns) == expected_columns

    # Counts below were verified by hand: eligible people, then JobPath starts.
    eligible_count = len(period.data[period.data["eligible_population"]])
    assert eligible_count < len(period.data)
    assert eligible_count == 64333
    assert len(period.data[period.data["jobpath_starts"]]) == 4273
def test__ModelDataHandler__write__overwrite(fixture__population_slice, tmpdir):
    """Given a population_slice that overwrites an old one, save it correctly.

    The slice is written once, its data is replaced by a new 20-row frame
    (columns A-D plus "date"), and it is written again under the same id. The
    rows stored for that id, minus the "data_id_date" and "index" bookkeeping
    columns, must then have the shape of the replacement data.
    """
    data_path = f"sqlite:///{tmpdir}/test.db"
    population_slice = fixture__population_slice
    data_handler = ModelDataHandler(data_path)

    # Write first version of data
    data_handler.write(
        data_type=population_slice.class_name,
        data_id=population_slice.id,
        data=population_slice.data,
    )

    # Change the data - it's ok for now to assume same number of columns!
    population_slice.data = pd.DataFrame(
        np.random.randint(100, 200, size=(20, 4)),
        columns=list("ABCD"),
    )
    population_slice.data["date"] = pd.date_range(
        "2016-01-01", periods=8, freq="QS"
    )[0]

    # Now write the changed data to database
    data_handler.write(
        data_type=population_slice.class_name,
        data_id=population_slice.id,
        data=population_slice.data,
    )

    engine = sa.create_engine(data_path)
    df = pd.read_sql("PopulationSlice", con=engine)
    display(df)
    # DataFrame.info() prints its summary and returns None, so it is called
    # directly rather than being passed to display().
    df.info()

    # Filter on the ISO date string, as test__ModelDataHandler__write__new does:
    # this matches whether the id column comes back as text or as datetimes,
    # whereas comparing a text column to a raw Timestamp selects no rows.
    slice_date = str(population_slice.id.date.date())
    results = df.loc[df["data_id_date"] == slice_date].drop(
        ["data_id_date", "index"], axis="columns"
    )
    assert results.shape == population_slice.data.shape
AgeEligible, ClaimCodeEligible, ClaimDurationEligible, OnLES, OnJobPath, JobPathStartedEndedSamePeriod, EligiblePopulation, JobPathStarts, EvaluationGroup, ) from evaluation_jp.data import ModelDataHandler evaluation_model = EvaluationModel( data_handler=ModelDataHandler( database_type="sqlite", location="//cskma0294/f/Evaluations/JobPath", name="jobpath_evaluation", ), population_slice_generator=PopulationSliceGenerator( start=pd.Timestamp("2016-01-01"), end=pd.Timestamp("2017-12-31"), freq="QS", setup_steps_by_date={ pd.Timestamp("2016-01-01"): SetupSteps(steps=[ LiveRegisterPopulation( columns_by_type={ "lr_code": "category", "clm_comm_date": "datetime64", "JobPath_Flag": "boolean", "date_of_birth": "datetime64",