def _persist_shapefile(key_name, shp, shx, dbf, output_dir):
    """Persist the three shapefile components (.shp/.shx/.dbf) for one key.

    Args:
        key_name: Base key (e.g. 'states.NO_INTERVENTION'); the component
            extension is appended to form the deploy key.
        shp/shx/dbf: BytesIO buffers holding the shapefile components.
        output_dir: Directory (or destination) passed through to DatasetDeployer.
    """
    # One deployer per component; extension order mirrors the classic
    # ESRI shapefile triplet.
    for extension, buffer in (("shp", shp), ("shx", shx), ("dbf", dbf)):
        DatasetDeployer(
            key=f"{key_name}.{extension}",
            body=buffer.getvalue(),
            output_dir=output_dir,
        ).persist()


def deploy(run_validation, input, output):
    """The entry function for invocation.

    For every Intervention, builds and uploads the state- and county-level
    CSVs and shapefiles, optionally validating each artifact first.

    Args:
        run_validation: If true, validates each dataframe/shapefile before upload.
        input: Input directory of model output files. (NOTE: shadows the
            builtin `input`; name kept for caller compatibility.)
        output: Output directory/destination for the generated artifacts.
    """
    for intervention_enum in list(Intervention):
        logger.info(
            f"Starting to generate files for {intervention_enum.name}.")

        # --- State-level artifacts ---
        states_key_name = f'states.{intervention_enum.name}'
        states_df = get_usa_by_states_df(input, intervention_enum.value)
        if run_validation:
            validate_states_df(states_key_name, states_df)
        upload_csv(states_key_name, states_df.to_csv(), output)

        states_shp = BytesIO()
        states_shx = BytesIO()
        states_dbf = BytesIO()
        get_usa_state_shapefile(
            states_df, states_shp, states_shx, states_dbf)
        if run_validation:
            validate_states_shapefile(
                states_key_name, states_shp, states_shx, states_dbf)
        _persist_shapefile(
            states_key_name, states_shp, states_shx, states_dbf, output)
        logger.info(
            f"Generated state shape files for {intervention_enum.name}")

        # --- County-level artifacts ---
        counties_key_name = f'counties.{intervention_enum.name}'
        counties_df = get_usa_by_county_with_projection_df(
            input, intervention_enum.value)
        if run_validation:
            validate_counties_df(counties_key_name, counties_df)
        upload_csv(counties_key_name, counties_df.to_csv(), output)

        counties_shp = BytesIO()
        counties_shx = BytesIO()
        counties_dbf = BytesIO()
        get_usa_county_shapefile(
            counties_df, counties_shp, counties_shx, counties_dbf)
        if run_validation:
            validate_counties_shapefile(
                counties_key_name, counties_shp, counties_shx, counties_dbf)
        _persist_shapefile(
            counties_key_name, counties_shp, counties_shx, counties_dbf, output)
        logger.info(
            f"Generated counties shape files for {intervention_enum.name}")

    print('finished dod job')
def run_projections(
    state_input_file, county_input_file, intervention: Intervention, run_validation=True
) -> Tuple[DodInterventionResult, DodInterventionResult]:
    """Run county and state level projections for a specific intervention.

    Args:
        state_input_file: Input file to load state model output results from.
        county_input_file: Input file to load county model output results from.
        intervention: Intervention type to summarize.
        run_validation: If true runs validation on generated shapefiles
            and dataframes.

    Returns:
        Tuple of DodInterventionResult objects for state and county data.
    """
    # State-level pipeline: dataframe, optional validation, shapefile.
    state_key = f"states.{intervention.name}"
    state_data = build_processed_dataset.get_usa_by_states_df(
        state_input_file, intervention.value
    )
    if run_validation:
        validate_results.validate_states_df(state_key, state_data)

    state_shp, state_shx, state_dbf = generate_shapefiles.get_usa_state_shapefile(
        state_data
    )
    if run_validation:
        validate_results.validate_states_shapefile(
            state_key, state_shp, state_shx, state_dbf
        )
    logger.info(f"Generated state shape files for {intervention.name}")

    # County-level pipeline mirrors the state one.
    county_key = f"counties.{intervention.name}"
    county_data = build_processed_dataset.get_usa_by_county_with_projection_df(
        county_input_file, intervention.value
    )
    if run_validation:
        validate_results.validate_counties_df(county_key, county_data)

    county_shp, county_shx, county_dbf = generate_shapefiles.get_usa_county_shapefile(
        county_data
    )
    if run_validation:
        validate_results.validate_counties_shapefile(
            county_key, county_shp, county_shx, county_dbf
        )

    return (
        DodInterventionResult(
            state_key, state_data, (state_shp, state_shx, state_dbf)
        ),
        DodInterventionResult(
            county_key, county_data, (county_shp, county_shx, county_dbf)
        ),
    )
def test_get_usa_by_states_df():
    """State-level dataframe is populated even when projections are empty."""
    # Stub out state projections with an empty frame so only the
    # observed/actuals path feeds the resulting dataframe.
    stub_projections = pd.DataFrame([], columns=["state"])
    patch_target = "libs.build_processed_dataset.get_state_projections_df"
    with patch(patch_target, return_value=stub_projections):
        df = build_processed_dataset.get_usa_by_states_df(
            input_dir="/tmp", intervention=Intervention.OBSERVED_INTERVENTION
        )
        validate_results.validate_states_df("TX", df)

        texas_rows = df.loc[df[CommonFields.STATE_FULL_NAME] == "Texas"]
        tx_record = one(texas_rows.to_dict(orient="records"))
        # Sanity thresholds: Texas should have substantial test counts.
        assert tx_record[CUMULATIVE_POSITIVE_TESTS] > 100
        assert tx_record[CUMULATIVE_NEGATIVE_TESTS] > 100
def run_projections(
    input_file, aggregation_level, intervention: Intervention, run_validation=True
) -> APIPipelineProjectionResult:
    """Run the projections for the given intervention for states
    in order to generate the api.

    Args:
        input_file: Input file to load model output results from.
        aggregation_level: AggregationLevel selecting the state or county path.
        intervention: Intervention Enum to be used to generate results.
        run_validation: If true runs validation on generated shapefiles
            and dataframes.

    Returns:
        APIPipelineProjectionResult object for the requested aggregation level.

    Raises:
        ValueError: If aggregation_level is neither STATE nor COUNTY.
    """
    if aggregation_level == AggregationLevel.STATE:
        states_key_name = f"states.{intervention.name}"
        states_df = build_processed_dataset.get_usa_by_states_df(
            input_file, intervention.value
        )
        if run_validation:
            validate_results.validate_states_df(states_key_name, states_df)

        return APIPipelineProjectionResult(
            intervention, AggregationLevel.STATE, states_df
        )

    if aggregation_level == AggregationLevel.COUNTY:
        # Run County level projections
        counties_key_name = f"counties.{intervention.name}"
        counties_df = build_processed_dataset.get_usa_by_county_with_projection_df(
            input_file, intervention.value
        )
        if run_validation:
            # NOTE: the county validator additionally takes the intervention,
            # unlike the state validator above.
            validate_results.validate_counties_df(
                counties_key_name, counties_df, intervention
            )

        return APIPipelineProjectionResult(
            intervention, AggregationLevel.COUNTY, counties_df
        )

    # Fixed typo in the user-facing error message ("aggreation" -> "aggregation").
    raise ValueError("Non-valid aggregation level specified")