def _persist_shapefile_set(key_name, shp, shx, dbf, output):
    """Persist the three component buffers (.shp/.shx/.dbf) of one shapefile set.

    Args:
        key_name: Base key (without extension) under which to store the files.
        shp/shx/dbf: BytesIO buffers holding the respective shapefile parts.
        output: Output directory passed through to DatasetDeployer.
    """
    for extension, buffer in (("shp", shp), ("shx", shx), ("dbf", dbf)):
        DatasetDeployer(key=f'{key_name}.{extension}',
                        body=buffer.getvalue(),
                        output_dir=output).persist()


def deploy(run_validation, input, output):
    """The entry function for invocation.

    For every Intervention, generates and uploads state- and county-level
    CSVs plus their shapefile sets (.shp/.shx/.dbf), optionally validating
    each artifact first.

    Args:
        run_validation: If true, validate each dataframe/shapefile before upload.
        input: Input directory for the model results.
            NOTE: shadows the builtin `input`; kept for caller compatibility.
        output: Output directory where artifacts are persisted.
    """
    for intervention_enum in list(Intervention):
        logger.info(
            f"Starting to generate files for {intervention_enum.name}.")

        # State-level CSV.
        states_key_name = f'states.{intervention_enum.name}'
        states_df = get_usa_by_states_df(input, intervention_enum.value)
        if run_validation:
            validate_states_df(states_key_name, states_df)
        upload_csv(states_key_name, states_df.to_csv(), output)

        # State-level shapefile set.
        states_shp, states_shx, states_dbf = BytesIO(), BytesIO(), BytesIO()
        get_usa_state_shapefile(states_df, states_shp, states_shx, states_dbf)
        if run_validation:
            validate_states_shapefile(states_key_name, states_shp, states_shx,
                                      states_dbf)
        _persist_shapefile_set(states_key_name, states_shp, states_shx,
                               states_dbf, output)
        logger.info(
            f"Generated state shape files for {intervention_enum.name}")

        # County-level CSV.
        counties_key_name = f'counties.{intervention_enum.name}'
        counties_df = get_usa_by_county_with_projection_df(
            input, intervention_enum.value)
        if run_validation:
            validate_counties_df(counties_key_name, counties_df)
        upload_csv(counties_key_name, counties_df.to_csv(), output)

        # County-level shapefile set.
        counties_shp, counties_shx, counties_dbf = (BytesIO(), BytesIO(),
                                                    BytesIO())
        get_usa_county_shapefile(counties_df, counties_shp, counties_shx,
                                 counties_dbf)
        if run_validation:
            validate_counties_shapefile(counties_key_name, counties_shp,
                                        counties_shx, counties_dbf)
        _persist_shapefile_set(counties_key_name, counties_shp, counties_shx,
                               counties_dbf, output)
        logger.info(
            f"Generated counties shape files for {intervention_enum.name}")

    # NOTE(review): plain print while the rest of the function uses `logger`;
    # kept as-is to preserve observable behavior.
    print('finished dod job')
# Ejemplo n.º 2
# 0
def run_projections(
    state_input_file, county_input_file, intervention: Intervention, run_validation=True
) -> Tuple[DodInterventionResult, DodInterventionResult]:
    """Run state and county level projections for one intervention.

    Args:
        state_input_file: Input file for the state-level model results.
        county_input_file: Input file for the county-level model results.
        intervention: Intervention type to summarize.
        run_validation: If true runs validation on generated shapefiles
            and dataframes.

    Returns: Tuple of DodInterventionResult objects for state and county data.
    """
    # State-level projections.
    states_key_name = f"states.{intervention.name}"
    states_df = build_processed_dataset.get_usa_by_states_df(
        state_input_file, intervention.value
    )
    if run_validation:
        validate_results.validate_states_df(states_key_name, states_df)

    state_shapefile_parts = generate_shapefiles.get_usa_state_shapefile(states_df)
    states_shp, states_shx, states_dbf = state_shapefile_parts
    if run_validation:
        validate_results.validate_states_shapefile(
            states_key_name, states_shp, states_shx, states_dbf
        )
    logger.info(f"Generated state shape files for {intervention.name}")

    # County-level projections.
    counties_key_name = f"counties.{intervention.name}"
    counties_df = build_processed_dataset.get_usa_by_county_with_projection_df(
        county_input_file, intervention.value
    )
    if run_validation:
        validate_results.validate_counties_df(counties_key_name, counties_df)

    county_shapefile_parts = generate_shapefiles.get_usa_county_shapefile(counties_df)
    counties_shp, counties_shx, counties_dbf = county_shapefile_parts
    if run_validation:
        validate_results.validate_counties_shapefile(
            counties_key_name, counties_shp, counties_shx, counties_dbf
        )

    return (
        DodInterventionResult(
            states_key_name, states_df, state_shapefile_parts
        ),
        DodInterventionResult(
            counties_key_name, counties_df, county_shapefile_parts
        ),
    )
# Ejemplo n.º 3
# 0
def test_get_usa_by_states_df():
    # Builds the USA-by-states dataframe with the state projections loader
    # patched out, then checks Texas test-count fields in the result.
    empty_df_with_state = pd.DataFrame([], columns=[
        "state",
    ])
    # Patch the projections loader to return an empty frame (only a "state"
    # column) so the dataframe under test is built from the other inputs.
    with patch("libs.build_processed_dataset.get_state_projections_df",
               return_value=empty_df_with_state):
        df = build_processed_dataset.get_usa_by_states_df(
            input_dir="/tmp", intervention=Intervention.OBSERVED_INTERVENTION)
    # NOTE(review): "TX" is passed where other call sites pass a key name like
    # "states.<INTERVENTION>" — confirm this is intentional.
    validate_results.validate_states_df("TX", df)

    # Exactly one Texas row is expected; `one(...)` raises otherwise.
    tx_record = one(
        df.loc[df[CommonFields.STATE_FULL_NAME] == "Texas"].to_dict(
            orient="records"))
    # NOTE(review): these assertions presume cumulative test counts come from
    # a data source other than the patched projections — verify the fixture
    # actually populates them.
    assert tx_record[CUMULATIVE_POSITIVE_TESTS] > 100
    assert tx_record[CUMULATIVE_NEGATIVE_TESTS] > 100
# Ejemplo n.º 4
# 0
def run_projections(
    input_file, aggregation_level, intervention: Intervention, run_validation=True
) -> APIPipelineProjectionResult:
    """Run the projections for the given intervention at one aggregation level
    in order to generate the api.

    Args:
        input_file: Input file to load model output results from.
        aggregation_level: AggregationLevel (STATE or COUNTY) to summarize.
        intervention: Intervention Enum to be used to generate results
        run_validation: If true runs validation on generated dataframes.

    Returns: APIPipelineProjectionResult for state or county data.

    Raises:
        ValueError: If aggregation_level is neither STATE nor COUNTY.
    """

    if aggregation_level == AggregationLevel.STATE:
        # Run State level projections.
        states_key_name = f"states.{intervention.name}"
        states_df = build_processed_dataset.get_usa_by_states_df(
            input_file, intervention.value
        )
        if run_validation:
            validate_results.validate_states_df(states_key_name, states_df)

        return APIPipelineProjectionResult(
            intervention, AggregationLevel.STATE, states_df
        )
    elif aggregation_level == AggregationLevel.COUNTY:
        # Run County level projections.
        counties_key_name = f"counties.{intervention.name}"
        counties_df = build_processed_dataset.get_usa_by_county_with_projection_df(
            input_file, intervention.value
        )
        if run_validation:
            # Note: county validation additionally takes the intervention.
            validate_results.validate_counties_df(
                counties_key_name, counties_df, intervention
            )

        return APIPipelineProjectionResult(
            intervention, AggregationLevel.COUNTY, counties_df
        )
    else:
        # Fixed typo in the original message ("aggreation").
        raise ValueError("Invalid aggregation level specified")