def test_compare_counties(data_clean, gdf_processed):
    """
    Smoke test for compare_counties using deaths data for Greene and its neighbors.

    Makes no assertions: it only checks that the call completes and prints the
    result for manual inspection.
    """
    county = "Greene"
    region = [county] + get_neighbors(county, gdf_processed)
    result = compare_counties(
        data_clean["deaths"],
        clean_rules=DATA_INDEX["deaths"]["clean_rules"],
        compare_field="moving_avg_per_capita",
        counties=region,
    )
    print("\n", result)
def test_process_neighbors_per_capita(data_clean, gdf_processed):
    """
    Smoke test: per capita moving-average comparison of cases for Dauphin and its neighbors.

    Makes no assertions: the comparison is re-indexed by its "date" column and
    printed for manual inspection.
    """
    county = "Dauphin"
    region = [county] + get_neighbors(county, gdf_processed)
    per_capita = compare_counties(
        data_clean["cases"],
        clean_rules=DATA_INDEX["cases"]["clean_rules"],
        compare_field="moving_avg_per_capita",
        counties=region,
    )
    print(per_capita.set_index("date"))
Example #3
0
def greene_region_deaths_moving_avg_per_cap(data_clean, gdf_processed) -> pd.DataFrame:
    """
    Compare the "moving_avg_per_capita" field of the deaths data for Greene and its
    neighboring counties.

    Returns whatever compare_counties produces for that region, using the clean rules
    registered for "deaths" in DATA_INDEX.
    """
    county = "Greene"
    region = [county] + get_neighbors(county, gdf_processed)
    deaths = data_clean["deaths"]
    deaths_rules = DATA_INDEX["deaths"]["clean_rules"]
    return compare_counties(
        deaths,
        clean_rules=deaths_rules,
        compare_field="moving_avg_per_capita",
        counties=region,
    )
def test_process_neighbors_total(data_clean, gdf_processed):
    """
    Check cumulative case totals for two of Dauphin's neighbors on known dates.

    compare_counties is run with compare_field="total" for Dauphin plus its
    neighbors; the result is indexed by "date" and two cells are compared against
    fixed expected values.
    """
    neighbors = get_neighbors("Dauphin", gdf_processed)
    compare_list = ["Dauphin"] + neighbors
    data_clean_cases = data_clean["cases"]
    clean_rules = DATA_INDEX["cases"]["clean_rules"]
    df = compare_counties(
        data_clean_cases,
        clean_rules=clean_rules,
        compare_field="total",
        counties=compare_list,
    )
    df = df.set_index("date")
    cases_lebanon_july_26 = df.at["2020-07-26", "lebanon"]
    # This value comes from the "cumberland" column, so the name says Cumberland
    # (it was previously mislabelled as a Lebanon figure).
    cases_cumberland_july_23 = df.at["2020-07-23", "cumberland"]
    assert cases_lebanon_july_26 == 1544
    assert cases_cumberland_july_23 == 1066
    def gen_desc_neighbors(self, *, data_type: str) -> str:
        """
        Build the descriptive text shown with the neighbors comparison chart.

        The county and its neighbors are ranked on the
        "<data_type>_added_past_two_weeks_per_capita" column of self.gdf, and the
        ranking is turned into a sentence. An extra sentence about ties is included
        only when some, but not all, of the neighbors share the county's rank.

        Args:
            data_type (str): Type of data. Eg. "cases".

        Returns:
            str: Description text containing [b]...[/b] emphasis markers.
        """
        county = self.county_name_clean
        neighbors = get_neighbors(county, self.gdf)
        total_neighbors = len(neighbors)

        # Restrict the geodataframe to the county plus its neighbors before ranking.
        in_region = self.gdf["NAME"].isin([county] + neighbors)
        ranking = self.gdf_get_ranking(
            self.gdf[in_region], f"{data_type}_added_past_two_weeks_per_capita")
        from_top, from_bottom, tied = ranking
        rank_phrase = rank_text(from_top, from_bottom)

        # Ties are only mentioned when at least one, but not every, neighbor is tied.
        partial_tie = tied > 0 and tied != total_neighbors
        tie_sentence = (
            f"[b]{tied}[/b] other "
            f"{p.singular_noun('counties', tied)} had the same per capita rate of"
            f" {data_type}."
        ) if partial_tie else ""

        # NOTE: with an empty tie_sentence the text contains two consecutive spaces
        # after "two weeks."; this matches the existing output.
        neighbor_words = p.number_to_words(total_neighbors)
        return (
            f"Compared to its {neighbor_words} neighboring counties, {county} "
            f"County had the [b]{rank_phrase}[/b] number of {data_type} per 100,000 people over "
            f"the past two weeks. {tie_sentence} Here's how {county}'s per capita 7-day moving average compares to "
            f"its neighbors:")
def gen_chart(
    county_name_clean: str,
    data_type: str,
    *,
    data_index: Dict,
    chart_dict: Dict,
    data_clean: Dict,
    county_data: Dict[str, pd.DataFrame],
    gdf: geopandas.GeoDataFrame,
    primary_color: str,
    secondary_color: str,
    aws_bucket: str,
    aws_dir: str,
) -> Dict[str, Union[Union[str, None, List[Dict[str, str]]], Any]]:
    """
    Creates a chart PNG using Altair and copies it to S3. Returns a Dict with the chart's title, an
    optional custom legend, the URL of the uploaded image and descriptive text for the chart.

    The kind of chart is selected by substring matching on chart_dict["type"]; the first matching
    branch below wins.

    Args:
        county_name_clean (str): Name of county without 'County' suffix. Eg. "Dauphin"
        data_type (str): Type of data. Eg. "cases".
        data_index (Dict): Config settings for data. Only data_index[data_type]["clean_rules"] is read,
            and only for the neighbors chart.
        chart_dict (Dict): Config settings for chart. Must contain "type"; other keys ("color_field",
            "legend_title", "compare_field", "custom_legend", "title") are read depending on chart type.
        data_clean (Dict[str, pd.DataFrame]): Dict of pandas dfs of cases, deaths, tests data for all Pa.
            counties that has had some minimal cleaning.
        county_data (Dict[str, pd.DataFrame]): Processed cases, deaths, tests, etc data for a specific
            county.
        gdf (geopandas.GeoDataFrame): Pa geodataframe with cases, deaths, tests data merged on to it.
        primary_color (str): Hex code for color theme.
        secondary_color (str): Hex code for color theme.
        aws_bucket (str): AWS bucket where charts will be uploaded to. Required; also used as the host
            name of the returned image URL.
        aws_dir (str): Directory within AWS bucket where charts will be uploaded. Required.

    Returns:
        Dict[str, Union[Union[str, None, List[Dict[str, str]]], Any]]: Dict with keys "title",
        "custom_legend", "image_path" (an https URL, not a local path) and "description".

    Raises:
        ValueError: If chart_dict["type"] does not match any supported chart type.
    """

    chart_type = chart_dict["type"]
    custom_legend = None
    fmt = "png"
    content_type = "image/png"
    gen_desc = GenStats(county_name_clean, gdf=gdf)

    if "daily_and_avg" in chart_type:
        chart = chart_bar_and_line(
            data_type=data_type,
            df=county_data[data_type],
            line_color=primary_color,
            bar_color=secondary_color,
        )
        custom_legend = chart_dict.get("custom_legend")
        chart_desc = gen_desc.gen_desc_daily(data_type=data_type)

    elif "choropleth" in chart_type:
        chart = map_choropleth(
            gdf,
            color_field=chart_dict["color_field"],
            highlight_polygon=county_name_clean,
            min_color=secondary_color,
            max_color=primary_color,
            legend_title=chart_dict["legend_title"],
        )
        chart_desc = gen_desc.gen_desc_choro(data_type=data_type)

    # NOTE(review): "neigbhors" is misspelled, but presumably matches the value used in the chart
    # config; confirm against the config before correcting the spelling here.
    elif "neigbhors_per_capita" in chart_type:
        compare_field = chart_dict["compare_field"]
        neighbors = get_neighbors(county_name_clean, gdf)
        neighbors = sort_counties_by_pop(neighbors)
        # The selected county always comes first, followed by its neighbors.
        compare_list = [county_name_clean] + neighbors
        df_data_type = data_clean[data_type]
        clean_rules = data_index[data_type]["clean_rules"]
        df_multi_county = compare_counties(
            df_data_type,
            clean_rules=clean_rules,
            compare_field=compare_field,
            counties=compare_list,
        )
        # Every column except "date" is treated as a county column to be stacked.
        county_cols = list(df_multi_county.columns)
        county_cols.remove("date")
        df_multi_county = stack_df(df_multi_county,
                                   stack_cols=county_cols,
                                   x_axis_col="date")
        chart = chart_faceted(
            df_multi_county,
            category_col="category",
            x_axis_col="date",
            y_axis_col="value",
            line_color=primary_color,
        )
        # This chart has no custom legend; custom_legend keeps its initial None.
        chart_desc = gen_desc.gen_desc_neighbors(data_type=data_type)

    elif "stacked_area" in chart_type:
        df = process_cumulative_tests(county_data["confirmed"],
                                      county_data["tests"])
        chart = chart_stacked_area(
            df,
            x_axis_col="date",
            y_axis_col="count",
            category_col="data_type",
            domain=["positive", "negative"],
            range_=[primary_color, secondary_color],
        )
        custom_legend = chart_dict.get("custom_legend")
        chart_desc = gen_desc.gen_desc_area_tests()
    else:
        # ValueError is still an Exception, so callers catching Exception are unaffected.
        raise ValueError(
            f"Chart type not found: {chart_type!r}. "
            "Did you provide a valid chart type in chart_index?"
        )

    image_filename = f"{county_name_clean.lower()}_{data_type}_{chart_type}.{fmt}"
    image_path = DIR_OUTPUT / image_filename
    save(chart, str(image_path))
    logging.info("...saved")

    # Move to s3
    copy_to_s3(image_path, aws_bucket, aws_dir, content_type=content_type)

    return {
        "title": chart_dict.get("title", "").upper(),
        "custom_legend": custom_legend,
        "image_path": f"https://{aws_bucket}/{aws_dir}/{image_filename}",
        "description": chart_desc,
    }
def test_get_neighbors(gdf_processed):
    """
    Check get_neighbors for Dauphin: Lebanon must be reported as a neighbor and
    Allegheny must not. The neighbor collection is printed for manual inspection.
    """
    dauphin_neighbors = get_neighbors("Dauphin", gdf_processed)
    assert "Lebanon" in dauphin_neighbors
    assert "Allegheny" not in dauphin_neighbors
    print(dauphin_neighbors)