Example #1
def test_multi_region_get_one_region():
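    # Rows with a date populate the per-region timeseries; rows with an empty
    # date supply the "latest" values exposed via region.latest.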
    ts = timeseries.MultiRegionTimeseriesDataset.from_csv(
        io.StringIO(
            "location_id,county,aggregate_level,date,m1,m2\n"
            "iso1:us#fips:97111,Bar County,county,2020-04-02,2,\n"
            "iso1:us#fips:97222,Foo County,county,2020-04-01,,10\n"
            "iso1:us#fips:97111,Bar County,county,,3,\n"
            "iso1:us#fips:97222,Foo County,county,,,11\n"
        )
    )
    region_97111_ts = ts.get_one_region(Region.from_fips("97111"))
    assert to_dict(["date"], region_97111_ts.data[["date", "m1", "m2"]]) == {
        pd.to_datetime("2020-04-02"): {"m1": 2}
    }
    assert region_97111_ts.latest["m1"] == 3

    region_97222_ts = ts.get_one_region(Region.from_fips("97222"))
    assert to_dict(["date"], region_97222_ts.data) == {
        pd.to_datetime("2020-04-01"): {
            "m2": 10,
            "county": "Foo County",
            "fips": "97222",
            "location_id": "iso1:us#fips:97222",
            "aggregate_level": "county",
        }
    }
    assert region_97222_ts.latest["m2"] == 11
Example #2
def test_transform():
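    # County and state rows with "(unassigned)" in their locationID should be
    # dropped and logged; negative_tests is derived as tested minus cases.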
    with structlog.testing.capture_logs() as logs:
        transformer = update_covid_data_scraper.CovidDataScraperTransformer.make_with_data_root(
            update_covid_data_scraper.DATA_ROOT, structlog.get_logger(),
        )
        transformer.timeseries_csv_local_path = StringIO(
            "locationID,county,country,state,level,cases,deaths,tested,date\n"
            "iso1:us#iso2:us-ak#fips:02013,Aleutians East Borough,United States,Alaska,county,10,1,100,2020-06-01\n"
            "iso1:us#iso2:us-ak#fips:02013,Aleutians East Borough,United States,Alaska,county,11,1,110,2020-06-02\n"
            "iso1:us#iso2:us-ak,,United States,Alaska,state,20,2,200,2020-06-01\n"
            "iso1:us#iso2:us-ak#(unassigned),,United States,Alaska,state,2000,200,20000,2020-06-01\n"
            "iso1:us#iso2:us-ak#(unassigned),,United States,Alaska,county,2000,200,20000,2020-06-01\n"
        )
        df = transformer.transform()

    expected_df = pd.read_csv(
        StringIO(
            "country,county,state,fips,aggregate_level,date,cases,deaths,negative_tests\n"
            "USA,Aleutians East Borough,AK,02013,county,2020-06-01,10,1,90\n"
            "USA,Aleutians East Borough,AK,02013,county,2020-06-02,11,1,99\n"
            "USA,,AK,02,state,2020-06-01,20,2,180"
        ),
        dtype={CommonFields.FIPS: str},
        low_memory=False,
        parse_dates=[CommonFields.DATE],
    )

    assert to_dict(["fips", "date"], df) == to_dict(["fips", "date"], expected_df)

    assert [l["event"] for l in logs] == [
        "Dropping county rows with unexpected locationID",
        "Dropping state rows with unexpected locationID",
        UNEXPECTED_COLUMNS_MESSAGE,
    ]
Example #3
def test_transform_icu_greater_than_hospitalized():
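    # inIcuCurrently (1500) exceeds hospitalizedCurrently (150) on 2020-04-02,
    # so the ICU value should be blanked and a mismatch warning logged.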
    in_df = common_df.read_csv(
        StringIO(
            "date,state,positive,negative,fips,pending,inIcuCurrently,hospitalizedCurrently\n"
            "20200401,TX,10,1000,48,,10,100\n"
            "20200402,TX,11,1100,48,,1500,150\n"),
        set_index=False,
    )
    with structlog.testing.capture_logs() as logs:
        out_df = update_covid_tracking_data.transform(in_df)

    expected_df = common_df.read_csv(
        StringIO(
            "date,state,country,aggregate_level,positive_tests,negative_tests,fips,current_icu,current_hospitalized\n"
            "2020-04-01,TX,USA,state,10,1000,48,10,100\n"
            "2020-04-02,TX,USA,state,11,1100,48,,150\n"),
        set_index=False,
    )

    assert to_dict(["fips", "date"], out_df) == to_dict(["fips", "date"],
                                                        expected_df)

    assert [l["event"] for l in logs] == [
        ICU_HOSPITALIZED_MISMATCH_WARNING_MESSAGE,
        helpers.MISSING_COLUMNS_MESSAGE,
    ]
Example #4
def test_strip_whitespace():
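    # Surrounding whitespace in string cells (" b2", " c3 ") should be
    # stripped by strip_whitespace.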
    input_df = pd.read_csv(
        StringIO("""col_a,col_b,col_c,col_num
,b1,c1,1
a2, b2,c2,2
,b3," c3 ",3
"""))
    output_df = strip_whitespace(input_df)
    expected_df = pd.read_csv(
        StringIO("""col_a,col_b,col_c,col_num
,b1,c1,1
a2,b2,c2,2
,b3,c3,3
"""))
    assert to_dict(["col_c"], output_df) == to_dict(["col_c"], expected_df)
Example #5
def test_inference_ok_with_5_days_cases_changed():
    # 5 days with cases data isn't enough to make inference_ok; 6 days are
    # needed so that there are 5 days with a *delta* relative to the previous day.
    csv_string_io = io.StringIO(
        "location_id,country,state,county,aggregate_level,date,cases,deaths\n"
        "iso1:us#fips:97111,US,ZZ,Bar County,county,2020-04-01,100,1\n"
        "iso1:us#fips:97111,US,ZZ,Bar County,county,2020-04-02,200,2\n"
        "iso1:us#fips:97111,US,ZZ,Bar County,county,2020-04-03,300,3\n"
        "iso1:us#fips:97111,US,ZZ,Bar County,county,2020-04-04,400,4\n"
        "iso1:us#fips:97111,US,ZZ,Bar County,county,2020-04-05,500,5\n"
        "iso1:us#fips:97222,US,ZZ,Foo County,county,2020-04-01,100,1\n"
        "iso1:us#fips:97222,US,ZZ,Foo County,county,2020-04-02,200,2\n"
        "iso1:us#fips:97222,US,ZZ,Foo County,county,2020-04-03,300,3\n"
        "iso1:us#fips:97222,US,ZZ,Foo County,county,2020-04-04,400,4\n"
        "iso1:us#fips:97222,US,ZZ,Foo County,county,2020-04-05,500,5\n"
        "iso1:us#fips:97222,US,ZZ,Foo County,county,2020-04-06,600,6\n"
        "iso1:us#fips:97111,US,ZZ,Bar County,county,,500,5\n"
        "iso1:us#fips:97222,US,ZZ,Foo County,county,,100,1\n")
    input_dataset = MultiRegionTimeseriesDataset.from_csv(csv_string_io)

    df = WhitelistGenerator().generate_whitelist(input_dataset)

    assert to_dict(["fips"], df) == {
        "97111": {
            "state": "ZZ",
            "county": "Bar County",
            "inference_ok": False
        },
        "97222": {
            "state": "ZZ",
            "county": "Foo County",
            "inference_ok": True
        },
    }
Example #6
def test_fix_missing_pos():
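    # positive_tests is missing on 2020-04-02; it should be synthesized as
    # total_tests - negative_tests (20 - 17 = 3), with the fix recorded in
    # the provenance series.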
    df = read_csv_and_index_fips_date(
        "fips,date,negative_tests,positive_tests,total_tests,cases\n"
        "97123,2020-04-01,9,1,10,1\n"
        "97123,2020-04-02,17,,20,2\n"
        "97123,2020-04-03,26,4,30,4\n").reset_index()

    result_df, provenance = CovidCountyDataDataSource.synthesize_test_metrics(
        df)

    assert to_dict([CommonFields.FIPS, CommonFields.DATE], result_df) == {
        ("97123", pd.to_datetime("2020-04-01")): {
            CommonFields.NEGATIVE_TESTS: 9,
            CommonFields.POSITIVE_TESTS: 1,
            CommonFields.TOTAL_TESTS: 10,
            CommonFields.CASES: 1,
        },
        ("97123", pd.to_datetime("2020-04-02")): {
            CommonFields.NEGATIVE_TESTS: 17,
            CommonFields.POSITIVE_TESTS: 3,
            CommonFields.TOTAL_TESTS: 20,
            CommonFields.CASES: 2,
        },
        ("97123", pd.to_datetime("2020-04-03")): {
            CommonFields.NEGATIVE_TESTS: 26,
            CommonFields.POSITIVE_TESTS: 4,
            CommonFields.TOTAL_TESTS: 30,
            CommonFields.CASES: 4,
        },
    }
    assert provenance.to_dict() == {
        ("97123", CommonFields.NEGATIVE_TESTS): "none",
        ("97123", CommonFields.POSITIVE_TESTS): "none;missing_pos",
    }
Example #7
def test_update_nytimes_virgin_islands():
    updater = NYTimesUpdater.make_with_data_root(DATA_ROOT)
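    # "Virgin Islands" should be normalized to "U.S. Virgin Islands" with
    # state code VI and country USA.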
    data = common_df.read_csv(
        io.StringIO(
            "county,state_full_name,aggregate_level,fips,date,cases,deaths\n"
            ",Virgin Islands,state,78,2020-07-31,10,1\n")).reset_index()
    results = updater.transform(data)

    expected = common_df.read_csv(
        io.StringIO(
            "country,county,state_full_name,state,aggregate_level,fips,date,cases,deaths\n"
            "USA,,U.S. Virgin Islands,VI,state,78,2020-07-31,10,1\n")
    ).reset_index()
    results_dict = common_test_helpers.to_dict(["state", "state_full_name"], results)
    expected_dict = common_test_helpers.to_dict(["state", "state_full_name"], expected)
    assert results_dict == expected_dict
Example #8
def test_remove_duplicate_city_data():
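    # Per the metric_a labels: the 2020-03-23 Smithville city row is removed,
    # the 2020-03-22 row is kept (gaining a county value), and plain county
    # rows are not touched.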
    input_df = pd.read_csv(
        StringIO("city,county,state,fips,date,metric_a\n"
                 "Smithville,,ZZ,97123,2020-03-23,march23-removed\n"
                 "Smithville,,ZZ,97123,2020-03-22,march22-kept\n"
                 "New York City,,ZZ,97324,2020-03-22,march22-ny-patched\n"
                 ",North County,ZZ,97001,2020-03-22,county-not-touched\n"
                 ",North County,ZZ,97001,2020-03-23,county-not-touched\n"))

    output_df = update_covid_data_scraper.remove_duplicate_city_data(input_df)
    expected_df = pd.read_csv(
        StringIO(
            "city,county,state,fips,date,metric_a\n"
            "Smithville,Smithville,ZZ,97123,2020-03-22,march22-kept\n"
            "New York City,New York,ZZ,97324,2020-03-22,march22-ny-patched\n"
            ",North County,ZZ,97001,2020-03-22,county-not-touched\n"
            ",North County,ZZ,97001,2020-03-23,county-not-touched\n"))

    assert to_dict(["fips", "date"], output_df) == to_dict(["fips", "date"],
                                                           expected_df)
Example #9
def test_bad_county():
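    # "Not A County" has no FIPS match, so the row should be dropped, leaving
    # an empty result and a warning in the logs.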
    with structlog.testing.capture_logs() as logs, requests_mock.Mocker() as m:
        m.get(
            SOURCE_URL,
            text="""foo,bar
date,county_name,vents
04/01,Not A County,100
""",
        )
        transformer = CsvCopy.make_with_data_root(DATA_ROOT)
        df = transformer.transform()
    assert to_dict([CommonFields.FIPS, CommonFields.DATE], df) == {}
    assert [l["event"] for l in logs] == [
        "Fetching URL",
        "Imported county name not found in FIPS data",
    ]
Example #10
def test_bad_float():
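    # "#REF!" cannot be parsed as a float, so the vents value should be
    # dropped while the rest of the row is kept.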
    with structlog.testing.capture_logs() as logs, requests_mock.Mocker() as m:
        m.get(
            SOURCE_URL,
            text="""foo,bar
date,county_name,vents
04/01,Carson City,#REF!
""",
        )
        transformer = CsvCopy.make_with_data_root(DATA_ROOT)
        df = transformer.transform()
    assert to_dict([CommonFields.FIPS, CommonFields.DATE], df) == {
        ("32510", "2020-04-01"): {
            "county": "Carson City"
        },
    }
    assert [l["event"]
            for l in logs] == ["Fetching URL", "Dropping value not a float"]
Example #11
def test_multi_region_get_counties():
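    # get_counties() should drop the state row and the dateless "latest" rows,
    # keeping only county rows dated after 2020-04-01.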
    ts = timeseries.MultiRegionTimeseriesDataset.from_csv(
        io.StringIO(
            "location_id,county,aggregate_level,date,m1,m2\n"
            "iso1:us#fips:97111,Bar County,county,2020-04-02,2,\n"
            "iso1:us#fips:97111,Bar County,county,2020-04-03,3,\n"
            "iso1:us#fips:97222,Foo County,county,2020-04-01,,10\n"
            "iso1:us#fips:97,Great State,state,2020-04-01,1,2\n"
            "iso1:us#fips:97111,Bar County,county,,3,\n"
            "iso1:us#fips:97222,Foo County,county,,,10\n"
            "iso1:us#fips:97,Great State,state,,1,2\n"
        )
    )
    counties_ts = ts.get_counties(after=pd.to_datetime("2020-04-01"))
    assert to_dict(["fips", "date"], counties_ts.data[["fips", "date", "m1"]]) == {
        ("97111", pd.to_datetime("2020-04-02")): {"m1": 2},
        ("97111", pd.to_datetime("2020-04-03")): {"m1": 3},
    }
Example #12
def test_skip_gaps_in_cases_and_deaths_metrics():
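    # Missing values on 2020-04-02 (cases) and 2020-04-03 (deaths) break the
    # run of daily deltas, so inference_ok should be False.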
    csv_string_io = io.StringIO(
        "location_id,country,state,county,aggregate_level,date,cases,deaths\n"
        "iso1:us#fips:97111,US,ZZ,Bar County,county,2020-04-01,10,1\n"
        "iso1:us#fips:97111,US,ZZ,Bar County,county,2020-04-02,,2\n"
        "iso1:us#fips:97111,US,ZZ,Bar County,county,2020-04-03,30,\n"
        "iso1:us#fips:97111,US,ZZ,Bar County,county,2020-04-04,40,4\n"
        "iso1:us#fips:97111,US,ZZ,Bar County,county,,40,4\n")
    input_dataset = MultiRegionTimeseriesDataset.from_csv(csv_string_io)

    df = WhitelistGenerator().generate_whitelist(input_dataset)

    assert to_dict(["fips"], df) == {
        "97111": {
            "state": "ZZ",
            "county": "Bar County",
            "inference_ok": False
        },
    }
Example #13
def test_nha_basic():
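    # Well-formed rows should be matched to Nevada FIPS codes ("Carson City"
    # -> 32510, "Clark" -> "Clark County", 32003), with only the fetch logged.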
    with structlog.testing.capture_logs() as logs, requests_mock.Mocker() as m:
        m.get(
            SOURCE_URL,
            text="""foo,bar
date,county_name,vents
04/01,Carson City,300
04/02,Clark,200
""",
        )
        transformer = CsvCopy.make_with_data_root(DATA_ROOT)
        df = transformer.transform()
    assert to_dict([CommonFields.FIPS, CommonFields.DATE], df) == {
        ("32510", "2020-04-01"): {
            "county": "Carson City",
            "vents": "300"
        },
        ("32003", "2020-04-02"): {
            "county": "Clark County",
            "vents": "200"
        },
    }
    assert [l["event"] for l in logs] == ["Fetching URL"]