Python build_dataset 예제들, tests.test_helpers.build_dataset Python 예제들

예제 #1

0

파일 보기

def test_provenance():
    """Test that the positivity output carries the provenance of its positive tests input.

    AS only has POSITIVE_TESTS (provenance "pt_src1") while TX has both POSITIVE_TESTS and
    POSITIVE_TESTS_VIRAL. The expected output is tagged "pt_src1" for AS and "pos_viral"
    for TX.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")
    dataset_in = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                    [0, 2, 4, 6], provenance="pt_src1"
                ),
                CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
            },
            tx_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                    [1, 2, 3, 4], provenance="pt_src2"
                ),
                CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                    [10, 20, 30, 40], provenance="pos_viral"
                ),
                CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
            },
        }
    )

    # The viral method is listed first, the plain positive tests method second.
    viral_over_total = DivisionMethod(
        DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
    )
    positive_over_total = DivisionMethod(
        DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
    )
    all_methods = AllMethods.run(
        dataset_in, [viral_over_total, positive_over_total], diff_days=3
    )

    # A single value per region, dated 2020-04-04, is expected.
    expected_positivity = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.02], provenance=["pt_src1"])
            },
            tx_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.1], provenance="pos_viral")
            },
        },
        start_date="2020-04-04",
    )
    test_helpers.assert_dataset_like(all_methods.test_positivity, expected_positivity)

예제 #2

0

파일 보기

def test_preserve_tags():
    """Test that tags on the input timeseries show up on the test positivity output.

    For AS the expected output has the tags of both POSITIVE_TESTS and TOTAL_TESTS. For TX
    the expected output has only the two TOTAL_TESTS tags; the tag attached to TX
    POSITIVE_TESTS is not expected to appear.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")
    long_tail_tag = test_helpers.make_tag(
        type=TagType.CUMULATIVE_LONG_TAIL_TRUNCATED, date="2020-04-04"
    )
    tail_tag = test_helpers.make_tag(type=TagType.CUMULATIVE_TAIL_TRUNCATED, date="2020-04-04")
    # Attached to a timeseries whose tags are absent from the expected output.
    unexpected_tag = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-01")
    outlier_tag_apr4 = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-04")
    outlier_tag_apr3 = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-03")

    dataset_in = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                    [1, 2, 3, 4], annotation=[long_tail_tag], provenance="pos"
                ),
                CommonFields.TOTAL_TESTS: TimeseriesLiteral(
                    [100, 200, 300, 400], annotation=[tail_tag]
                ),
            },
            tx_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                    [None, None, 3, 4], annotation=[unexpected_tag]
                ),
                CommonFields.POSITIVE_TESTS_VIRAL: [10, 20, 30, 40],
                CommonFields.TOTAL_TESTS: TimeseriesLiteral(
                    [100, 200, 300, 400], annotation=[outlier_tag_apr4, outlier_tag_apr3]
                ),
            },
        }
    )

    positive_over_total = DivisionMethod(
        DatasetName("method1"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
    )
    viral_over_total = DivisionMethod(
        DatasetName("method2"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
    )
    all_methods = AllMethods.run(
        dataset_in, [positive_over_total, viral_over_total], diff_days=3
    )

    expected_positivity = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.01], provenance="pos", annotation=[long_tail_tag, tail_tag]
                )
            },
            tx_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.1], annotation=[outlier_tag_apr4, outlier_tag_apr3]
                )
            },
        },
        start_date="2020-04-04",
    )
    test_helpers.assert_dataset_like(all_methods.test_positivity, expected_positivity)

예제 #3

0

파일 보기

파일: zeros_filter_test.py 프로젝트: ConsultingMD/covid-data-model

def test_basic():
    """Test drop_all_zero_timeseries on three regions with VACCINES_DISTRIBUTED data.

    The TX and HI vaccine timeseries are expected to be dropped and reported in a single
    log event; the SF timeseries, which has a non-zero value, and the tagged HI CASES
    timeseries are expected to remain.
    """
    tx = Region.from_state("TX")
    sf = Region.from_fips("06075")
    hi = Region.from_state("HI")
    # CASES is not in the list of fields passed to the filter; it carries a tag so the
    # test can check the tag is preserved.
    tagged_cases = TimeseriesLiteral(
        [0, 0, 0], annotation=[test_helpers.make_tag(date="2020-04-01")]
    )
    ds_in = test_helpers.build_dataset(
        {
            tx: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 0]},
            sf: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 1]},
            hi: {
                CommonFields.VACCINES_DISTRIBUTED: [0, 0, None],
                CommonFields.CASES: tagged_cases,
            },
        }
    )

    with structlog.testing.capture_logs() as captured:
        ds_out = zeros_filter.drop_all_zero_timeseries(
            ds_in, [CommonFields.VACCINES_DISTRIBUTED]
        )

    ds_expected = test_helpers.build_dataset(
        {
            sf: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 1]},
            hi: {CommonFields.CASES: tagged_cases},
        }
    )

    # Exactly one log event is expected, listing both dropped timeseries.
    event = more_itertools.one(captured)
    assert event["event"] == zeros_filter.DROPPING_TIMESERIES_WITH_ONLY_ZEROS
    expected_dropped = pd.MultiIndex.from_tuples(
        [
            (hi.location_id, CommonFields.VACCINES_DISTRIBUTED),
            (tx.location_id, CommonFields.VACCINES_DISTRIBUTED),
        ]
    )
    assert expected_dropped.equals(event["dropped"])
    test_helpers.assert_dataset_like(ds_expected, ds_out)

예제 #4

0

파일 보기

def test_default_positivity_methods():
    """Run AllMethods.run without an explicit methods list and check the positivity output."""
    # No methods are passed to AllMethods.run on purpose, so the methods used in production
    # are the ones exercised here.
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")
    dataset_in = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                    [0, 1, 2, 3, 4, 5, 6, 7], provenance="src1"
                ),
                CommonFields.NEGATIVE_TESTS: TimeseriesLiteral(
                    [10, 19, 28, 37, 46, 55, 64, 73], provenance="src1"
                ),
            },
            tx_region: {
                CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                    [2, 4, 6, 8, 10, 12, 14, 16], provenance="pos_tests"
                ),
                CommonFields.TOTAL_TESTS_VIRAL: [10, 20, 30, 40, 50, 60, 70, 80],
            },
        }
    )

    # TODO(tom): Once test positivity code seems stable remove call to datetime.today() in
    #  has_recent_data and remove this freeze_time.
    with freeze_time("2020-04-14"):
        all_methods = AllMethods.run(dataset_in, diff_days=1)

    # Seven values per region, starting 2020-04-02, are expected from eight input days.
    expected_positivity = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], provenance="src1"
                )
            },
            tx_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2], provenance="pos_tests"
                )
            },
        },
        start_date="2020-04-02",
    )
    test_helpers.assert_dataset_like(all_methods.test_positivity, expected_positivity)

예제 #5

0

파일 보기

def test_column_present_with_no_data():
    """Test that AllMethods.run raises when the numerator column exists but holds no data.

    POSITIVE_TESTS is only requested through `timeseries_columns`; no values are supplied
    for it, and NoColumnsWithDataException is expected.
    """
    tx_region = Region.from_state("TX")
    dataset = test_helpers.build_dataset(
        {tx_region: {CommonFields.TOTAL_TESTS: [100, 200, 400]}},
        timeseries_columns=[CommonFields.POSITIVE_TESTS],
    )
    positive_over_total = DivisionMethod(
        DatasetName("method2"),
        CommonFields.POSITIVE_TESTS,
        CommonFields.TOTAL_TESTS,
        recent_days=1,
    )
    methods = [positive_over_total]
    with pytest.raises(test_positivity.NoColumnsWithDataException):
        AllMethods.run(dataset, methods, diff_days=1)

예제 #6

0

파일 보기

파일: combined_dataset_test.py 프로젝트: epius/covid-data-model

def test_dataclass_include_exclude():
    """Tests datasource_regions include/exclude filtering, using mock data for speed."""
    metrics_for_every_region = {
        CommonFields.CASES: [100, 200, 300],
        CommonFields.DEATHS: [0, 1, 2],
    }
    state_regions = [Region.from_state(abbr) for abbr in "AZ CA NY IL TX".split()]
    county_regions = [Region.from_fips(code) for code in "06037 06045 17031 17201".split()]
    mock_dataset = test_helpers.build_dataset(
        {region: metrics_for_every_region for region in state_regions + county_regions}
    )

    # Make a new subclass to keep this test separate from others in the make_dataset lru_cache.
    class DataSourceForTest(data_source.DataSource):
        EXPECTED_FIELDS = [CommonFields.CASES, CommonFields.DEATHS]
        SOURCE_TYPE = "DataSourceForTest"

        @classmethod
        def make_dataset(cls) -> timeseries.MultiRegionDataset:
            return mock_dataset

    tx_id = "iso1:us#iso2:us-tx"
    ny_id = "iso1:us#iso2:us-ny"

    # The unfiltered source contains both states.
    unfiltered_ids = DataSourceForTest.make_dataset().location_ids
    assert tx_id in unfiltered_ids
    assert ny_id in unfiltered_ids

    # Restricting to NY is expected to leave TX out.
    ny_only_source = combined_datasets.datasource_regions(
        DataSourceForTest, RegionMask(states=["NY"])
    )
    ny_only_ids = ny_only_source.make_dataset().location_ids
    assert tx_id not in ny_only_ids
    assert ny_id in ny_only_ids

    # California counties, with the region for FIPS 06037 excluded.
    ca_counties_source = combined_datasets.datasource_regions(
        DataSourceForTest,
        RegionMask(AggregationLevel.COUNTY, states=["CA"]),
        exclude=Region.from_fips("06037"),
    )
    ca_county_ids = ca_counties_source.make_dataset().location_ids
    assert tx_id not in ca_county_ids
    assert "iso1:us#iso2:us-ca" not in ca_county_ids
    assert "iso1:us#iso2:us-ca#fips:06045" in ca_county_ids
    assert "iso1:us#iso2:us-ca#fips:06037" not in ca_county_ids

    # Just Cook County, IL
    cook_county_source = combined_datasets.datasource_regions(
        DataSourceForTest, include=Region.from_fips("17031")
    )
    cook_county_ds = cook_county_source.make_dataset()
    assert cook_county_ds.location_ids.to_list() == ["iso1:us#iso2:us-il#fips:17031"]

예제 #7

0

파일 보기

def test_aggregate_reporting_ratio(reporting_ratio, expected_na):
    """Test CountyToCBSAAggregator.aggregate with a required reporting ratio.

    NY (population 900) has one CASES value and AZ (population 100) has None for CASES.
    Both regions are mapped to the same aggregate, keyed by the FIPS of the "us" region.

    Args:
        reporting_ratio: Value passed as `reporting_ratio_required_to_aggregate`.
        expected_na: When truthy the aggregated CASES timeseries is expected to be empty,
            otherwise it is expected to have at least one entry.

    NOTE(review): the arguments are presumably supplied by a pytest parametrize decorator
    that is not visible in this excerpt -- confirm against the original file.
    """
    ny_region = Region.from_state("NY")
    az_region = Region.from_state("AZ")
    us_region = Region.from_iso1("us")
    metrics = {ny_region: {CommonFields.CASES: [100]}, az_region: {CommonFields.CASES: [None]}}
    static = {ny_region: {CommonFields.POPULATION: 900}, az_region: {CommonFields.POPULATION: 100}}
    dataset = test_helpers.build_dataset(metrics, static_by_region_then_field_name=static)

    # The aggregator is configured by FIPS, so the region-to-region mapping is expressed
    # directly in `county_map`.
    agg = statistical_areas.CountyToCBSAAggregator(
        county_map={ny_region.fips: us_region.fips, az_region.fips: us_region.fips},
        cbsa_title_map={us_region.fips: "Stat Area 1"},
        aggregations=[],
    )
    aggregation = agg.aggregate(dataset, reporting_ratio_required_to_aggregate=reporting_ratio)
    cases = aggregation.timeseries[CommonFields.CASES]
    # `len` is used rather than truthiness; NOTE(review): `cases` is presumably a pandas
    # object, for which plain truthiness would be ambiguous -- confirm.
    if expected_na:
        assert not len(cases)
    else:
        assert len(cases)

예제 #8

0

파일 보기

def test_recent_days():
    """Test how the `recent_days` attribute of the methods affects the selected method.

    AS has POSITIVE_TESTS_VIRAL values only for the first two days while TX has all four.
    With recent_days=2 the expected AS provenance is "pos" and with recent_days=3 it is
    "pos_viral"; the expected TX provenance is "pos_viral" in both runs.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")
    ds = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral([0, 2, 4, 6], provenance="pos"),
                CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                    [0, 20, None, None], provenance="pos_viral"
                ),
                CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400]),
            },
            tx_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral([1, 2, 3, 4], provenance="pos"),
                CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                    [10, 20, 30, 40], provenance="pos_viral"
                ),
                CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400]),
            },
        }
    )
    viral_over_total = DivisionMethod(
        DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
    )
    positive_over_total = DivisionMethod(
        DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
    )

    # First run: recent_days=2.
    methods_recent_2 = _replace_methods_attribute(
        [viral_over_total, positive_over_total], recent_days=2
    )
    positivity_recent_2 = AllMethods.run(ds, methods_recent_2, diff_days=1).test_positivity

    expected_as = {
        CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.02, 0.02, 0.02], provenance="pos")
    }
    expected_tx = {
        CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.1, 0.1, 0.1], provenance="pos_viral")
    }
    expected_positivity = test_helpers.build_dataset(
        {as_region: expected_as, tx_region: expected_tx}, start_date="2020-04-02",
    )
    test_helpers.assert_dataset_like(positivity_recent_2, expected_positivity)
    as_provenance = positivity_recent_2.get_one_region(as_region).provenance
    assert as_provenance == {CommonFields.TEST_POSITIVITY: ["pos"]}
    tx_provenance = positivity_recent_2.get_one_region(tx_region).provenance
    assert tx_provenance == {CommonFields.TEST_POSITIVITY: ["pos_viral"]}

    # Second run: recent_days=3, derived from the methods used in the first run.
    methods_recent_3 = _replace_methods_attribute(methods_recent_2, recent_days=3)
    positivity_recent_3 = AllMethods.run(ds, methods_recent_3, diff_days=1).test_positivity
    provenance_by_location = positivity_recent_3.provenance
    for location_id in ("iso1:us#iso2:us-as", "iso1:us#iso2:us-tx"):
        assert provenance_by_location.loc[location_id].to_dict() == {
            CommonFields.TEST_POSITIVITY: "pos_viral"
        }