def test_provenance():
    """Check the provenance attached to the computed test positivity.

    AS is given only ``POSITIVE_TESTS`` and ``TOTAL_TESTS``; TX additionally has
    ``POSITIVE_TESTS_VIRAL``. The expected output carries provenance ``pt_src1`` for AS and
    ``pos_viral`` for TX.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")
    input_by_region = {
        as_region: {
            CommonFields.POSITIVE_TESTS: TimeseriesLiteral([0, 2, 4, 6], provenance="pt_src1"),
            CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
        },
        tx_region: {
            CommonFields.POSITIVE_TESTS: TimeseriesLiteral([1, 2, 3, 4], provenance="pt_src2"),
            CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                [10, 20, 30, 40], provenance="pos_viral"
            ),
            CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
        },
    }
    source_dataset = test_helpers.build_dataset(input_by_region)

    # Order matters to the caller: the viral-based method is listed first.
    method_viral = DivisionMethod(
        DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
    )
    method_positive = DivisionMethod(
        DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
    )
    result = AllMethods.run(source_dataset, [method_viral, method_positive], diff_days=3)

    expected_by_region = {
        as_region: {
            CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.02], provenance=["pt_src1"])
        },
        tx_region: {
            CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.1], provenance="pos_viral")
        },
    }
    expected = test_helpers.build_dataset(expected_by_region, start_date="2020-04-04")
    test_helpers.assert_dataset_like(result.test_positivity, expected)
def test_preserve_tags():
    """Check which input tags show up on the computed test positivity.

    The expected AS output carries the tags of its ``POSITIVE_TESTS`` and ``TOTAL_TESTS`` inputs.
    The expected TX output carries only the two ``TOTAL_TESTS`` tags; the tag on the TX
    ``POSITIVE_TESTS`` series is not expected in the output.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")

    long_tail_tag = test_helpers.make_tag(
        type=TagType.CUMULATIVE_LONG_TAIL_TRUNCATED, date="2020-04-04"
    )
    tail_tag = test_helpers.make_tag(type=TagType.CUMULATIVE_TAIL_TRUNCATED, date="2020-04-04")
    dropped_tag = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-01")
    outlier_tag_0404 = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-04")
    outlier_tag_0403 = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-03")

    input_by_region = {
        as_region: {
            CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                [1, 2, 3, 4], annotation=[long_tail_tag], provenance="pos"
            ),
            CommonFields.TOTAL_TESTS: TimeseriesLiteral(
                [100, 200, 300, 400], annotation=[tail_tag]
            ),
        },
        tx_region: {
            CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                [None, None, 3, 4], annotation=[dropped_tag]
            ),
            CommonFields.POSITIVE_TESTS_VIRAL: [10, 20, 30, 40],
            CommonFields.TOTAL_TESTS: TimeseriesLiteral(
                [100, 200, 300, 400], annotation=[outlier_tag_0404, outlier_tag_0403]
            ),
        },
    }
    source_dataset = test_helpers.build_dataset(input_by_region)

    method_positive = DivisionMethod(
        DatasetName("method1"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
    )
    method_viral = DivisionMethod(
        DatasetName("method2"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
    )
    result = AllMethods.run(source_dataset, [method_positive, method_viral], diff_days=3)

    expected_by_region = {
        as_region: {
            CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                [0.01], provenance="pos", annotation=[long_tail_tag, tail_tag]
            )
        },
        tx_region: {
            CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                [0.1], annotation=[outlier_tag_0404, outlier_tag_0403]
            )
        },
    }
    expected = test_helpers.build_dataset(expected_by_region, start_date="2020-04-04")
    test_helpers.assert_dataset_like(result.test_positivity, expected)
def test_basic():
    """Check ``zeros_filter.drop_all_zero_timeseries`` on a small three-region dataset.

    Only ``VACCINES_DISTRIBUTED`` is passed to the filter. The TX series ``[0, 0, 0]`` and the HI
    series ``[0, 0, None]`` are expected to be dropped and reported in a single log event; the SF
    series ``[0, 0, 1]`` and the HI ``CASES`` series are expected to remain.
    """
    tx = Region.from_state("TX")
    sf = Region.from_fips("06075")
    hi = Region.from_state("HI")
    # A tagged all-zero series in a field that is not filtered; it must come out unchanged.
    tagged_cases = TimeseriesLiteral(
        [0, 0, 0], annotation=[test_helpers.make_tag(date="2020-04-01")]
    )
    input_by_region = {
        tx: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 0]},
        sf: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 1]},
        hi: {
            CommonFields.VACCINES_DISTRIBUTED: [0, 0, None],
            CommonFields.CASES: tagged_cases,
        },
    }
    dataset_in = test_helpers.build_dataset(input_by_region)

    with structlog.testing.capture_logs() as captured:
        dataset_out = zeros_filter.drop_all_zero_timeseries(
            dataset_in, [CommonFields.VACCINES_DISTRIBUTED]
        )

    dataset_expected = test_helpers.build_dataset(
        {
            sf: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 1]},
            hi: {CommonFields.CASES: tagged_cases},
        }
    )

    # Exactly one log event is expected; more_itertools.one raises otherwise.
    only_event = more_itertools.one(captured)
    assert only_event["event"] == zeros_filter.DROPPING_TIMESERIES_WITH_ONLY_ZEROS
    expected_dropped = pd.MultiIndex.from_tuples(
        [
            (hi.location_id, CommonFields.VACCINES_DISTRIBUTED),
            (tx.location_id, CommonFields.VACCINES_DISTRIBUTED),
        ]
    )
    assert expected_dropped.equals(only_event["dropped"])
    test_helpers.assert_dataset_like(dataset_expected, dataset_out)
def test_default_positivity_methods():
    """Run ``AllMethods.run`` without an explicit method list and check the positivity output.

    AS provides ``POSITIVE_TESTS`` and ``NEGATIVE_TESTS``; TX provides ``POSITIVE_TESTS_VIRAL``
    and ``TOTAL_TESTS_VIRAL``. A constant positivity of 0.1 (AS) and 0.2 (TX) is expected,
    starting on 2020-04-02.
    """
    # This test intentionally doesn't pass any methods to AllMethods.run to run the methods used
    # in production.
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")
    input_by_region = {
        as_region: {
            CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                [0, 1, 2, 3, 4, 5, 6, 7], provenance="src1"
            ),
            CommonFields.NEGATIVE_TESTS: TimeseriesLiteral(
                [10, 19, 28, 37, 46, 55, 64, 73], provenance="src1"
            ),
        },
        tx_region: {
            CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                [2, 4, 6, 8, 10, 12, 14, 16], provenance="pos_tests"
            ),
            CommonFields.TOTAL_TESTS_VIRAL: [10, 20, 30, 40, 50, 60, 70, 80],
        },
    }
    source_dataset = test_helpers.build_dataset(input_by_region)

    # TODO(tom): Once test positivity code seems stable remove call to datetime.today() in
    #  has_recent_data and remove this freeze_time.
    with freeze_time("2020-04-14"):
        result = AllMethods.run(source_dataset, diff_days=1)

    expected_by_region = {
        as_region: {
            CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], provenance="src1"
            )
        },
        tx_region: {
            CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2], provenance="pos_tests"
            )
        },
    }
    expected = test_helpers.build_dataset(expected_by_region, start_date="2020-04-02")
    test_helpers.assert_dataset_like(result.test_positivity, expected)
def test_column_present_with_no_data():
    """Check that ``AllMethods.run`` raises ``NoColumnsWithDataException``.

    The dataset is built with ``POSITIVE_TESTS`` listed in ``timeseries_columns`` but with values
    supplied only for ``TOTAL_TESTS``, and the single method divides ``POSITIVE_TESTS`` by
    ``TOTAL_TESTS``.
    """
    tx_region = Region.from_state("TX")
    input_by_region = {tx_region: {CommonFields.TOTAL_TESTS: [100, 200, 400]}}
    dataset = test_helpers.build_dataset(
        input_by_region, timeseries_columns=[CommonFields.POSITIVE_TESTS]
    )
    only_method = DivisionMethod(
        DatasetName("method2"),
        CommonFields.POSITIVE_TESTS,
        CommonFields.TOTAL_TESTS,
        recent_days=1,
    )

    with pytest.raises(test_positivity.NoColumnsWithDataException):
        AllMethods.run(dataset, [only_method], diff_days=1)
def test_dataclass_include_exclude():
    """Exercise ``combined_datasets.datasource_regions`` against an in-memory data source.

    Covers a state mask, a county-level mask with an excluded region, and a single ``include``
    region, asserting which location ids are present in each resulting dataset.
    """
    states = ["AZ", "CA", "NY", "IL", "TX"]
    county_fips = ["06037", "06045", "17031", "17201"]
    all_regions = [Region.from_state(state) for state in states]
    all_regions += [Region.from_fips(fips) for fips in county_fips]
    # Every region shares the same mock metrics.
    shared_metrics = {CommonFields.CASES: [100, 200, 300], CommonFields.DEATHS: [0, 1, 2]}
    full_dataset = test_helpers.build_dataset(dict.fromkeys(all_regions, shared_metrics))

    # Make a new subclass to keep this test separate from others in the make_dataset lru_cache.
    class DataSourceForTest(data_source.DataSource):
        EXPECTED_FIELDS = [CommonFields.CASES, CommonFields.DEATHS]
        SOURCE_TYPE = "DataSourceForTest"

        @classmethod
        def make_dataset(cls) -> timeseries.MultiRegionDataset:
            return full_dataset

    tx_id = "iso1:us#iso2:us-tx"
    ny_id = "iso1:us#iso2:us-ny"

    # Unfiltered source: both states are present.
    unfiltered = DataSourceForTest.make_dataset()
    assert tx_id in unfiltered.location_ids
    assert ny_id in unfiltered.location_ids

    # Mask restricted to NY.
    ny_only_source = combined_datasets.datasource_regions(
        DataSourceForTest, RegionMask(states=["NY"])
    )
    ny_only = ny_only_source.make_dataset()
    assert tx_id not in ny_only.location_ids
    assert ny_id in ny_only.location_ids

    # CA counties, with FIPS 06037 explicitly excluded.
    ca_counties_source = combined_datasets.datasource_regions(
        DataSourceForTest,
        RegionMask(AggregationLevel.COUNTY, states=["CA"]),
        exclude=Region.from_fips("06037"),
    )
    ca_counties = ca_counties_source.make_dataset()
    assert tx_id not in ca_counties.location_ids
    assert "iso1:us#iso2:us-ca" not in ca_counties.location_ids
    assert "iso1:us#iso2:us-ca#fips:06045" in ca_counties.location_ids
    assert "iso1:us#iso2:us-ca#fips:06037" not in ca_counties.location_ids

    # Just Cook County, IL
    cook_source = combined_datasets.datasource_regions(
        DataSourceForTest, include=Region.from_fips("17031")
    )
    cook_only = cook_source.make_dataset()
    assert cook_only.location_ids.to_list() == ["iso1:us#iso2:us-il#fips:17031"]
def test_aggregate_reporting_ratio(reporting_ratio, expected_na):
    """Check whether aggregated ``CASES`` is produced for a given required reporting ratio.

    NY (population 900) reports one ``CASES`` value while AZ (population 100) reports ``None``.
    Both regions are mapped to the same target through the aggregator's ``county_map``.

    Args:
        reporting_ratio: Passed to ``aggregate`` as ``reporting_ratio_required_to_aggregate``.
        expected_na: When truthy the aggregated ``CASES`` timeseries must be empty, otherwise it
            must be non-empty.

    NOTE(review): the arguments are presumably supplied by a ``pytest.mark.parametrize``
    decorator (or fixtures) that is not visible in this section -- confirm.
    """
    ny_region = Region.from_state("NY")
    az_region = Region.from_state("AZ")
    us_region = Region.from_iso1("us")

    metrics = {ny_region: {CommonFields.CASES: [100]}, az_region: {CommonFields.CASES: [None]}}
    static = {ny_region: {CommonFields.POPULATION: 900}, az_region: {CommonFields.POPULATION: 100}}
    dataset = test_helpers.build_dataset(metrics, static_by_region_then_field_name=static)

    # The region-to-target mapping is given to the aggregator by fips via county_map.
    agg = statistical_areas.CountyToCBSAAggregator(
        county_map={ny_region.fips: us_region.fips, az_region.fips: us_region.fips},
        cbsa_title_map={us_region.fips: "Stat Area 1"},
        aggregations=[],
    )
    aggregation = agg.aggregate(dataset, reporting_ratio_required_to_aggregate=reporting_ratio)
    cases = aggregation.timeseries[CommonFields.CASES]

    # len() is used rather than truthiness because `cases` may be a pandas object, for which
    # bool() is ambiguous.
    if expected_na:
        assert len(cases) == 0, "expected no aggregated CASES values"
    else:
        assert len(cases) > 0, "expected aggregated CASES values"
def test_recent_days():
    """Check how the ``recent_days`` method attribute affects the selected provenance.

    The AS ``POSITIVE_TESTS_VIRAL`` series ends with two missing values. With ``recent_days=2``
    the expected AS provenance is ``pos``; with ``recent_days=3`` it is ``pos_viral``. TX has
    complete data and is expected to have provenance ``pos_viral`` in both runs.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")
    input_by_region = {
        as_region: {
            CommonFields.POSITIVE_TESTS: TimeseriesLiteral([0, 2, 4, 6], provenance="pos"),
            CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                [0, 20, None, None], provenance="pos_viral"
            ),
            CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400]),
        },
        tx_region: {
            CommonFields.POSITIVE_TESTS: TimeseriesLiteral([1, 2, 3, 4], provenance="pos"),
            CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                [10, 20, 30, 40], provenance="pos_viral"
            ),
            CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400]),
        },
    }
    dataset = test_helpers.build_dataset(input_by_region)

    base_methods = [
        DivisionMethod(
            DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
        ),
        DivisionMethod(
            DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
        ),
    ]

    # First run: recent_days=2.
    methods_recent_2 = _replace_methods_attribute(base_methods, recent_days=2)
    result_recent_2 = AllMethods.run(dataset, methods_recent_2, diff_days=1)
    positivity = result_recent_2.test_positivity

    expected_by_region = {
        as_region: {
            CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.02, 0.02, 0.02], provenance="pos")
        },
        tx_region: {
            CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                [0.1, 0.1, 0.1], provenance="pos_viral"
            )
        },
    }
    expected = test_helpers.build_dataset(expected_by_region, start_date="2020-04-02")
    test_helpers.assert_dataset_like(positivity, expected)

    as_provenance = result_recent_2.test_positivity.get_one_region(as_region).provenance
    assert as_provenance == {CommonFields.TEST_POSITIVITY: ["pos"]}
    tx_provenance = result_recent_2.test_positivity.get_one_region(tx_region).provenance
    assert tx_provenance == {CommonFields.TEST_POSITIVITY: ["pos_viral"]}

    # Second run: recent_days=3, derived from the methods already used for the first run.
    methods_recent_3 = _replace_methods_attribute(methods_recent_2, recent_days=3)
    result_recent_3 = AllMethods.run(dataset, methods_recent_3, diff_days=1)
    provenance_by_location = result_recent_3.test_positivity.provenance
    for location_id in ("iso1:us#iso2:us-as", "iso1:us#iso2:us-tx"):
        assert provenance_by_location.loc[location_id].to_dict() == {
            CommonFields.TEST_POSITIVITY: "pos_viral"
        }