def test_more_than_two_copies(self):
    """A row that appears three times yields one duplicate-row warning."""
    frame = pd.DataFrame([["a", "1"], ["b", "2"], ["b", "2"], ["b", "2"]])
    checker = StaticValidator(self.params)
    outcome = ValidationReport([])

    checker.check_duplicate_rows(frame, FILENAME, outcome)

    raised = outcome.raised_warnings
    assert len(raised) == 1
    assert raised[0].check_name == "check_duplicate_rows"
    def test_empty_df(self):
        """An empty ``geo_id`` frame checked as "county" raises no errors."""
        outcome = ValidationReport([])
        no_rows = pd.DataFrame(columns=["geo_id"], dtype=str)

        StaticValidator(self.params).check_bad_geo_id_format(
            no_rows, FILENAME, "county", outcome)

        assert len(outcome.raised_errors) == 0
Beispiel #3
0
    def test_invalid_geo_id_format_hhs(self):
        """Checking geo_ids "1" and "112" as "hhs" raises one format error."""
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])
        geo_frame = pd.DataFrame(["1", "112"], columns=["geo_id"])

        checker.check_bad_geo_id_format(geo_frame, FILENAME, "hhs", outcome)

        failures = outcome.raised_errors
        assert len(failures) == 1
        assert failures[0].check_name == "check_geo_id_format"
Beispiel #4
0
    def test_invalid_geo_type(self):
        """The geo type "hello" raises a single ``check_geo_type`` error."""
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])
        no_rows = pd.DataFrame(columns=["geo_id"], dtype=str)

        checker.check_bad_geo_id_format(no_rows, FILENAME, "hello", outcome)

        failures = outcome.raised_errors
        assert len(failures) == 1
        assert failures[0].check_name == "check_geo_type"
Beispiel #5
0
    def test_lt_0(self):
        """A ``val`` of -5 raises a single ``check_val_lt_0`` error."""
        negative_vals = pd.DataFrame([-5], columns=["val"])
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])

        checker.check_bad_val(negative_vals, FILENAME, "signal", outcome)

        failures = outcome.raised_errors
        assert len(failures) == 1
        assert failures[0].check_name == "check_val_lt_0"
Beispiel #6
0
    def test_gt_max_prop(self):
        """A ``val`` of 1e7 for a "prop" signal raises one range error."""
        huge_vals = pd.DataFrame([1e7], columns=["val"])
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])

        checker.check_bad_val(huge_vals, FILENAME, "prop", outcome)

        failures = outcome.raised_errors
        assert len(failures) == 1
        assert failures[0].check_name == "check_val_prop_gt_100k"
Beispiel #7
0
    def test_invalid_geo_id_value_nation(self):
        """Checking geo_ids "us" and "zz" as "nation" raises one value error."""
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])
        geo_frame = pd.DataFrame(["us", "zz"], columns=["geo_id"])

        checker.check_bad_geo_id_value(geo_frame, FILENAME, "nation", outcome)

        failures = outcome.raised_errors
        assert len(failures) == 1
        assert failures[0].check_name == "check_bad_geo_id_value"
Beispiel #8
0
    def test_invalid_geo_id_format_state(self):
        """A mix of state geo_id spellings raises one format error in total."""
        candidate_ids = ["aa", "hi", "HI", "hawaii", "Hawaii", "a", "H.I."]
        geo_frame = pd.DataFrame(candidate_ids, columns=["geo_id"])
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])

        checker.check_bad_geo_id_format(geo_frame, FILENAME, "state", outcome)

        failures = outcome.raised_errors
        assert len(failures) == 1
        assert failures[0].check_name == "check_geo_id_format"
Beispiel #9
0
    def test_uppercase_geo_id(self):
        """Geo_ids "ak" and "AK" raise no errors and one lowercase warning."""
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])
        geo_frame = pd.DataFrame(["ak", "AK"], columns=["geo_id"])

        checker.check_bad_geo_id_value(geo_frame, FILENAME, "state", outcome)

        assert len(outcome.raised_errors) == 0
        cautions = outcome.raised_warnings
        assert len(cautions) == 1
        assert cautions[0].check_name == "check_geo_id_lowercase"
Beispiel #10
0
    def test_lt_min_missing_not_allowed(self):
        """With missing sample sizes disallowed, one ``check_n_gt_min`` error.

        The frame holds sample sizes 10, 240 and 245; exactly one error is
        expected from ``check_bad_sample_size``.
        """
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])
        checker.params.missing_sample_size_allowed = False

        rows = [[1, 0, 10], [1, np.nan, 240], [1, np.nan, 245]]
        frame = pd.DataFrame(rows, columns=["val", "se", "sample_size"])
        checker.check_bad_sample_size(frame, FILENAME, outcome)

        failures = outcome.raised_errors
        assert len(failures) == 1
        assert failures[0].check_name == "check_n_gt_min"
Beispiel #11
0
    def test_e_0_missing_not_allowed(self):
        """With missing ``se`` disallowed, zero and NaN ``se`` raise two errors.

        Both ``check_se_not_missing_and_in_range`` and ``check_se_0`` must be
        among the raised error names.
        """
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])
        checker.params.missing_se_allowed = False

        rows = [[1, 0, 200], [1, 0, np.nan], [1, np.nan, np.nan]]
        frame = pd.DataFrame(rows, columns=["val", "se", "sample_size"])
        checker.check_bad_se(frame, FILENAME, outcome)

        assert len(outcome.raised_errors) == 2
        first_names = [err.check_name for err in outcome.raised_errors]
        assert "check_se_not_missing_and_in_range" in first_names
        second_names = [err.check_name for err in outcome.raised_errors]
        assert "check_se_0" in second_names
Beispiel #12
0
    def test_same_day(self):
        """A zero-length span with a file for the end date raises no errors."""
        common_settings = {
            "data_source": "",
            "span_length": 0,
            "end_date": "2020-09-01"
        }
        checker = StaticValidator({"common": common_settings})
        outcome = ValidationReport([])

        # Each entry pairs a filename with a placeholder for its regex match.
        present_files = [("20200901_county_signal_signal.csv", "match_obj")]
        checker.check_missing_date_files(present_files, outcome)

        assert len(outcome.raised_errors) == 0
Beispiel #13
0
    def test_empty_df(self):
        """An empty ``val`` frame raises no errors for any signal type tried."""
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])
        no_rows = pd.DataFrame(columns=["val"])

        # Same empty frame, checked as a plain, "prop" and "pct" signal.
        for signal_type in ("", "prop", "pct"):
            checker.check_bad_val(no_rows, "", signal_type, outcome)

        assert len(outcome.raised_errors) == 0
    def test_empty_filelist(self):
        """An empty file list over a non-zero span reports missing files.

        The validator is configured with ``span_length`` 8 ending on
        2020-09-09 and is given no filenames; a single
        ``check_missing_date_files`` error is expected.
        """
        params = {
            "common": {
                "data_source": "",
                "span_length": 8,
                "end_date": "2020-09-09"
            }
        }
        validator = StaticValidator(params)
        # The report was previously constructed twice in a row; one is enough.
        report = ValidationReport([])

        filenames = []
        validator.check_missing_date_files(filenames, report)

        assert len(report.raised_errors) == 1
        assert report.raised_errors[0].check_name == "check_missing_date_files"
    def test_duplicate_dates(self):
        """Repeated dates in the file list still leave one missing-date error.

        The span is 1 day ending 2020-09-02; the supplied files cover
        2020-09-01 once and 2020-09-03 three times.
        """
        common_settings = {
            "data_source": "",
            "span_length": 1,
            "end_date": "2020-09-02"
        }
        checker = StaticValidator({"common": common_settings})
        outcome = ValidationReport([])

        csv_names = (
            "20200901_county_signal_signal.csv",
            "20200903_county_signal_signal.csv",
            "20200903_usa_signal_signal.csv",
            "20200903_usa_signal_signal.csv",
        )
        # Pair every filename with a placeholder for its regex match object.
        file_entries = [(csv_name, "match_obj") for csv_name in csv_names]
        checker.check_missing_date_files(file_entries, outcome)

        failures = outcome.raised_errors
        assert len(failures) == 1
        assert failures[0].check_name == "check_missing_date_files"
Beispiel #16
0
    def test_empty_df(self):
        """An empty frame raises no sample-size errors, before or after
        ``missing_sample_size_allowed`` is set to True."""
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])
        column_names = ["val", "se", "sample_size"]
        no_rows = pd.DataFrame(columns=column_names, dtype=float)

        # First pass: validator parameters exactly as constructed.
        checker.check_bad_sample_size(no_rows, "", outcome)
        assert len(outcome.raised_errors) == 0

        # Second pass: missing sample sizes explicitly allowed.
        checker.params.missing_sample_size_allowed = True
        checker.check_bad_sample_size(no_rows, "", outcome)
        assert len(outcome.raised_errors) == 0
    def test_state_level_fips(self):
        """County check accepts the first five codes but flags "99000".

        The same report is reused for both calls, so the single error seen
        at the end comes from the second frame only.
        """
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])
        accepted_ids = ["37183", "56000", "04000", "60000", "78000"]

        accepted_frame = pd.DataFrame(accepted_ids, columns=["geo_id"])
        checker.check_bad_geo_id_value(accepted_frame, FILENAME, "county", outcome)
        assert len(outcome.raised_errors) == 0

        with_extra = pd.DataFrame(accepted_ids + ["99000"], columns=["geo_id"])
        checker.check_bad_geo_id_value(with_extra, FILENAME, "county", outcome)

        failures = outcome.raised_errors
        assert len(failures) == 1
        assert failures[0].check_name == "check_bad_geo_id_value"
Beispiel #18
0
    def test_additional_valid_geo_ids(self):
        """Extra geo values from the "static" params are accepted per geo type.

        "county2" passes as a county and "state1" as a state, but "county2"
        checked as "hhs" produces one ``check_bad_geo_id_value`` error.
        """
        extra_geo_values = {
            "state": ["state1"],
            "county": ["county1", "county2"]
        }
        settings = self.params.copy()
        settings["static"] = {"additional_valid_geo_values": extra_geo_values}
        checker = StaticValidator(settings)
        outcome = ValidationReport([])

        county_frame = pd.DataFrame(["05109", "06019", "county2"],
                                    columns=["geo_id"])
        checker.check_bad_geo_id_value(county_frame, FILENAME, "county", outcome)
        assert len(outcome.raised_errors) == 0

        state_frame = pd.DataFrame(["ma", "state1", "mi"], columns=["geo_id"])
        checker.check_bad_geo_id_value(state_frame, FILENAME, "state", outcome)
        assert len(outcome.raised_errors) == 0

        hhs_frame = pd.DataFrame(["county2", "02"], columns=["geo_id"])
        checker.check_bad_geo_id_value(hhs_frame, FILENAME, "hhs", outcome)
        failures = outcome.raised_errors
        assert len(failures) == 1
        assert failures[0].check_name == "check_bad_geo_id_value"
Beispiel #19
0
 def test_single_column_duplicates_but_not_row(self):
     """Values repeated within a column, but no repeated row, give no warning."""
     frame = pd.DataFrame([["a", "1"], ["a", "2"], ["b", "2"]])
     checker = StaticValidator(self.params)
     outcome = ValidationReport([])

     checker.check_duplicate_rows(frame, FILENAME, outcome)

     assert len(outcome.raised_warnings) == 0