def test_missing(self):
        """Check an all-NaN row is flagged only when missing sample sizes are disallowed.

        The same report is reused for both calls, so the final count of one
        error comes entirely from the second (disallowed) call.
        """
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])
        checker.params.missing_sample_size_allowed = True
        nan_row = pd.DataFrame(
            [[np.nan] * 3],
            columns=["val", "se", "sample_size"],
        )

        # Missing values permitted: nothing should be raised.
        checker.check_bad_sample_size(nan_row, "name", outcome)
        assert len(outcome.raised_errors) == 0

        # Missing values forbidden: the same frame now yields one error.
        checker.params.missing_sample_size_allowed = False
        checker.check_bad_sample_size(nan_row, "name", outcome)
        assert len(outcome.raised_errors) == 1

        raised_ids = [err.check_data_id[0] for err in outcome.raised_errors]
        assert "check_n_missing" in raised_ids
    def test_same_day(self):
        """Check no error is raised for a zero-length span with one matching file.

        The span ends on 2020-09-01 and the only supplied file is named for
        20200901, so ``check_missing_date_files`` is expected to raise nothing.
        """
        config = {
            "data_source": "",
            "span_length": 0,
            "end_date": "2020-09-01",
            "expected_lag": {},
        }
        checker = StaticValidator(config)
        outcome = ValidationReport([])

        single_file = ("20200901_county_signal_signal.csv", "match_obj")
        checker.check_missing_date_files([single_file], outcome)

        assert len(outcome.raised_errors) == 0
        raised_ids = [err.check_data_id[0] for err in outcome.raised_errors]
        assert "check_missing_date_files" not in raised_ids
    def test_invalid_geo_id_hrr(self):
        """Check which ids ``check_bad_geo_id_value`` reports for the "hrr" geo type.

        Six ids are supplied; the test expects a single raised error whose
        ``expression`` has length 3 and holds "8", "88" and "888" but none of
        "1", "11" or "111".
        """
        checker = StaticValidator(self.params)
        outcome = ValidationReport([])
        geo_ids = ["1", "11", "111", "8", "88", "888"]
        frame = pd.DataFrame(geo_ids, columns=["geo_id"])

        checker.check_bad_geo_id_value(frame, "name", "hrr", outcome)

        assert len(outcome.raised_errors) == 1
        first_error = outcome.raised_errors[0]
        assert "check_bad_geo_id_value" in first_error.check_data_id

        flagged = first_error.expression
        assert len(flagged) == 3
        for accepted_id in ("1", "11", "111"):
            assert accepted_id not in flagged
        for rejected_id in ("8", "88", "888"):
            assert rejected_id in flagged
    def test_empty_filelist(self):
        """Check that an empty file list is reported as missing date files.

        With no filenames supplied, ``check_missing_date_files`` is expected
        to raise exactly one error tagged "check_missing_date_files".
        """
        params = {
            "data_source": "",
            "span_length": 8,
            "end_date": "2020-09-09",
            "expected_lag": {}
        }
        validator = StaticValidator(params)
        report = ValidationReport([])

        filenames = []
        validator.check_missing_date_files(filenames, report)

        assert len(report.raised_errors) == 1
        assert "check_missing_date_files" in [
            err.check_data_id[0] for err in report.raised_errors
        ]
        # NOTE(review): 9 presumably counts one entry per day of the span
        # (span_length 8, both endpoints included) -- confirm against
        # check_missing_date_files.
        assert len(report.raised_errors[0].expression) == 9
 def test_empty_df(self):
     """Check that an empty ``geo_id`` frame raises no error for the "county" geo type."""
     checker = StaticValidator(self.params)
     outcome = ValidationReport([])
     no_rows = pd.DataFrame(columns=["geo_id"], dtype=str)

     checker.check_bad_geo_id_value(no_rows, "name", "county", outcome)

     error_count = len(outcome.raised_errors)
     assert error_count == 0
 def test_single_column_duplicates_but_not_row(self):
     """Check that rows sharing single column values raise no duplicate-row warning.

     Each column repeats a value, but no two full rows are identical, so
     ``check_duplicate_rows`` is expected to leave ``raised_warnings`` empty.
     """
     checker = StaticValidator(self.params)
     outcome = ValidationReport([])
     rows = [
         ["a", "1"],
         ["a", "2"],
         ["b", "2"],
     ]
     frame = pd.DataFrame(rows)

     checker.check_duplicate_rows(frame, "file", outcome)

     warning_count = len(outcome.raised_warnings)
     assert warning_count == 0