def test_integer_success(self):
    """Numeric strings are converted to integers and no error is reported."""
    # Arrange
    column = pd.Series(['1', '2', '3'], name='test')
    processor = DatatypeProcessor(Datatype.INTEGER)
    report = FileFormatReport()

    # Act
    converted = processor.process(column, report)

    # Assert
    assert list(converted) == [1, 2, 3]
    assert report.errors == []
def test_success(self):
    """A column of distinct values is returned as-is with no errors."""
    # Arrange
    column = pd.Series(['a', 'b', 'c'], name='test')
    processor = UniquenessProcessor()
    report = FileFormatReport()

    # Act
    returned = processor.process(column, report)

    # Assert: the very same Series object comes back, nothing is reported.
    assert column is returned
    assert report.errors == []
def test_missing(self):
    """Empty-string and None entries do not produce option errors."""
    # Arrange: two allowed values followed by two "missing" values.
    column = pd.Series(['Yes', 'No', '', None], name='test')
    processor = OptionsProcessor(options=['Yes', 'No'])
    report = FileFormatReport()

    # Act
    returned = processor.process(column, report)

    # Assert: the very same Series object comes back, nothing is reported.
    assert column is returned
    assert report.errors == []
def test_success(self):
    """Values that all match the configured regex yield no errors."""
    # Arrange
    column = pd.Series(['FG10001', 'FG2945', 'FG1249'], name='test')
    processor = RegexProcessor(regex=r'FG\d+')
    report = FileFormatReport()

    # Act
    returned = processor.process(column, report)

    # Assert: the very same Series object comes back, nothing is reported.
    assert column is returned
    assert report.errors == []
def test_float_success(self):
    """Numeric strings are converted to a float32 series without errors."""
    # Arrange
    column = pd.Series(['1.1', '2', '3'], name='test')
    expected = pd.Series([1.1, 2.0, 3.0], name="test", dtype="float32")
    processor = DatatypeProcessor(Datatype.FLOAT)
    report = FileFormatReport()

    # Act
    converted = processor.process(column, report)

    # Assert
    assert converted.equals(expected)
    assert report.errors == []
def test_date_success(self):
    """DD/MM/YYYY dates are rewritten as YYYY-MM-DD strings without errors."""
    # Arrange
    column = pd.Series(
        ['10/05/2020', '11/05/2020', '12/05/2020'],
        name='test',
    )
    expected = pd.Series(
        ["2020-05-10", "2020-05-11", "2020-05-12"],
        name="test",
        dtype="str",
    )
    processor = DatatypeProcessor(Datatype.DATE, dateformat="DD/MM/YYYY")
    report = FileFormatReport()

    # Act
    converted = processor.process(column, report)

    # Assert
    assert converted.equals(expected)
    assert report.errors == []
def test_failure(self):
    """A value outside the allowed options is reported once, at its row."""
    # Arrange: 'MayBe' (row 2) is not among the permitted options.
    column = pd.Series(['Yes', 'No', 'MayBe'], name='test')
    processor = OptionsProcessor(options=['Yes', 'No'])
    report = FileFormatReport()

    # Act
    returned = processor.process(column, report)

    # Assert: the column is handed back untouched and one error is recorded.
    assert column is returned
    assert len(report.errors) == 1

    error = report.errors[0]
    assert error.row_index == 2
    assert error.error_code == 'invalid_value'
    assert error.error_message == "The value is not one of the allowed options: 'MayBe'"
def test_failure(self):
    """A value that does not match the regex is reported once, at its row."""
    # Arrange: 'X1249' (row 2) lacks the required 'FG' prefix.
    column = pd.Series(['FG10001', 'FG2945', 'X1249'], name='test')
    processor = RegexProcessor(regex=r'FG\d+')
    report = FileFormatReport()

    # Act
    returned = processor.process(column, report)

    # Assert: the column is handed back untouched and one error is recorded.
    assert column is returned
    assert len(report.errors) == 1

    error = report.errors[0]
    assert error.row_index == 2
    assert error.error_code == 'invalid_pattern'
    assert error.error_message == r"The value is not matching the pattern FG\d+: 'X1249'"
def test_failure(self):
    """A repeated value is reported once, at the row of the repetition."""
    # Arrange: 'a' appears at row 0 and again at row 2.
    column = pd.Series(['a', 'b', 'a'], name='test')
    processor = UniquenessProcessor()
    report = FileFormatReport()

    # Act
    returned = processor.process(column, report)

    # Assert: the column is handed back untouched and one error is recorded.
    assert column is returned
    assert len(report.errors) == 1

    error = report.errors[0]
    assert error.row_index == 2
    assert error.error_code == 'duplicate_value'
    assert error.error_message == "Found duplicate value: 'a'"
def test_integer_failure(self):
    """A non-numeric entry becomes pd.NA and is reported once, at its row."""
    # Arrange: 'x' (row 2) cannot be parsed as an integer.
    column = pd.Series(['1', '2', 'x', '4'], name='test')
    processor = DatatypeProcessor(Datatype.INTEGER)
    report = FileFormatReport()

    # Act
    converted = processor.process(column, report)

    # Assert: valid rows are converted, the bad row is pd.NA.
    # The list comparison works because pd.NA is a singleton, so the
    # identity shortcut in list equality applies to that element.
    assert list(converted) == [1, 2, pd.NA, 4]
    assert len(report.errors) == 1

    error = report.errors[0]
    assert error.row_index == 2
    assert error.error_code == 'invalid-value'
    assert error.error_message == "Invalid integer: 'x'"
def test_date_failure(self):
    """A date in the wrong format becomes None and is reported once."""
    # Arrange: row 2 uses '-' separators instead of the expected '/'.
    column = pd.Series(
        ['10/05/2020', '11/05/2020', '12-05-2020'],
        name='test',
    )
    expected = pd.Series(
        ['2020-05-10', '2020-05-11', None],
        name="test",
        dtype="str",
    )
    processor = DatatypeProcessor(Datatype.DATE, dateformat="DD/MM/YYYY")
    report = FileFormatReport()

    # Act
    converted = processor.process(column, report)

    # Assert
    assert converted.equals(expected)
    assert len(report.errors) == 1

    error = report.errors[0]
    assert error.row_index == 2
    assert error.error_code == 'invalid-value'
    assert error.error_message == "Invalid date: '12-05-2020'"
def test_float_failure(self):
    """A non-numeric entry becomes NaN and is reported once, at its row."""
    # Arrange: 'x' (row 2) cannot be parsed as a number.
    column = pd.Series(['1.5', '2', 'x', '4'], name='test')
    expected = pd.Series([1.5, 2.0, np.nan, 4.0], name="test", dtype="float32")
    processor = DatatypeProcessor(Datatype.FLOAT)
    report = FileFormatReport()

    # Act
    converted = processor.process(column, report)

    # Assert
    assert converted.equals(expected)
    assert len(report.errors) == 1

    error = report.errors[0]
    assert error.row_index == 2
    assert error.error_code == 'invalid-value'
    assert error.error_message == "Invalid number: 'x'"