class Validator(Vlad):
    """Vladiate schema describing the columns of ``repos.csv``.

    Every column except ``url`` is declared with ``empty_ok=True``.  All
    regex checks pass ``full=True``, which asks vladiate to match the whole
    field rather than only a prefix.
    """

    source = LocalFile("repos.csv")

    validators = {
        # Free-text name followed by an address wrapped in angle brackets.
        "contact": [
            RegexValidator(
                r"\w[\w\-' ]+ <[\w\-.]+@[\w\-.]+>", full=True, empty_ok=True
            )
        ],
        # Word characters, hyphens, dots and slashes only.
        "doi": [RegexValidator(r"[\w\-./]+", full=True, empty_ok=True)],
        # One or more digit runs separated by single semicolons, with an
        # optional trailing semicolon.  This accepts exactly the same strings
        # as the former ``(\d+;?)+`` but has no nested quantifier, which
        # backtracked exponentially on a long digit run followed by an
        # invalid character.
        "funders": [
            RegexValidator(r"\d+(?:;\d+)*;?", full=True, empty_ok=True)
        ],
        # Must start with an http:// or https:// scheme.
        "homepage_url": [
            RegexValidator(r"https?://.+", full=True, empty_ok=True)
        ],
        # Closed list of accepted licence identifiers.
        "licence": [
            SetValidator(
                [
                    "Apache-2.0",
                    "Artistic-2.0",
                    "BSD-2-Clause",
                    "BSD-3-Clause",
                    "CECILL-2.1",
                    "GPL-2.0",
                    "GPL-3.0",
                    "MIT",
                    "MPL-2.0",
                    "NCSA",
                ],
                empty_ok=True,
            )
        ],
        # Closed list of accepted organisation identifiers
        # (presumably GRID ids -- confirm with the data owner).
        "organisations": [
            SetValidator(["grid.457348.9", "grid.4991.5"], empty_ok=True)
        ],
        # Four digits, a hyphen, then two digits.
        "rsotm": [RegexValidator(r"\d{4}-\d{2}", full=True, empty_ok=True)],
        # No two rows may share the same url.
        "url": [UniqueValidator()],
    }
# But for now let's add an additional read through the data # Annoyingly, you have to instantiate that "validators" block for each record # Otherwise they seem to accumulate extra values. # Beyond the 2 new validators defined above, we're using vladiate defined options allgood = True badfiles = [] for sfn in os.listdir(args.indir + args.sub): if sfn not in skip and sfn.endswith(".tsv"): validators = { 'docId': [ UniqueValidator(unique_with=['annotSet', 'annotId']), #UniqueValidator(unique_with=['annotSet', 'startOffset']) ], 'annotId': [RegexValidator(pattern=r'T?\d*-?\d+', full=True)], 'annotType': [ SetValidator([ 'Quantity', 'Qualifier', 'MeasuredProperty', 'MeasuredEntity' ]) ], 'annotSet': [IntValidator()], 'startOffset': [IntValidator()], 'endOffset': [IntValidator()], 'other': [JsonValidator(empty_ok=True)], 'text': [LengthValidator()] } #print(sfn) truth = Vlad(source=LocalFile(args.indir + args.sub + sfn), validators=validators,
def test_regex_validator_works(pattern, field):
    """Check that validating ``field`` against ``pattern`` does not raise."""
    checker = RegexValidator(pattern)
    checker.validate(field)
def test_regex_validator_fails(pattern, field):
    """Check that a rejected ``field`` raises and is recorded in ``bad``."""
    checker = RegexValidator(pattern)

    # The validator must reject the field outright...
    with pytest.raises(ValidationException):
        checker.validate(field)

    # ...and remember exactly that one offending value.
    rejected = checker.bad
    assert rejected == {field}
class WeatherValidator(Vlad):
    """Vladiate schema for the ``SPER_CR6_Daily.dat`` TOA5 file.

    Timestamp-like columns are checked against one shared date-time pattern.
    Numeric columns are range-checked against limits read from ``columns``,
    a table defined outside this class: row 1 supplies the lower bound and
    row 2 the upper bound, each converted with ``float``.
    """

    source = TOA5Input('SPER_CR6_Daily.dat')

    # Date-time text of the form year-month-day hour:minute:second.  The
    # pattern was previously repeated verbatim for every timestamp column;
    # it is kept in one place so all of those columns stay in sync.  The
    # runtime value is unchanged.
    _TIMESTAMP_PATTERN = (
        r"((19|20)\d\d)-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) "
        r"([2][0-3]|[0-1][0-9]|[1-9]):[0-5][0-9]:([0-5][0-9]|[6][0])$"
    )

    # Each timestamp column gets its own RegexValidator instance so that
    # per-validator state is never shared between columns.
    validators = {
        'TIMESTAMP': [
            RegexValidator(pattern=_TIMESTAMP_PATTERN, empty_ok=False)
        ],
        'RECORD': [UniqueValidator(empty_ok=False)],
        'BattV_Min': [
            RangeValidator(low=float(columns[1][2]),
                           high=float(columns[2][2]))
        ],
        'WS_ms_Avg': [
            RangeValidator(low=float(columns[1][3]),
                           high=float(columns[2][3]))
        ],
        'WS_ms_Max': [
            RangeValidator(low=float(columns[1][4]),
                           high=float(columns[2][4]))
        ],
        'WS_ms_TMx': [
            RegexValidator(pattern=_TIMESTAMP_PATTERN, empty_ok=False)
        ],
        'WS_ms_S_WVT': [
            RangeValidator(low=float(columns[1][6]),
                           high=float(columns[2][6]))
        ],
        # NOTE(review): low and high use different indices here (7 vs 8),
        # unlike every other range check in this schema -- confirm against
        # the layout of `columns`.
        'WindDir_D1_WVT': [
            RangeValidator(low=float(columns[1][7]),
                           high=float(columns[2][8]))
        ],
        # NOTE(review): same mismatch here (8 vs 9) -- confirm.
        'WindDir_SD1_WVT': [
            RangeValidator(low=float(columns[1][8]),
                           high=float(columns[2][9]))
        ],
        'SlrMJ_Tot': [
            RangeValidator(low=float(columns[1][10]),
                           high=float(columns[2][10]))
        ],
        'Rain_mm_Tot': [
            RangeValidator(low=float(columns[1][11]),
                           high=float(columns[2][11]))
        ],
        'AirTC_Avg': [
            RangeValidator(low=float(columns[1][12]),
                           high=float(columns[2][12]))
        ],
        'AirTC_Max': [
            RangeValidator(low=float(columns[1][13]),
                           high=float(columns[2][13]))
        ],
        'AirTC_TMx': [
            RegexValidator(pattern=_TIMESTAMP_PATTERN, empty_ok=False)
        ],
        'AirTC_Min': [
            RangeValidator(low=float(columns[1][14]),
                           high=float(columns[2][14]))
        ],
        'AirTC_TMn': [
            RegexValidator(pattern=_TIMESTAMP_PATTERN, empty_ok=False)
        ],
        'RH_Max': [
            RangeValidator(low=float(columns[1][16]),
                           high=float(columns[2][16]))
        ],
        'RH_TMx': [
            RegexValidator(pattern=_TIMESTAMP_PATTERN, empty_ok=False)
        ],
        'RH_Min': [
            RangeValidator(low=float(columns[1][18]),
                           high=float(columns[2][18]))
        ],
        'RH_TMn': [
            RegexValidator(pattern=_TIMESTAMP_PATTERN, empty_ok=False)
        ],
    }