def test_value_check_enumeration():
    """Verify that enumeration() value checks report out-of-set values.

    Two checks are registered: one built from positional arguments and one
    built from a single tuple.  The sample data holds one clean record and
    two records that each violate exactly one of the checks.
    """
    validator = CSVValidator(('foo', 'bar', 'baz'))

    # enumeration() takes the permitted values as separate arguments ...
    validator.add_value_check('bar', enumeration('M', 'F'))

    # ... or as one list/tuple of permitted values
    flavours = ('chocolate', 'vanilla', 'strawberry')
    validator.add_value_check('baz', enumeration(flavours))

    header = ('foo', 'bar', 'baz')
    clean_record = ('1', 'M', 'chocolate')
    bad_flavour_record = ('2', 'F', 'maple pecan')
    bad_gender_record = ('3', 'X', 'strawberry')
    data = (header, clean_record, bad_flavour_record, bad_gender_record)

    problems = validator.validate(data)
    assert len(problems) == 2

    # Expected problems in report order; each entry is an ordered series of
    # (key, expected value) pairs so the checks run in a fixed sequence.
    expected_problems = (
        (
            ('code', VALUE_CHECK_FAILED),
            ('row', 3),
            ('column', 3),
            ('field', 'baz'),
            ('value', 'maple pecan'),
            ('record', bad_flavour_record),
        ),
        (
            ('code', VALUE_CHECK_FAILED),
            ('row', 4),
            ('column', 2),
            ('field', 'bar'),
            ('value', 'X'),
            ('record', bad_gender_record),
        ),
    )
    for index, expectations in enumerate(expected_problems):
        problem = problems[index]
        for key, wanted in expectations:
            assert problem[key] == wanted
def sampleValidation():
    """Sample code to validate a CSV file structure.

    Builds a validator for a seven-column layout, registers one value check
    per named column and passes the validator, the data path and the report
    path to ``validate_csv_file``.
    """
    data_path = "data.csv"
    report_path = "csv_validation_problems_report.txt"

    # NOTE(review): the second and third columns are declared with empty
    # names -- presumably unnamed columns in the export; confirm.
    field_names = ('date', '', '', 'type', 'reach', 'clicks', 'reactions')
    validator = cv.CSVValidator(field_names)

    validator.add_value_check('date', cv.datetime_string('%m/%d/%Y'),
                              'EX1', 'invalid date')
    validator.add_value_check('type', cv.enumeration('Video', 'Photo', 'Link'),
                              "EX2", "invalid type found")
    validator.add_value_check('reach', int,
                              "EX3", "reach should be an integer")
    validator.add_value_check('clicks', int,
                              "EX4", "clicks should be an integer")
    # Each check carries its own problem code; 'reactions' previously reused
    # "EX4", which made it indistinguishable from the 'clicks' check.
    validator.add_value_check('reactions', int,
                              "EX5", "reactions should be an integer")

    validate_csv_file(data_path, report_path, validator)
def _verify_csv_schema(file_content, separator, schema):
    '''
    _verify_csv_schema

    Verifies csv data against a schema. A header check is registered, plus
    one value check for every schema property whose type is 'int', 'string'
    or 'enum'. Properties of any other type get no value check.

    :param file_content: The content of the file
    :type file_content: Python String
    :param separator: The delimiter character used in the file
    :type separator: Python Character
    :param schema: The csv schema we are expecting. schema['properties'] is
        a list of dictionaries, each with a 'field' and a 'type' key, plus
        a 'values' key when the type is 'enum'
    :type schema: Python Dictionary
    :raises VerifyFileSchemaException: When the validator reports at least
        one problem; the message is the string form of the problem list
    '''
    csv_reader = csv.reader(file_content.splitlines(), delimiter=separator)

    schema_properties = schema['properties']
    field_names = tuple(prop['field'] for prop in schema_properties)

    validator = csvvalidator.CSVValidator(field_names)
    validator.add_header_check('EX1', 'bad header')

    for prop in schema_properties:
        prop_field = prop['field']
        prop_type = prop['type']
        if prop_type == 'int':
            validator.add_value_check(
                prop_field, int,
                'EX_INT', prop_field + ' must be an integer')
        elif prop_type == 'string':
            validator.add_value_check(
                prop_field, str,
                'EX_STR', prop_field + ' must be a string')
        elif prop_type == 'enum':
            enum_values = tuple(prop['values'])
            validator.add_value_check(
                prop_field, csvvalidator.enumeration(enum_values),
                'EX_ENUM', prop_field + ' must have value from enum')

    problems = validator.validate(csv_reader)
    if problems:
        raise VerifyFileSchemaException(str(problems))
def create_validator():
    """Create an example CSV validator for patient demographic data.

    The validator checks the header row, the record length, five individual
    field values and one cross-field rule tying ``age_months`` to
    ``age_years``.

    Returns:
        The configured ``CSVValidator`` instance.
    """
    field_names = (
        'study_id',
        'patient_id',
        'gender',
        'age_years',
        'age_months',
        'date_inclusion',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('study_id', int,
                              'EX3', 'study id must be an integer')
    validator.add_value_check('patient_id', int,
                              'EX4', 'patient id must be an integer')
    validator.add_value_check('gender', enumeration('M', 'F'),
                              'EX5', 'invalid gender')
    validator.add_value_check('age_years', number_range_inclusive(0, 120, int),
                              'EX6', 'invalid age in years')
    validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'),
                              'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        """Raise RecordError('EX8') unless age_months matches age_years.

        The month count must fall inside the year it claims to belong to:
        ``age_years * 12 <= age_months < (age_years + 1) * 12``.
        """
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # A plain range test replaces the former `age_months % age_years`,
        # which divided by zero for infants (age_years == 0) and accepted
        # pairs such as 2 years / 100 months.
        valid = age_years * 12 <= age_months < (age_years + 1) * 12
        if not valid:
            raise RecordError('EX8', 'invalid age variables')

    validator.add_record_check(check_age_variables)

    return validator
raise ValueError("Invalid GFSM revenue value: " + gfsm) def gfsmExpenseValidator(gfsm): """ Validator function. Checks if a string is a valid GFSM 2001 expenditure value. """ p = r"^2(1(1(1|2)?|2(1|2)?)?|2|3|4(1|2|3)?|5(1|2)?|6(1(1|2)?|2(1|2)?|3(1|2)?)?|7(1(1|2)?|2(1|2)?|3(1|2)?)?|8(1(1|2|3|4)?|2(1|2)?)?)?$" if re.search(p,gfsm): return gfsm raise ValueError("Invalid GFSM expense value: " + gfsm) # Validator for "type" fields. typeValidator = cv.enumeration( "personnel", "non-personnel recurrent", "capital", "other" ) field_validators = { # Special fields. "cofog": cofogValidator, "gfsmExpense": gfsmExpenseValidator, "gfsmRevenue": gfsmRevenueValidator, "type": typeValidator, # Ordinary ol' fields. "admin": str, "adminID": str, "adminOrgId": str, "amount": float, "amountAdjusted": float,