def create_validator():
    """Create an example CSV validator for patient demographic data.

    The validator is built for the columns ``study_id``, ``patient_id``,
    ``gender``, ``age_years``, ``age_months`` and ``date_inclusion`` and
    registers the following checks:

    * EX1 / EX2 -- header and record length checks.
    * EX3 / EX4 -- ``study_id`` and ``patient_id`` must convert with ``int``.
    * EX5 -- ``gender`` must be ``'M'`` or ``'F'``.
    * EX6 -- ``age_years`` must be an integer in the range 0..120.
    * EX7 -- ``date_inclusion`` must match ``%Y-%m-%d``.
    * EX8 -- ``age_months`` must be consistent with ``age_years``.

    Returns:
        The configured ``CSVValidator`` instance.
    """
    field_names = (
        'study_id',
        'patient_id',
        'gender',
        'age_years',
        'age_months',
        'date_inclusion',
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('study_id', int,
                              'EX3', 'study id must be an integer')
    validator.add_value_check('patient_id', int,
                              'EX4', 'patient id must be an integer')
    validator.add_value_check('gender', enumeration('M', 'F'),
                              'EX5', 'invalid gender')
    validator.add_value_check('age_years', number_range_inclusive(0, 120, int),
                              'EX6', 'invalid age in years')
    validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'),
                              'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        """Raise ``RecordError`` EX8 unless the two age fields agree.

        ``age_months`` is treated as the total age in months, so it must
        fall within the twelve months covered by ``age_years``.
        """
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # Compare against the [12 * years, 12 * years + 12) window directly.
        # The former ``age_months % age_years < 12`` test divided by zero
        # for infants (age_years == 0) and was always true for
        # age_years == 1, letting inconsistent records through.
        first_month = age_years * 12
        valid = first_month <= age_months < first_month + 12
        if not valid:
            raise RecordError('EX8', 'invalid age variables')

    validator.add_record_check(check_age_variables)

    return validator
def __init__(self, model):
    """Set up the value checks, unique-key fields and field names.

    Three base columns are always checked: ``fecha`` (a ``%Y-%m-%d``
    date string), ``indicador_tipo`` (one of the ``nombre`` values of the
    existing ``IndicatorType`` rows) and ``indicador_valor``. When
    ``model`` is a subclass of ``AbstractIndicator`` the two jurisdiction
    columns are appended, and ``jurisdiccion_id`` joins the unique fields.

    Args:
        model: The model class the checks are being built for.
    """
    type_names = IndicatorType.objects.all().values_list('nombre', flat=True)

    self.value_checks = [
        ('fecha', datetime_string('%Y-%m-%d')),
        ('indicador_tipo', enumeration(list(type_names))),
        ('indicador_valor', str),
    ]
    self.unique_checks = ('fecha', 'indicador_tipo')

    if issubclass(model, AbstractIndicator):
        # Extra columns and an extra unique-key field for these models.
        jurisdiction_checks = [
            ('jurisdiccion_nombre', str),
            ('jurisdiccion_id', str),
        ]
        self.value_checks = self.value_checks + jurisdiction_checks
        self.unique_checks = self.unique_checks + ('jurisdiccion_id',)

    # Field names follow the order of the value checks.
    self.field_names = [name for name, _checker in self.value_checks]
def sampleValidation(data_path="data.csv",
                     report_path="csv_validation_problems_report.txt"):
    """Sample code to validate a CSV file structure.

    Builds a ``cv.CSVValidator`` with value checks for the ``date``,
    ``type``, ``reach``, ``clicks`` and ``reactions`` columns (problem
    codes EX1-EX5) and hands it to ``validate_csv_file`` together with the
    two paths.

    Args:
        data_path: Path of the CSV file to validate. Defaults to the
            previously hard-coded ``"data.csv"``.
        report_path: Path passed to ``validate_csv_file`` as the report
            location. Defaults to the previously hard-coded
            ``"csv_validation_problems_report.txt"``.
    """
    # NOTE(review): the two empty names presumably stand for columns that
    # are present in the file but not validated -- confirm against the data.
    field_names = ('date', '', '', 'type', 'reach', 'clicks', 'reactions')
    validator = cv.CSVValidator(field_names)

    validator.add_value_check('date', cv.datetime_string('%m/%d/%Y'),
                              'EX1', 'invalid date')
    validator.add_value_check('type', cv.enumeration('Video', 'Photo', 'Link'),
                              "EX2", "invalid type found")
    validator.add_value_check('reach', int,
                              "EX3", "reach should be an integer")
    validator.add_value_check('clicks', int,
                              "EX4", "clicks should be an integer")
    # 'reactions' used to share code EX4 with 'clicks', which made the two
    # failures indistinguishable by code; it now has its own code.
    validator.add_value_check('reactions', int,
                              "EX5", "reactions should be an integer")

    validate_csv_file(data_path, report_path, validator)
def create_validator():
    """Create an example CSV validator for patient demographic data.

    Checks registered on the returned validator, by problem code:

    * EX1 -- header check.
    * EX2 -- record length check.
    * EX3 -- ``study_id`` converts with ``int``.
    * EX4 -- ``patient_id`` converts with ``int``.
    * EX5 -- ``gender`` is ``'M'`` or ``'F'``.
    * EX6 -- ``age_years`` is an integer between 0 and 120 inclusive.
    * EX7 -- ``date_inclusion`` matches ``%Y-%m-%d``.
    * EX8 -- ``age_years`` and ``age_months`` describe the same age.

    Returns:
        The configured ``CSVValidator`` instance.
    """
    field_names = (
        'study_id',
        'patient_id',
        'gender',
        'age_years',
        'age_months',
        'date_inclusion'
    )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('study_id', int,
                              'EX3', 'study id must be an integer')
    validator.add_value_check('patient_id', int,
                              'EX4', 'patient id must be an integer')
    validator.add_value_check('gender', enumeration('M', 'F'),
                              'EX5', 'invalid gender')
    validator.add_value_check('age_years', number_range_inclusive(0, 120, int),
                              'EX6', 'invalid age in years')
    validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'),
                              'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        """Raise ``RecordError`` EX8 when the age fields are inconsistent.

        ``age_months`` is taken as the total age in months and has to lie
        in the twelve-month span that starts at ``age_years * 12``.
        """
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # The earlier ``age_months % age_years < 12`` condition raised
        # ZeroDivisionError for age_years == 0 and could never fail for
        # age_years == 1; an explicit window check has neither problem.
        lower_bound = age_years * 12
        valid = lower_bound <= age_months < lower_bound + 12
        if not valid:
            raise RecordError('EX8', 'invalid age variables')

    validator.add_record_check(check_age_variables)

    return validator
def test_value_checks_datetime():
    """Check that datetime value checks flag malformed date strings.

    One valid and three invalid ``bar`` values are validated against the
    ``%Y-%m-%d`` format; each invalid row must yield one
    ``VALUE_CHECK_FAILED`` problem on field ``bar``.
    """
    validator = CSVValidator(('foo', 'bar'))
    validator.add_value_check('bar', datetime_string('%Y-%m-%d'))

    data = (
        ('foo', 'bar'),
        ('A', '1999-09-09'),    # row 2: valid
        ('B', '1999-13-09'),    # row 3: invalid month
        ('C', '1999-09-32'),    # row 4: invalid day
        ('D', '1999-09-09ss'),  # row 5: invalid string
    )

    problems = validator.validate(data)
    assert len(problems) == 3, problems

    for problem in problems:
        assert problem['code'] == VALUE_CHECK_FAILED

    # Problems are expected in row order, one per invalid record.
    for index, expected_row in enumerate((3, 4, 5)):
        problem = problems[index]
        assert problem['row'] == expected_row and problem['field'] == 'bar'