def test_compound_unique_checks(): """Test the uniqueness checks on compound keys.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_unique_check(('foo', 'bar'), 'X5', 'custom message') data = ( ('foo', 'bar'), ('1', 'A'), ('2', 'B'), ('1', 'B'), ('2', 'A'), ('1', 'A') ) problems = validator.validate(data) n = len(problems) assert n == 1, n p = problems[0] assert p['code'] == 'X5' assert p['message'] == 'custom message' assert p['row'] == 6 assert p['key'] == ('foo', 'bar') assert p['value'] == ('1', 'A') assert p['record'] == ('1', 'A')
def test_compound_unique_checks_with_variable_record_lengths(): """Test the uniqueness checks on compound keys when record lengths vary.""" field_names = ('something', 'foo', 'bar') validator = CSVValidator(field_names) validator.add_unique_check(('foo', 'bar'), 'X5', 'custom message') data = ( ('something', 'foo', 'bar'), ('Z', '1', 'A'), ('Z', '2', 'B'), ('Z'), ('Z', '2', 'A'), ('Z', '1', 'A') ) problems = validator.validate(data) print problems n = len(problems) assert n == 1, n p = problems[0] assert p['code'] == 'X5' assert p['message'] == 'custom message' assert p['row'] == 6 assert p['key'] == ('foo', 'bar') assert p['value'] == ('1', 'A') assert p['record'] == ('Z', '1', 'A')
def test_unique_checks(): """Test the uniqueness checks.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_unique_check('foo') data = ( ('foo', 'bar'), ('1', 'A'), ('2', 'B'), ('1', 'C') ) problems = validator.validate(data) n = len(problems) assert n == 1, n p = problems[0] assert p['code'] == UNIQUE_CHECK_FAILED assert p['message'] == MESSAGES[UNIQUE_CHECK_FAILED] assert p['row'] == 4 assert p['key'] == 'foo' assert p['value'] == '1' assert p['record'] == ('1', 'C')
def test_unique_checks_with_variable_record_lengths(): """Test the uniqueness checks still work when record lengths vary.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_unique_check('bar') data = ( ('foo', 'bar'), ('1', 'A'), ('2'), ('3', 'A') ) problems = validator.validate(data) n = len(problems) assert n == 1, n p = problems[0] assert p['code'] == UNIQUE_CHECK_FAILED assert p['message'] == MESSAGES[UNIQUE_CHECK_FAILED] assert p['row'] == 4 assert p['key'] == 'bar' assert p['value'] == 'A' assert p['record'] == ('3', 'A')
def test_summarize(): """Test use of summarize option.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) def foo_gt_bar(r): return int(r['foo']) > int(r['bar']) validator.add_record_predicate(foo_gt_bar) data = ( ('foo', 'bar'), ('7', '3'), # valid ('1', '3') # invalid ) problems = validator.validate(data, summarize=True) n = len(problems) assert n == 1, n p = problems[0] assert p['code'] == RECORD_PREDICATE_FALSE for k in ('message', 'row', 'record'): assert k not in p
def generate(self): validator = CSVValidator(self.field_names) validator.add_header_check() validator.add_record_length_check() for value, check in self.value_checks: validator.add_value_check(value, check) validator.add_unique_check(self.unique_checks) return validator
def test_response_contents(self, register_fields, endpoint): response = requests.get(urljoin(endpoint, 'blobs.csv')) validator = CSVValidator(['_id'] + register_fields) validator.add_header_check() problems = validator.validate(csv.reader(response.text.split('\r\n'))) assert problems == [], '/blobs CSV fields do not match the register definition'
def test_ignore_lines(): """Test instructions to ignore lines works.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_header_check() validator.add_value_check('foo', int) validator.add_value_check('bar', float) data = ( ('ignore', 'me', 'please'), ('ignore', 'me', 'too', 'please'), ('foo', 'baz'), ('1.2', 'abc') ) problems = validator.validate(data, ignore_lines=2) assert len(problems) == 3 header_problems = [p for p in problems if p['code'] == HEADER_CHECK_FAILED] assert len(header_problems) == 1 assert header_problems[0]['row'] == 3 value_problems = [p for p in problems if p['code'] == VALUE_CHECK_FAILED] assert len(value_problems) == 2 for p in value_problems: assert p['row'] == 4
def test_value_check_numeric_ranges(): """Test value checks with numerical range functions.""" field_names = ('foo', 'bar', 'baz', 'quux') validator = CSVValidator(field_names) validator.add_value_check('foo', number_range_inclusive(2, 6, int)) validator.add_value_check('bar', number_range_exclusive(2, 6, int)) validator.add_value_check('baz', number_range_inclusive(2.0, 6.3, float)) validator.add_value_check('quux', number_range_exclusive(2.0, 6.3, float)) data = ( ('foo', 'bar', 'baz', 'quux'), ('2', '3', '2.0', '2.1'), # valid ('1', '3', '2.0', '2.1'), # foo invalid ('2', '2', '2.0', '2.1'), # bar invalid ('2', '3', '1.9', '2.1'), # baz invalid ('2', '3', '2.0', '2.0') # quux invalid ) problems = validator.validate(data) assert len(problems) == 4, len(problems) for p in problems: assert p['code'] == VALUE_CHECK_FAILED assert problems[0]['row'] == 3 and problems[0]['field'] == 'foo' assert problems[1]['row'] == 4 and problems[1]['field'] == 'bar' assert problems[2]['row'] == 5 and problems[2]['field'] == 'baz' assert problems[3]['row'] == 6 and problems[3]['field'] == 'quux'
def test_guard_conditions(): """Test some guard conditions.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) try: validator.add_value_check('foo', 'i am not callable') except AssertionError: pass # expected else: assert False, 'expected exception'
def test_value_check_enumeration(): """Test value checks with the enumeration() function.""" field_names = ('foo', 'bar', 'baz') validator = CSVValidator(field_names) # define an enumeration directly with arguments validator.add_value_check('bar', enumeration('M', 'F')) # define an enumeration by passing in a list or tuple flavours = ('chocolate', 'vanilla', 'strawberry') validator.add_value_check('baz', enumeration(flavours)) data = (('foo', 'bar', 'baz'), ('1', 'M', 'chocolate'), ('2', 'F', 'maple pecan'), ('3', 'X', 'strawberry')) problems = validator.validate(data) assert len(problems) == 2 p0 = problems[0] assert p0['code'] == VALUE_CHECK_FAILED assert p0['row'] == 3 assert p0['column'] == 3 assert p0['field'] == 'baz' assert p0['value'] == 'maple pecan' assert p0['record'] == ('2', 'F', 'maple pecan') p1 = problems[1] assert p1['code'] == VALUE_CHECK_FAILED assert p1['row'] == 4 assert p1['column'] == 2 assert p1['field'] == 'bar' assert p1['value'] == 'X' assert p1['record'] == ('3', 'X', 'strawberry')
def test_value_checks_datetime_range(): """Test value checks with datetime ranges.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check( 'bar', datetime_range_inclusive('1999-09-09', '2009-09-09', '%Y-%m-%d')) validator.add_value_check( 'bar', datetime_range_exclusive('1999-09-09', '2009-09-09', '%Y-%m-%d')) data = ( ('foo', 'bar'), ('A', '1999-09-10'), # valid ('B', '1999-09-09'), # invalid (exclusive) ('C', '2009-09-09'), # invalid (exclusive) ('D', '1999-09-08'), # invalid (both) ('E', '2009-09-10') # invalid (both) ) problems = validator.validate(data) assert len(problems) == 6, len(problems) assert len([p for p in problems if p['row'] == 3]) == 1 assert len([p for p in problems if p['row'] == 4]) == 1 assert len([p for p in problems if p['row'] == 5]) == 2 assert len([p for p in problems if p['row'] == 6]) == 2
def test_header_check(): """Test the header checks work.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_header_check() # use default code and message validator.add_header_check( code='X1', message='custom message') # provide custom code and message data = (('foo', 'baz'), ('123', '456')) problems = validator.validate(data) assert len(problems) == 2 p0 = problems[0] assert p0['code'] == HEADER_CHECK_FAILED assert p0['message'] == MESSAGES[HEADER_CHECK_FAILED] assert p0['record'] == ('foo', 'baz') assert p0['missing'] == set(['bar']) assert p0['unexpected'] == set(['baz']) assert p0['row'] == 1 p1 = problems[1] assert p1['code'] == 'X1' assert p1['message'] == 'custom message' assert p1['missing'] == set(['bar']) assert p1['unexpected'] == set(['baz']) assert p1['record'] == ('foo', 'baz') assert p1['row'] == 1
def _validate_csv(csv_file, output_file=None): """ Validates a CSV file. :param csv_file: The CSV file to validate :param output_file: The optional output file to which problems should be written :returns: True if the CSV file is valid, false otherwise """ field_names = _get_header(csv_file) validator = CSVValidator(field_names) # basic header and record length checks validator.add_header_check('EX1', 'bad header') validator.add_record_length_check('EX2', 'unexpected record length') with open(csv_file) as fp: data = csv.reader(fp) problems = validator.validate(data) if problems: write_problems(problems, output_file or sys.stdout) return False else: return True
def validate_csv(csv): field_names = ("provider_id", "kind", "name", "dosage", "measure", "amount") validator = CSVValidator(field_names) kinds = [kind[0] for kind in Medicine.KIND] validator.add_header_check("HEADER", "bad header") validator.add_value_check("provider_id", int, "PROVIDER", "provider_id must be an integer") validator.add_value_check("kind", enumeration(*kinds), "PROVIDER", f"kind must be {str(kinds)}") return validator.validate(csv)
def test_value_check_search_pattern(): """Test value checks with the search_pattern() function.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('bar', search_pattern('\d{4}-\d{2}-\d{2}')) data = ( ('foo', 'bar'), ('1', '1999-01-01'), ('2', 'abcd-ef-gh'), ('3', 'a1999-01-01'), # this is valid - pattern attempts to match anywhere in line ('4', '1999-01-01a') # this is valid - pattern attempts to match anywhere in line ) problems = validator.validate(data) assert len(problems) == 1, len(problems) assert problems[0]['code'] == VALUE_CHECK_FAILED assert problems[0]['row'] == 3
def test_value_checks_with_missing_values(): """ Establish expected behaviour for value checks where there are missing values in the records. """ field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('bar', float) data = ( ('foo', 'bar'), ('12', ) # this is missing value for bar, what happens to value check? ) problems = validator.validate(data) # missing values are ignored - use record length checks to find these assert len(problems) == 0
def test_value_checks_with_missing_values(): """ Establish expected behaviour for value checks where there are missing values in the records. """ field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('bar', float) data = ( ('foo', 'bar'), ('12',) # this is missing value for bar, what happens to value check? ) problems = validator.validate(data) # missing values are ignored - use record length checks to find these assert len(problems) == 0
def test_record_checks(): """Test the use of record checks.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) def foo_gt_bar(r): foo = int(r['foo']) bar = int(r['bar']) if foo < bar: raise RecordError validator.add_record_check(foo_gt_bar) # use default code and message def foo_gt_2bar(r): foo = int(r['foo']) bar = int(r['bar']) if foo < 2 * bar: raise RecordError('X4', 'custom message') validator.add_record_check(foo_gt_2bar) data = ( ('foo', 'bar'), ('7', '3'), # valid ('5', '3'), # invalid - not foo_gt_2bar ('1', '3') # invalid - both predicates false ) problems = validator.validate(data) n = len(problems) assert n == 3, n row3_problems = [p for p in problems if p['row'] == 3] assert len(row3_problems) == 1 p = row3_problems[0] assert p['code'] == 'X4' assert p['message'] == 'custom message' assert p['record'] == ('5', '3') row4_problems = [p for p in problems if p['row'] == 4] assert len(row4_problems) == 2 row4_problems_default = [ p for p in row4_problems if p['code'] == RECORD_CHECK_FAILED ] assert len(row4_problems_default) == 1 p = row4_problems_default[0] assert p['message'] == MESSAGES[RECORD_CHECK_FAILED] assert p['record'] == ('1', '3') row4_problems_custom = [p for p in row4_problems if p['code'] == 'X4'] assert len(row4_problems_custom) == 1 p = row4_problems_custom[0] assert p['message'] == 'custom message' assert p['record'] == ('1', '3')
def test_header_check(): """Test the header checks work.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_header_check() # use default code and message validator.add_header_check(code='X1', message='custom message') # provide custom code and message data = ( ('foo', 'baz'), ('123', '456') ) problems = validator.validate(data) assert len(problems) == 2 p0 = problems[0] assert p0['code'] == HEADER_CHECK_FAILED assert p0['message'] == MESSAGES[HEADER_CHECK_FAILED] assert p0['record'] == ('foo', 'baz') assert p0['missing'] == set(['bar']) assert p0['unexpected'] == set(['baz']) assert p0['row'] == 1 p1 = problems[1] assert p1['code'] == 'X1' assert p1['message'] == 'custom message' assert p1['missing'] == set(['bar']) assert p1['unexpected'] == set(['baz']) assert p1['record'] == ('foo', 'baz') assert p1['row'] == 1
def test_limit(): """Test the use of the limit option.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) def foo_gt_bar(r): return int(r['foo']) > int(r['bar']) validator.add_record_predicate(foo_gt_bar) data = ( ('foo', 'bar'), ('7', '3'), # valid ('1', '3'), # invalid ('2', '3') # invalid ) problems = validator.validate(data, limit=1) n = len(problems) assert n == 1, n problems = validator.validate(data) n = len(problems) assert n == 2, n
def test_value_checks_datetime_range(): """Test value checks with datetime ranges.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('bar', datetime_range_inclusive('1999-09-09', '2009-09-09', '%Y-%m-%d')) validator.add_value_check('bar', datetime_range_exclusive('1999-09-09', '2009-09-09', '%Y-%m-%d')) data = ( ('foo', 'bar'), ('A', '1999-09-10'), # valid ('B', '1999-09-09'), # invalid (exclusive) ('C', '2009-09-09'), # invalid (exclusive) ('D', '1999-09-08'), # invalid (both) ('E', '2009-09-10') # invalid (both) ) problems = validator.validate(data) assert len(problems) == 6, len(problems) assert len([p for p in problems if p['row'] == 3]) == 1 assert len([p for p in problems if p['row'] == 4]) == 1 assert len([p for p in problems if p['row'] == 5]) == 2 assert len([p for p in problems if p['row'] == 6]) == 2
def test_value_check_match_pattern(): """Test value checks with the match_pattern() function.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('bar', match_pattern('\d{4}-\d{2}-\d{2}')) data = ( ('foo', 'bar'), ('1', '1999-01-01'), ('2', 'abcd-ef-gh'), ('3', 'a1999-01-01'), ('4', '1999-01-01a') # this is valid - pattern attempts to match at beginning of line ) problems = validator.validate(data) assert len(problems) == 2, len(problems) for p in problems: assert p['code'] == VALUE_CHECK_FAILED assert problems[0]['row'] == 3 assert problems[1]['row'] == 4
def test_value_checks_datetime_parser(): """Test value checks with datetimes parser.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('bar', datetime_parse()) data = ( ('foo', 'bar'), ('A', '1999-09-09'), # valid ('B', '31-12-2009'), # valid ('C', '12-31-2032'), # valid ('D', '1999-09-09ss'), # invalid string ) problems = validator.validate(data) print(problems) assert len(problems) == 1, problems for p in problems: assert p['code'] == VALUE_CHECK_FAILED assert problems[0]['row'] == 5 and problems[0]['field'] == 'bar'
def test_skips(): """Test skip functions.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_record_length_check() validator.add_value_check('foo', int) def skip_pragma(record): return record[0].startswith('##') validator.add_skip(skip_pragma) data = (('foo', 'bar'), ('1', 'X'), ('## this row', 'should be', 'skipped'), ('3', 'Y')) problems = validator.validate(data) assert len(problems) == 0, problems
def test_context(): """Test passing in of context information.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) def foo_gt_bar(r): return int(r['foo']) > int(r['bar']) validator.add_record_predicate(foo_gt_bar) data = ( ('foo', 'bar'), ('7', '3'), # valid ('1', '3') # invalid ) context = {'info': 'file X'} problems = validator.validate(data, context=context) n = len(problems) assert n == 1, n p = problems[0] assert p['context'] == context
def test_value_checks_datetime(): """Test value checks with datetimes.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('bar', datetime_string('%Y-%m-%d')) data = ( ('foo', 'bar'), ('A', '1999-09-09'), # valid ('B', '1999-13-09'), # invalid month ('C', '1999-09-32'), # invalid day ('D', '1999-09-09ss') # invalid string ) problems = validator.validate(data) assert len(problems) == 3, problems for p in problems: assert p['code'] == VALUE_CHECK_FAILED assert problems[0]['row'] == 3 and problems[0]['field'] == 'bar' assert problems[1]['row'] == 4 and problems[1]['field'] == 'bar' assert problems[2]['row'] == 5 and problems[2]['field'] == 'bar'
def test_value_checks_datetime_parser_year(): """Test value checks with datetimes parser yearfirst.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('bar', datetime_parse(yearfirst=True)) data = ( ('foo', 'bar'), ('A', '1999-09-09'), # valid ('B', '1999-13-09'), # invalid month ('C', '1999-09-32'), # invalid day ('D', '1999-09-09ss') # invalid string ) problems = validator.validate(data) assert len(problems) == 3, problems for p in problems: assert p['code'] == VALUE_CHECK_FAILED assert problems[0]['row'] == 3 and problems[0]['field'] == 'bar' assert problems[1]['row'] == 4 and problems[1]['field'] == 'bar' assert problems[2]['row'] == 5 and problems[2]['field'] == 'bar'
def test_value_checks_datetime_parser_day(): """Test value checks with datetimes parser.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('bar', datetime_parse(dayfirst=True)) data = ( ('foo', 'bar'), ('A', '01-09-1991'), # valid ('B', '05-24-1999'), # valid ('C', '33-01-1991'), # invalid day ('D', '1999-09-09ss'), # invalid string ('E', '31-12-1999'), # valid ) problems = validator.validate(data) assert len(problems) == 2, problems for p in problems: assert p['code'] == VALUE_CHECK_FAILED assert problems[0]['row'] == 4 and problems[0]['field'] == 'bar' assert problems[1]['row'] == 5 and problems[1]['field'] == 'bar'
def test_record_predicates(): """Test the use of record predicates.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) def foo_gt_bar(r): return int(r['foo']) > int( r['bar']) # expect record will be a dictionary validator.add_record_predicate(foo_gt_bar) # use default code and message def foo_gt_2bar(r): return int(r['foo']) > 2 * int(r['bar']) validator.add_record_predicate(foo_gt_2bar, 'X4', 'custom message') data = ( ('foo', 'bar'), ('7', '3'), # valid ('5', '3'), # invalid - not foo_gt_2bar ('1', '3') # invalid - both predicates false ) problems = validator.validate(data) n = len(problems) assert n == 3, n row3_problems = [p for p in problems if p['row'] == 3] assert len(row3_problems) == 1 p = row3_problems[0] assert p['code'] == 'X4' assert p['message'] == 'custom message' assert p['record'] == ('5', '3') row4_problems = [p for p in problems if p['row'] == 4] assert len(row4_problems) == 2 row4_problems_default = [ p for p in row4_problems if p['code'] == RECORD_PREDICATE_FALSE ] assert len(row4_problems_default) == 1 p = row4_problems_default[0] assert p['message'] == MESSAGES[RECORD_PREDICATE_FALSE] assert p['record'] == ('1', '3') row4_problems_custom = [p for p in row4_problems if p['code'] == 'X4'] assert len(row4_problems_custom) == 1 p = row4_problems_custom[0] assert p['message'] == 'custom message' assert p['record'] == ('1', '3')
def test_record_checks(): """Test the use of record checks.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) def foo_gt_bar(r): foo = int(r['foo']) bar = int(r['bar']) if foo < bar: raise RecordError validator.add_record_check(foo_gt_bar) # use default code and message def foo_gt_2bar(r): foo = int(r['foo']) bar = int(r['bar']) if foo < 2 * bar: raise RecordError('X4', 'custom message') validator.add_record_check(foo_gt_2bar) data = ( ('foo', 'bar'), ('7', '3'), # valid ('5', '3'), # invalid - not foo_gt_2bar ('1', '3') # invalid - both predicates false ) problems = validator.validate(data) n = len(problems) assert n == 3, n row3_problems = [p for p in problems if p['row'] == 3] assert len(row3_problems) == 1 p = row3_problems[0] assert p['code'] == 'X4' assert p['message'] == 'custom message' assert p['record'] == ('5', '3') row4_problems = [p for p in problems if p['row'] == 4] assert len(row4_problems) == 2 row4_problems_default = [p for p in row4_problems if p['code'] == RECORD_CHECK_FAILED] assert len(row4_problems_default) == 1 p = row4_problems_default[0] assert p['message'] == MESSAGES[RECORD_CHECK_FAILED] assert p['record'] == ('1', '3') row4_problems_custom = [p for p in row4_problems if p['code'] == 'X4'] assert len(row4_problems_custom) == 1 p = row4_problems_custom[0] assert p['message'] == 'custom message' assert p['record'] == ('1', '3')
def test_record_length_checks(): """Test the record length checks.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_record_length_check() # test default code and message validator.add_record_length_check('X2', 'custom message') data = ( ('foo', 'bar'), ('12', '3.4'), ('12', ), # be careful with syntax for singleton tuples ('12', '3.4', 'spong')) problems = validator.validate(data) assert len(problems) == 4, len(problems) # find problems reported under default code default_problems = [ p for p in problems if p['code'] == RECORD_LENGTH_CHECK_FAILED ] assert len(default_problems) == 2 d0 = default_problems[0] assert d0['message'] == MESSAGES[RECORD_LENGTH_CHECK_FAILED] assert d0['row'] == 3 assert d0['record'] == ('12', ) assert d0['length'] == 1 d1 = default_problems[1] assert d1['message'] == MESSAGES[RECORD_LENGTH_CHECK_FAILED] assert d1['row'] == 4 assert d1['record'] == ('12', '3.4', 'spong') assert d1['length'] == 3 # find problems reported under custom code custom_problems = [p for p in problems if p['code'] == 'X2'] assert len(custom_problems) == 2 c0 = custom_problems[0] assert c0['message'] == 'custom message' assert c0['row'] == 3 assert c0['record'] == ('12', ) assert c0['length'] == 1 c1 = custom_problems[1] assert c1['message'] == 'custom message' assert c1['row'] == 4 assert c1['record'] == ('12', '3.4', 'spong') assert c1['length'] == 3
def test_skips(): """Test skip functions.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_record_length_check() validator.add_value_check('foo', int) def skip_pragma(record): return record[0].startswith('##') validator.add_skip(skip_pragma) data = ( ('foo', 'bar'), ('1', 'X'), ('## this row', 'should be', 'skipped'), ('3', 'Y') ) problems = validator.validate(data) assert len(problems) == 0, problems
def test_record_predicates(): """Test the use of record predicates.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) def foo_gt_bar(r): return int(r['foo']) > int(r['bar']) # expect record will be a dictionary validator.add_record_predicate(foo_gt_bar) # use default code and message def foo_gt_2bar(r): return int(r['foo']) > 2 * int(r['bar']) validator.add_record_predicate(foo_gt_2bar, 'X4', 'custom message') data = ( ('foo', 'bar'), ('7', '3'), # valid ('5', '3'), # invalid - not foo_gt_2bar ('1', '3') # invalid - both predicates false ) problems = validator.validate(data) n = len(problems) assert n == 3, n row3_problems = [p for p in problems if p['row'] == 3] assert len(row3_problems) == 1 p = row3_problems[0] assert p['code'] == 'X4' assert p['message'] == 'custom message' assert p['record'] == ('5', '3') row4_problems = [p for p in problems if p['row'] == 4] assert len(row4_problems) == 2 row4_problems_default = [p for p in row4_problems if p['code'] == RECORD_PREDICATE_FALSE] assert len(row4_problems_default) == 1 p = row4_problems_default[0] assert p['message'] == MESSAGES[RECORD_PREDICATE_FALSE] assert p['record'] == ('1', '3') row4_problems_custom = [p for p in row4_problems if p['code'] == 'X4'] assert len(row4_problems_custom) == 1 p = row4_problems_custom[0] assert p['message'] == 'custom message' assert p['record'] == ('1', '3')
def test_record_length_checks(): """Test the record length checks.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_record_length_check() # test default code and message validator.add_record_length_check('X2', 'custom message') data = ( ('foo', 'bar'), ('12', '3.4'), ('12',), # be careful with syntax for singleton tuples ('12', '3.4', 'spong') ) problems = validator.validate(data) assert len(problems) == 4, len(problems) # find problems reported under default code default_problems = [p for p in problems if p['code'] == RECORD_LENGTH_CHECK_FAILED] assert len(default_problems) == 2 d0 = default_problems[0] assert d0['message'] == MESSAGES[RECORD_LENGTH_CHECK_FAILED] assert d0['row'] == 3 assert d0['record'] == ('12',) assert d0['length'] == 1 d1 = default_problems[1] assert d1['message'] == MESSAGES[RECORD_LENGTH_CHECK_FAILED] assert d1['row'] == 4 assert d1['record'] == ('12', '3.4', 'spong') assert d1['length'] == 3 # find problems reported under custom code custom_problems = [p for p in problems if p['code'] == 'X2'] assert len(custom_problems) == 2 c0 = custom_problems[0] assert c0['message'] == 'custom message' assert c0['row'] == 3 assert c0['record'] == ('12',) assert c0['length'] == 1 c1 = custom_problems[1] assert c1['message'] == 'custom message' assert c1['row'] == 4 assert c1['record'] == ('12', '3.4', 'spong') assert c1['length'] == 3
def test_value_predicates(): """Test the use of value predicates.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) foo_predicate = lambda v: math.pow(float(v), 2) < 64 validator.add_value_predicate('foo', foo_predicate) bar_predicate = lambda v: math.sqrt(float(v)) > 8 validator.add_value_predicate('bar', bar_predicate, 'X3', 'custom message') data = ( ('foo', 'bar'), ('4', '81'), # valid ('9', '81'), # foo invalid ('4', '49') # bar invalid ) problems = validator.validate(data) assert len(problems) == 2, len(problems) p0 = problems[0] assert p0['code'] == VALUE_PREDICATE_FALSE assert p0['message'] == MESSAGES[VALUE_PREDICATE_FALSE] assert p0['row'] == 3 assert p0['column'] == 1 assert p0['field'] == 'foo' assert p0['value'] == '9' assert p0['record'] == ('9', '81') p1 = problems[1] assert p1['code'] == 'X3' assert p1['message'] == 'custom message' assert p1['row'] == 4 assert p1['column'] == 2 assert p1['field'] == 'bar' assert p1['value'] == '49' assert p1['record'] == ('4', '49')
def test_value_check_enumeration(): """Test value checks with the enumeration() function.""" field_names = ('foo', 'bar', 'baz') validator = CSVValidator(field_names) # define an enumeration directly with arguments validator.add_value_check('bar', enumeration('M', 'F')) # define an enumeration by passing in a list or tuple flavours = ('chocolate', 'vanilla', 'strawberry') validator.add_value_check('baz', enumeration(flavours)) data = ( ('foo', 'bar', 'baz'), ('1', 'M', 'chocolate'), ('2', 'F', 'maple pecan'), ('3', 'X', 'strawberry') ) problems = validator.validate(data) assert len(problems) == 2 p0 = problems[0] assert p0['code'] == VALUE_CHECK_FAILED assert p0['row'] == 3 assert p0['column'] == 3 assert p0['field'] == 'baz' assert p0['value'] == 'maple pecan' assert p0['record'] == ('2', 'F', 'maple pecan') p1 = problems[1] assert p1['code'] == VALUE_CHECK_FAILED assert p1['row'] == 4 assert p1['column'] == 2 assert p1['field'] == 'bar' assert p1['value'] == 'X' assert p1['record'] == ('3', 'X', 'strawberry')
def test_exception_handling(): """Establish expectations for exception handling.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('foo', int) def buggy_value_check(v): """I am a buggy value check.""" raise Exception('something went wrong') validator.add_value_check('bar', buggy_value_check) def buggy_value_predicate(v): """I am a buggy value predicate.""" raise Exception('something went wrong') validator.add_value_predicate('bar', buggy_value_predicate) def buggy_record_check(r): """I am a buggy record check.""" raise Exception('something went wrong') validator.add_record_check(buggy_record_check) def buggy_record_predicate(r): """I am a buggy record predicate.""" raise Exception('something went wrong') validator.add_record_predicate(buggy_record_predicate) def buggy_assert(r): """I am a buggy assert.""" raise Exception('something went wrong') validator.assert_something_buggy = buggy_assert def buggy_check(r): """I am a buggy check.""" raise Exception('something went wrong') validator.check_something_buggy = buggy_check def buggy_each(r): """I am a buggy each.""" raise Exception('something went wrong') validator.each_something_buggy = buggy_each def buggy_finally_assert(): """I am a buggy finally assert.""" raise Exception('something went wrong') validator.finally_assert_something_buggy = buggy_finally_assert def buggy_skip(record): """I am a buggy skip.""" raise Exception('something went wrong') validator.add_skip(buggy_skip) data = (('foo', 'bar'), ('ab', '56')) problems = validator.validate(data, report_unexpected_exceptions=False) n = len(problems) assert n == 1, n p = problems[0] assert p['row'] == 2 problems = validator.validate( data) # by default, exceptions are reported as problems n = len(problems) assert n == 10, n unexpected_problems = [ p for p in problems if p['code'] == UNEXPECTED_EXCEPTION ] assert len(unexpected_problems) == 9 for p in unexpected_problems: e = p['exception'] assert e.args[0] == 'something went wrong', e.args
def test_exception_handling(): """Establish expectations for exception handling.""" field_names = ('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('foo', int) def buggy_value_check(v): """I am a buggy value check.""" raise Exception('something went wrong') validator.add_value_check('bar', buggy_value_check) def buggy_value_predicate(v): """I am a buggy value predicate.""" raise Exception('something went wrong') validator.add_value_predicate('bar', buggy_value_predicate) def buggy_record_check(r): """I am a buggy record check.""" raise Exception('something went wrong') validator.add_record_check(buggy_record_check) def buggy_record_predicate(r): """I am a buggy record predicate.""" raise Exception('something went wrong') validator.add_record_predicate(buggy_record_predicate) def buggy_assert(r): """I am a buggy assert.""" raise Exception('something went wrong') validator.assert_something_buggy = buggy_assert def buggy_check(r): """I am a buggy check.""" raise Exception('something went wrong') validator.check_something_buggy = buggy_check def buggy_each(r): """I am a buggy each.""" raise Exception('something went wrong') validator.each_something_buggy = buggy_each def buggy_finally_assert(): """I am a buggy finally assert.""" raise Exception('something went wrong') validator.finally_assert_something_buggy = buggy_finally_assert def buggy_skip(record): """I am a buggy skip.""" raise Exception('something went wrong') validator.add_skip(buggy_skip) data = ( ('foo', 'bar'), ('ab', '56') ) problems = validator.validate(data, report_unexpected_exceptions=False) n = len(problems) assert n == 1, n p = problems[0] assert p['row'] == 2 problems = validator.validate(data) # by default, exceptions are reported as problems n = len(problems) assert n == 10, n unexpected_problems = [p for p in problems if p['code'] == UNEXPECTED_EXCEPTION] assert len(unexpected_problems) == 9 for p in unexpected_problems: e = p['exception'] assert e.args[0] == 'something went wrong', e.args
def create_validator(): """Create an example CSV validator for patient demographic data.""" field_names = ( 'study_id', 'patient_id', 'gender', 'age_years', 'age_months', 'date_inclusion' ) validator = CSVValidator(field_names) # basic header and record length checks validator.add_header_check('EX1', 'bad header') validator.add_record_length_check('EX2', 'unexpected record length') # some simple value checks validator.add_value_check('study_id', int, 'EX3', 'study id must be an integer') validator.add_value_check('patient_id', int, 'EX4', 'patient id must be an integer') validator.add_value_check('gender', enumeration('M', 'F'), 'EX5', 'invalid gender') validator.add_value_check('age_years', number_range_inclusive(0, 120, int), 'EX6', 'invalid age in years') validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'), 'EX7', 'invalid date') # a more complicated record check def check_age_variables(r): age_years = int(r['age_years']) age_months = int(r['age_months']) valid = (age_months >= age_years * 12 and age_months % age_years < 12) if not valid: raise RecordError('EX8', 'invalid age variables') validator.add_record_check(check_age_variables) return validator
def test_value_checks(): """Some very simple tests of value checks.""" # a simple validator to be tested field_names=('foo', 'bar') validator = CSVValidator(field_names) validator.add_value_check('foo', int) validator.add_value_check('bar', float) # some test data data = ( ('foo', 'bar'), # row 1 - header row ('12', '3.4'), # row 2 - valid ('1.2', '3.4'), # row 3 - foo invalid ('abc', '3.4'), # row 4 - foo invalid ('12', 'abc'), # row 5 - bar invalid ('', '3.4'), # row 6 - foo invalid (empty) ('12', ''), # row 7 - bar invalid (empty) ('abc', 'def') # row 8 - both invalid ) # run the validator on the test data problems = validator.validate(data) assert len(problems) == 7 # N.B., expect row and column indices start from 1 problems_row2 = [p for p in problems if p['row'] == 2] assert len(problems_row2) == 0 # should be valid problems_row3 = [p for p in problems if p['row'] == 3] assert len(problems_row3) == 1 p = problems_row3[0] # convenience variable assert p['column'] == 1 # report column index assert p['field'] == 'foo' # report field name assert p['code'] == VALUE_CHECK_FAILED # default problem code for value checks assert p['message'] == MESSAGES[VALUE_CHECK_FAILED] # default message assert p['value'] == '1.2' # report bad value assert p['record'] == ('1.2', '3.4') # report record problems_row4 = [p for p in problems if p['row'] == 4] assert len(problems_row4) == 1 p = problems_row4[0] # convenience variable assert p['column'] == 1 assert p['field'] == 'foo' assert p['code'] == VALUE_CHECK_FAILED assert p['message'] == MESSAGES[VALUE_CHECK_FAILED] assert p['value'] == 'abc' assert p['record'] == ('abc', '3.4') problems_row5 = [p for p in problems if p['row'] == 5] assert len(problems_row5) == 1 p = problems_row5[0] # convenience variable assert p['column'] == 2 assert p['field'] == 'bar' assert p['code'] == VALUE_CHECK_FAILED assert p['message'] == MESSAGES[VALUE_CHECK_FAILED] assert p['value'] == 'abc' assert p['record'] == ('12', 'abc') problems_row6 = [p for p in problems if p['row'] == 6] assert len(problems_row6) == 1 p = problems_row6[0] # convenience variable assert p['column'] == 1 assert p['field'] == 'foo' assert p['code'] == VALUE_CHECK_FAILED assert p['message'] == MESSAGES[VALUE_CHECK_FAILED] assert p['value'] == '' assert p['record'] == ('', '3.4') problems_row7 = [p for p in problems if p['row'] == 7] assert len(problems_row7) == 1 p = problems_row7[0] # convenience variable assert p['column'] == 2 assert p['field'] == 'bar' assert p['code'] == VALUE_CHECK_FAILED assert p['message'] == MESSAGES[VALUE_CHECK_FAILED] assert p['value'] == '' assert p['record'] == ('12', '') problems_row8 = [p for p in problems if p['row'] == 8] assert len(problems_row8) == 2 # expect both problems are found p0 = problems_row8[0] # convenience variable assert p0['column'] == 1 assert p0['field'] == 'foo' assert p0['code'] == VALUE_CHECK_FAILED assert p0['message'] == MESSAGES[VALUE_CHECK_FAILED] assert p0['value'] == 'abc' assert p0['record'] == ('abc', 'def') p1 = problems_row8[1] # convenience variable assert p1['column'] == 2 assert p1['field'] == 'bar' assert p1['code'] == VALUE_CHECK_FAILED assert p1['message'] == MESSAGES[VALUE_CHECK_FAILED] assert p1['value'] == 'def' assert p1['record'] == ('abc', 'def')
def create_validator(): """Create an example CSV validator for patient demographic data.""" # def CheckAlpha(s=''): if len(s) > 0: # s=FixString(s) if not s.replace(" ","").isalpha() and len(s) > 0: return False # Logit("CheckAlpha: LineNo - " + str(ln+1) + " | Mem ID - " + curline[0] + " |" + COLDESC[(i+1)] +" - " + s + " :: Not a alphabetic letter.") return True field_names = ( 'CUSTID', 'FIRSTNAME', 'LASTNAME', 'CUSTNM', 'ADDRESS1', 'ADDRESS2', 'POSTCODE', 'CITY', 'STATE', 'WORKPHONE', 'WORKFAX', 'PHONE', 'MOBILE', 'EMAIL', 'ORGANISATION', 'EOL' ) validator = CSVValidator(field_names) # basic header and record length checks validator.add_header_check('EX1', 'bad header') validator.add_record_length_check('EX2', 'unexpected record length') # some simple value checks validator.add_value_check('CUSTID', int, 'EX3', 'CUSTID must be an integer') validator.add_value_check('FIRSTNAME', CheckAlpha, 'EX4', 'FIRSTNAME must be an integer') validator.add_value_check('LASTNAME', str, 'EX5', 'invalid LASTNAME') validator.add_value_check('CUSTNM', str, 'EX6', 'invalid CUSTNM') validator.add_value_check('ADDRESS1', str, 'EX7', 'invalid ADDRESS1') validator.add_value_check('ADDRESS2', str, 'EX8', 'invalid ADDRESS2') validator.add_value_check('POSTCODE', int, 'EX9', 'invalid POSTCODE') validator.add_value_check('CITY', str, 'EX10', 'invalid CITY') validator.add_value_check('STATE', str, 'EX11', 'invalid STATE') validator.add_value_check('WORKPHONE', int, 'EX12', 'invalid WORKPHONE') validator.add_value_check('WORKFAX', int, 'EX13', 'invalid WORKFAC') validator.add_value_check('PHONE', int, 'EX14', 'invalid PHONE') validator.add_value_check('MOBILE', int, 'EX15', 'invalid MOBILE') validator.add_value_check('EMAIL', str, 'EX16', 'invalid EMAIL') validator.add_value_check('ORGANISATION', str, 'EX17', 'invalid ORGANISATION') validator.add_value_check('EOL', str, 'EX18', 'invalid EOL') # a more complicated record check def check_age_variables(r): CUSTNM = int(r['CUSTNM']) ADDRESS1 = int(r['ADDRESS1']) valid = (ADDRESS1 >= CUSTNM * 12 and ADDRESS1 % CUSTNM < 12) if not valid: raise RecordError('EX8', 'invalid age variables') validator.add_record_check(check_age_variables) return validator