def validate(cursor, table, constraints, task_name):
    """Validate ``table`` and append the resulting problems to a log table.

    The table is checked with ``etl.validate`` against ``constraints``,
    using the table's own header as the expected header. Each problem row
    is then extended with two extra fields before being appended to the
    database through ``cursor``:

    * ``task_name``   -- the ``task_name`` argument
    * ``create_date`` -- the value of ``datetime.now()`` taken in this call

    Args:
        cursor: Database handle passed straight to ``etl.appenddb``.
        table: The petl table to validate.
        constraints: Constraint definitions forwarded to ``etl.validate``.
        task_name: Value stored in the ``task_name`` field of every problem.

    Returns:
        None. The only effect is the append to the log table.
    """
    findings = etl.validate(
        table, constraints=constraints, header=etl.header(table))
    tagged = etl.addfield(findings, 'task_name', task_name)
    stamped = etl.addfield(tagged, 'create_date', datetime.now())

    # NOTE(review): an earlier, commented-out variant of this write used
    # etl.todb(..., 'etl_logs'); confirm that 'tetl_logs' is the intended
    # target table name and not a typo.
    etl.appenddb(stamped, cursor, 'tetl_logs')
def validate(data, name, source, header, test, assertion):
    """Validate that a table meets the required constraints.

    Args:
        data: Container object; ``data.get(source)`` supplies the table to
            validate and ``data.config.debug`` controls the success message.
        name: Optional label. When truthy it is printed (followed by a
            space) in front of the pass/fail message.
        source: Key used to fetch the table from ``data``.
        header: Expected header. Only forwarded to ``petl.validate`` when
            it is not ``None`` and not empty.
        test: Iterable of ``(constraint_name, field, expression)`` entries
            that become petl ``test`` constraints.
        assertion: Iterable of ``(constraint_name, field, expression)``
            entries that become petl ``assertion`` constraints.

        In both ``test`` and ``assertion`` a ``field`` of ``'_row_'`` means
        the constraint gets no ``field`` key, and ``expression`` is a string
        of Python source that is evaluated to obtain the callable.

    Raises:
        PETLValidationError: If ``petl.validate`` reports at least one
            problem. The problems table is attached to the exception.
    """
    s = data.get(source)
    name = (name + ' ') if name else ''

    # SECURITY: the expressions below are run through eval(), which executes
    # arbitrary Python. This is only acceptable when `test` / `assertion`
    # come from a trusted source -- never feed untrusted input here.
    #
    # eval() sees this function's local variables, so the names `s`, `name`,
    # `constraints`, `c_data` and `constraint` are kept stable on purpose.
    constraints = []
    for kind, specs in (('test', test), ('assertion', assertion)):
        for c_data in specs:
            constraint = {'name': c_data[0]}
            if c_data[1] != '_row_':
                constraint['field'] = c_data[1]
            constraint[kind] = eval(c_data[2])
            constraints.append(constraint)

    # Only pass the keyword arguments that were actually supplied.
    params = {}
    if header is not None and len(header) != 0:
        params['header'] = header
    if constraints:
        params['constraints'] = constraints

    problems = petl.validate(s, **params)
    if problems.nrows() > 0:
        unsync.secho('{}Validation Failed!'.format(name), fg='red')
        unsync.secho(str(problems.lookall()), fg='red')
        raise PETLValidationError(problems)

    if data.config.debug is True:
        unsync.secho('{}Validation Passed!'.format(name), fg='green')
def main(argv):
    """Validate a CCB CSV loading file and write the problems to a CSV file.

    Command-line options (both required):

    * ``--input-ccb-csv-filename``         -- CSV file to validate
    * ``--output-validation-csv-filename`` -- CSV file receiving the
      validation results

    Each listed column is checked with an ``assertion`` built by ``max_len``
    or ``max_len_semisep``. Both are defined outside this function;
    presumably they return per-value length checks -- confirm against their
    definitions.

    Args:
        argv: Accepted for the caller's convenience but currently unused;
            see the note at the ``parse_args`` call.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-ccb-csv-filename", required=True,
                        help="Input CCB CSV loading file to validate")
    parser.add_argument("--output-validation-csv-filename", required=True,
                        help="Output CSV file that'll be created "
                             "with validation results")
    # NOTE(review): `argv` is not forwarded, so argparse reads sys.argv
    # itself. Left as-is because the caller's convention (with or without
    # the program name) is not visible from here.
    args = parser.parse_args()

    table = petl.fromcsv(args.input_ccb_csv_filename)

    # (constraint name, column, assertion) -- one entry per checked column.
    constraint_specs = [
        ('max_len_20', 'Legal Name', max_len(20)),
        ('max_len_30', 'How They Heard', max_len(30)),
        ('max_len_20', 'Last Name', max_len(20)),
        ('max_len_100', 'Mailbox Number', max_len(100)),
        ('max_len_20', 'Middle Name', max_len(20)),
        ('max_len_30', 'Job Title', max_len(30)),
        ('max_len_20', 'First Name', max_len(20)),
        ('max_len_30', 'School', max_len(30)),
        ('max_len_semisep_30', 'Abilities/Skills', max_len_semisep(30)),
        ('max_len_semisep_30', 'Spiritual Gifts', max_len_semisep(30)),
        ('max_len_semisep_30', 'Passions', max_len_semisep(30)),
        ('max_len_100', 'Transferred Frm', max_len(100)),
        ('max_len_100', 'Transferred To', max_len(100)),
        ('max_len_30', 'How They Joined', max_len(30)),
        ('max_len_30', 'Membership Type', max_len(30)),
        ('max_len_30', 'Reason Left Church', max_len(30)),
        ('max_len_100', 'Pastr When Join', max_len(100)),
        ('max_len_100', 'Pastr When Leav', max_len(100)),
    ]
    constraints = [
        {'name': label, 'field': column, 'assertion': check}
        for label, column, check in constraint_specs
    ]

    validation_table = petl.validate(table, constraints=constraints)
    validation_table.progress(200).tocsv(args.output_validation_csv_filename)

    # Parenthesised single-argument print behaves the same on Python 2
    # (print statement) and Python 3 (print function).
    print("See output file '" + args.output_validation_csv_filename +
          "' for results")

    # Flush to ensure all output is written
    sys.stdout.flush()
    sys.stderr.flush()
def main(argv):
    """Validate a CCB CSV loading file and write the problems to a CSV file.

    Command-line options (both required):

    * ``--input-ccb-csv-filename``         -- CSV file to validate
    * ``--output-validation-csv-filename`` -- CSV file receiving the
      validation results

    Each listed column is checked with an ``assertion`` built by ``max_len``
    or ``max_len_semisep``. Both are defined outside this function;
    presumably they return per-value length checks -- confirm against their
    definitions.

    Args:
        argv: Accepted for the caller's convenience but currently unused;
            see the note at the ``parse_args`` call.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-ccb-csv-filename", required=True,
                        help="Input CCB CSV loading file to validate")
    parser.add_argument("--output-validation-csv-filename", required=True,
                        help="Output CSV file that'll be created "
                             "with validation results")
    # NOTE(review): `argv` is not forwarded, so argparse reads sys.argv
    # itself. Left as-is because the caller's convention (with or without
    # the program name) is not visible from here.
    args = parser.parse_args()

    table = petl.fromcsv(args.input_ccb_csv_filename)

    # (constraint name, column, assertion) -- one entry per checked column.
    constraint_specs = [
        ('max_len_20', 'Legal Name', max_len(20)),
        ('max_len_30', 'How They Heard', max_len(30)),
        ('max_len_20', 'Last Name', max_len(20)),
        ('max_len_100', 'Mailbox Number', max_len(100)),
        ('max_len_20', 'Middle Name', max_len(20)),
        ('max_len_30', 'Job Title', max_len(30)),
        ('max_len_20', 'First Name', max_len(20)),
        ('max_len_30', 'School', max_len(30)),
        ('max_len_semisep_30', 'Abilities/Skills', max_len_semisep(30)),
        ('max_len_semisep_30', 'Spiritual Gifts', max_len_semisep(30)),
        ('max_len_semisep_30', 'Passions', max_len_semisep(30)),
        ('max_len_100', 'Transferred Frm', max_len(100)),
        ('max_len_100', 'Transferred To', max_len(100)),
        ('max_len_30', 'How They Joined', max_len(30)),
        ('max_len_30', 'Membership Type', max_len(30)),
        ('max_len_30', 'Reason Left Church', max_len(30)),
        ('max_len_100', 'Pastr When Join', max_len(100)),
        ('max_len_100', 'Pastr When Leav', max_len(100)),
    ]
    constraints = [
        {'name': label, 'field': column, 'assertion': check}
        for label, column, check in constraint_specs
    ]

    validation_table = petl.validate(table, constraints=constraints)
    validation_table.progress(200).tocsv(args.output_validation_csv_filename)

    # Parenthesised single-argument print behaves the same on Python 2
    # (print statement) and Python 3 (print function).
    print("See output file '" + args.output_validation_csv_filename +
          "' for results")

    # Flush to ensure all output is written
    sys.stdout.flush()
    sys.stderr.flush()
from __future__ import absolute_import, print_function, division


# validate()
############

import petl as etl

# Expected header plus the constraints to check. A constraint with a
# 'field' key applies to that field's value; the last one has no 'field'
# key and its callable receives the whole row.
header = ('foo', 'bar', 'baz')
constraints = [
    {'name': 'foo_int', 'field': 'foo', 'test': int},
    {'name': 'bar_date', 'field': 'bar',
     'test': etl.dateparser('%Y-%m-%d')},
    {'name': 'baz_enum', 'field': 'baz',
     'assertion': lambda v: v in ('Y', 'N')},
    {'name': 'not_none', 'assertion': lambda row: None not in row},
]

# Sample table to validate. Its header ('bazzz') differs from the expected
# one, and the last two rows are shorter / longer than the header.
table = (
    ('foo', 'bar', 'bazzz'),
    (1, '2000-01-01', 'Y'),
    ('x', '2010-10-10', 'N'),
    (2, '2000/01/01', 'Y'),
    (3, '2015-12-12', 'x'),
    (4, None, 'N'),
    ('y', '1999-99-99', 'z'),
    (6, '2000-01-01'),
    (7, '2001-02-02', 'N', True),
)

# Run the validation and render every reported problem.
problems = etl.validate(table, constraints=constraints, header=header)
problems.lookall()