Example #1
0
def validate(cursor, table, constraints, task_name):
    """Validate ``table`` and append the resulting problems to a log table.

    The table's own header (``etl.header(table)``) is passed to
    ``etl.validate`` as the expected header, together with ``constraints``.
    Two columns are added to the problems table before it is written:
    ``task_name`` (the given ``task_name``) and ``create_date`` (the value of
    ``datetime.now()`` evaluated once in this function).

    The result is appended via ``cursor`` to the ``tetl_logs`` table.
    """
    found = etl.validate(
        table,
        constraints=constraints,
        header=etl.header(table),
    )
    tagged = etl.addfield(found, 'task_name', task_name)
    stamped = etl.addfield(tagged, 'create_date', datetime.now())

    # NOTE(review): an earlier, disabled variant of this call wrote to
    # 'etl_logs' using etl.todb; the active target is 'tetl_logs' -- confirm
    # the table name is intended.
    etl.appenddb(stamped, cursor, 'tetl_logs')
Example #2
0
def validate(data, name, source, header, test, assertion):
    """Validate that a table meets the required constraints.

    Args:
        data: Object providing ``get(source)`` to fetch the table and
            ``config.debug`` to control the success message.
        name: Optional label; when truthy it is prefixed (followed by a
            space) to the status messages.
        source: Key passed to ``data.get`` to obtain the table to validate.
        header: Optional expected header; only passed to ``petl.validate``
            when non-empty.
        test: Iterable of indexable entries ``(name, field, expression)``.
            Each becomes a constraint with a ``test`` key. A ``field`` of
            ``'_row_'`` omits the ``field`` key from the constraint.
            ``None`` is treated as empty.
        assertion: Same shape as ``test``; each entry becomes a constraint
            with an ``assertion`` key. ``None`` is treated as empty.

    Raises:
        PETLValidationError: If the problems table returned by
            ``petl.validate`` has at least one row.
    """
    table = data.get(source)
    label = (name + ' ') if name else ''

    # Build all 'test' constraints first, then all 'assertion' constraints,
    # matching the order in which they were supplied.
    constraints = []
    for kind, specs in (('test', test), ('assertion', assertion)):
        for spec in specs or ():
            constraint = {'name': spec[0]}
            if spec[1] != '_row_':
                constraint['field'] = spec[1]
            # SECURITY: eval() executes the supplied expression as Python
            # code. Only use this with expressions from a trusted source.
            constraint[kind] = eval(spec[2])
            constraints.append(constraint)

    # Only pass the keyword arguments that actually carry a value.
    params = {}
    if header:
        params['header'] = header
    if constraints:
        params['constraints'] = constraints

    problems = petl.validate(table, **params)

    if problems.nrows() > 0:
        unsync.secho('{}Validation Failed!'.format(label), fg='red')
        unsync.secho(str(problems.lookall()), fg='red')
        raise PETLValidationError(problems)

    if data.config.debug is True:
        unsync.secho('{}Validation Passed!'.format(label), fg='green')
Example #3
0
def main(argv):
    """Check field lengths in a CCB CSV loading file and write problems to CSV.

    The input and output filenames come from the required command-line
    options ``--input-ccb-csv-filename`` and
    ``--output-validation-csv-filename``.

    Args:
        argv: Accepted but not used. ``parse_args()`` is called without
            arguments, so argparse reads the process command line itself.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-ccb-csv-filename", required=True,
                        help="Input CCB CSV loading file to validate")
    parser.add_argument("--output-validation-csv-filename", required=True,
                        help="Output CSV file that'll be created "
                             "with validation results")
    args = parser.parse_args()

    table = petl.fromcsv(args.input_ccb_csv_filename)

    # Assertion factories keyed by the prefix used in the constraint name.
    factories = {'max_len': max_len, 'max_len_semisep': max_len_semisep}

    # (field, kind of check, length limit). Order is kept as-is because it
    # is the order the constraints are handed to petl.validate.
    field_limits = [
        ('Legal Name', 'max_len', 20),
        ('How They Heard', 'max_len', 30),
        ('Last Name', 'max_len', 20),
        ('Mailbox Number', 'max_len', 100),
        ('Middle Name', 'max_len', 20),
        ('Job Title', 'max_len', 30),
        ('First Name', 'max_len', 20),
        ('School', 'max_len', 30),
        ('Abilities/Skills', 'max_len_semisep', 30),
        ('Spiritual Gifts', 'max_len_semisep', 30),
        ('Passions', 'max_len_semisep', 30),
        ('Transferred Frm', 'max_len', 100),
        ('Transferred To', 'max_len', 100),
        ('How They Joined', 'max_len', 30),
        ('Membership Type', 'max_len', 30),
        ('Reason Left Church', 'max_len', 30),
        ('Pastr When Join', 'max_len', 100),
        ('Pastr When Leav', 'max_len', 100),
    ]
    constraints = [
        {
            'name': '%s_%d' % (kind, limit),
            'field': field,
            'assertion': factories[kind](limit),
        }
        for field, kind, limit in field_limits
    ]

    validation_table = petl.validate(table, constraints=constraints)
    validation_table.progress(200).tocsv(args.output_validation_csv_filename)
    # Single-argument call form: prints the same text on Python 2 and 3.
    print("See output file '" + args.output_validation_csv_filename + "' for results")

    # Flush to ensure all output is written
    sys.stdout.flush()
    sys.stderr.flush()
    sys.stderr.flush()
Example #4
0
def main(argv):
    """Check field lengths in a CCB CSV loading file and write problems to CSV.

    The input and output filenames come from the required command-line
    options ``--input-ccb-csv-filename`` and
    ``--output-validation-csv-filename``.

    Args:
        argv: Accepted but not used. ``parse_args()`` is called without
            arguments, so argparse reads the process command line itself.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-ccb-csv-filename",
                        required=True,
                        help="Input CCB CSV loading file to validate")
    parser.add_argument("--output-validation-csv-filename",
                        required=True,
                        help="Output CSV file that'll be created "
                             "with validation results")
    args = parser.parse_args()

    table = petl.fromcsv(args.input_ccb_csv_filename)

    # Assertion factories keyed by the prefix used in the constraint name.
    factories = {'max_len': max_len, 'max_len_semisep': max_len_semisep}

    # (field, kind of check, length limit). Order is kept as-is because it
    # is the order the constraints are handed to petl.validate.
    field_limits = [
        ('Legal Name', 'max_len', 20),
        ('How They Heard', 'max_len', 30),
        ('Last Name', 'max_len', 20),
        ('Mailbox Number', 'max_len', 100),
        ('Middle Name', 'max_len', 20),
        ('Job Title', 'max_len', 30),
        ('First Name', 'max_len', 20),
        ('School', 'max_len', 30),
        ('Abilities/Skills', 'max_len_semisep', 30),
        ('Spiritual Gifts', 'max_len_semisep', 30),
        ('Passions', 'max_len_semisep', 30),
        ('Transferred Frm', 'max_len', 100),
        ('Transferred To', 'max_len', 100),
        ('How They Joined', 'max_len', 30),
        ('Membership Type', 'max_len', 30),
        ('Reason Left Church', 'max_len', 30),
        ('Pastr When Join', 'max_len', 100),
        ('Pastr When Leav', 'max_len', 100),
    ]
    constraints = [
        {
            'name': '%s_%d' % (kind, limit),
            'field': field,
            'assertion': factories[kind](limit),
        }
        for field, kind, limit in field_limits
    ]

    validation_table = petl.validate(table, constraints=constraints)
    validation_table.progress(200).tocsv(args.output_validation_csv_filename)
    # Single-argument call form: prints the same text on Python 2 and 3.
    print("See output file '" + args.output_validation_csv_filename + "' for results")

    # Flush to ensure all output is written
    sys.stdout.flush()
    sys.stderr.flush()
Example #5
0
from __future__ import absolute_import, print_function, division


# validate()
############

import petl as etl
# Expected header plus the constraints to check. Three constraints target a
# single field; the last one has no 'field' key and receives the whole row.
header = ('foo', 'bar', 'baz')
constraints = [
    {'name': 'foo_int', 'field': 'foo', 'test': int},
    {'name': 'bar_date', 'field': 'bar', 'test': etl.dateparser('%Y-%m-%d')},
    {'name': 'baz_enum', 'field': 'baz', 'assertion': lambda v: v in ['Y', 'N']},
    {'name': 'not_none', 'assertion': lambda row: None not in row},
]
# Sample table to validate. Its header row ends in 'bazzz' rather than the
# expected 'baz', and the last two rows have two and four values instead of
# three.
table = (
    ('foo', 'bar', 'bazzz'),
    (1, '2000-01-01', 'Y'),
    ('x', '2010-10-10', 'N'),
    (2, '2000/01/01', 'Y'),
    (3, '2015-12-12', 'x'),
    (4, None, 'N'),
    ('y', '1999-99-99', 'z'),
    (6, '2000-01-01'),
    (7, '2001-02-02', 'N', True),
)
problems = etl.validate(table, constraints=constraints, header=header)
problems.lookall()