Beispiel #1
0
def test_value_check_enumeration():
    """Verify that enumeration() value checks report out-of-set values."""

    header = ('foo', 'bar', 'baz')
    validator = CSVValidator(header)
    # members given as separate positional arguments
    validator.add_value_check('bar', enumeration('M', 'F'))
    # members given as a single list or tuple
    flavours = ('chocolate', 'vanilla', 'strawberry')
    validator.add_value_check('baz', enumeration(flavours))

    rows = (
        header,
        ('1', 'M', 'chocolate'),
        ('2', 'F', 'maple pecan'),
        ('3', 'X', 'strawberry'),
    )

    problems = validator.validate(rows)
    assert len(problems) == 2

    # One entry per expected problem, in the order they must be reported.
    # Pairs (not dicts) keep the assertion order fixed.
    expected = (
        (
            ('row', 3),
            ('column', 3),
            ('field', 'baz'),
            ('value', 'maple pecan'),
            ('record', ('2', 'F', 'maple pecan')),
        ),
        (
            ('row', 4),
            ('column', 2),
            ('field', 'bar'),
            ('value', 'X'),
            ('record', ('3', 'X', 'strawberry')),
        ),
    )
    for problem, details in zip(problems, expected):
        assert problem['code'] == VALUE_CHECK_FAILED
        for key, wanted in details:
            assert problem[key] == wanted
def sampleValidation():
    """
        Sample code to validate a CSV file structure.

        Builds a validator for a seven-column file (the second and third
        columns are declared with empty names and get no value checks),
        registers one value check per named column, and hands the input
        path, report path and validator to ``validate_csv_file`` (defined
        elsewhere; presumably it runs the validation and writes the report
        -- confirm against its definition).

        Each check carries its own problem code (EX1..EX5) so that entries
        in the report can be told apart by code alone.
    """
    data_path = "data.csv"
    report_path = "csv_validation_problems_report.txt"

    field_names = ('date', '', '', 'type', 'reach', 'clicks', 'reactions')
    validator = cv.CSVValidator(field_names)

    validator.add_value_check('date', cv.datetime_string('%m/%d/%Y'),
                              'EX1', 'invalid date')
    validator.add_value_check('type', cv.enumeration('Video', 'Photo', 'Link'),
                              "EX2", "invalid type found")
    validator.add_value_check('reach', int,
                              "EX3", "reach should be an integer")
    validator.add_value_check('clicks', int,
                              "EX4", "clicks should be an integer")
    # Was "EX4" as well (copy-paste slip), which made the code ambiguous
    # between the clicks and reactions checks.
    validator.add_value_check('reactions', int,
                              "EX5", "reactions should be an integer")

    validate_csv_file(data_path, report_path, validator)
def _verify_csv_schema(file_content, separator, schema):
    '''
    Validate CSV text against an expected schema.

    A header check is registered for the schema's field names, plus one
    value check per property whose type is ``'int'``, ``'string'`` or
    ``'enum'``. Properties of any other type get no value check.

    :param file_content: The content of the file
    :type file_content: Python String
    :param separator: The delimiter character used in the file
    :type separator: Python Character
    :param schema: The csv schema we are expecting. It is indexed as a
        mapping: ``schema['properties']`` is iterated, and each entry is
        read for ``'field'`` and ``'type'`` (and ``'values'`` when the
        type is ``'enum'``)
    :type schema: Python Dictionary
    :raises VerifyFileSchemaException: When the validator reports at least
        one problem; the message is the string form of the problem list
    '''
    reader = csv.reader(file_content.splitlines(), delimiter=separator)

    properties = schema['properties']
    columns = tuple([entry['field'] for entry in properties])

    validator = csvvalidator.CSVValidator(columns)
    validator.add_header_check('EX1', 'bad header')

    for entry in properties:
        name = entry['field']
        kind = entry['type']
        if kind == 'int':
            check, code, suffix = int, 'EX_INT', ' must be an integer'
        elif kind == 'string':
            check, code, suffix = str, 'EX_STR', ' must be a string'
        elif kind == 'enum':
            check = csvvalidator.enumeration(tuple(entry['values']))
            code, suffix = 'EX_ENUM', ' must have value from enum'
        else:
            # No value check is registered for other property types.
            continue
        validator.add_value_check(name, check, code, name + suffix)

    problems = validator.validate(reader)

    if problems:
        raise VerifyFileSchemaException(str(problems))
Beispiel #4
0
def create_validator():
    """Create an example CSV validator for patient demographic data.

    The returned validator checks the header, the record length, several
    individual field values, and the consistency of the two age fields
    (``age_years`` and ``age_months``).
    """

    field_names = (
                   'study_id',
                   'patient_id',
                   'gender',
                   'age_years',
                   'age_months',
                   'date_inclusion'
                   )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('study_id', int,
                              'EX3', 'study id must be an integer')
    validator.add_value_check('patient_id', int,
                              'EX4', 'patient id must be an integer')
    validator.add_value_check('gender', enumeration('M', 'F'),
                              'EX5', 'invalid gender')
    validator.add_value_check('age_years', number_range_inclusive(0, 120, int),
                              'EX6', 'invalid age in years')
    validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'),
                              'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        """Raise RecordError 'EX8' unless age_months lies within age_years."""
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # The age in months must fall inside the year named by age_years:
        # [age_years * 12, age_years * 12 + 11]. The previous test used
        # `age_months % age_years < 12`, which divides by zero for
        # age_years == 0 and places no upper bound on age_months.
        valid = age_years * 12 <= age_months < (age_years + 1) * 12
        if not valid:
            raise RecordError('EX8', 'invalid age variables')
    validator.add_record_check(check_age_variables)

    return validator
Beispiel #5
0
def create_validator():
    """Create an example CSV validator for patient demographic data.

    The returned validator checks the header, the record length, several
    individual field values, and the consistency of the two age fields
    (``age_years`` and ``age_months``).
    """

    field_names = (
                   'study_id',
                   'patient_id',
                   'gender',
                   'age_years',
                   'age_months',
                   'date_inclusion'
                   )
    validator = CSVValidator(field_names)

    # basic header and record length checks
    validator.add_header_check('EX1', 'bad header')
    validator.add_record_length_check('EX2', 'unexpected record length')

    # some simple value checks
    validator.add_value_check('study_id', int,
                              'EX3', 'study id must be an integer')
    validator.add_value_check('patient_id', int,
                              'EX4', 'patient id must be an integer')
    validator.add_value_check('gender', enumeration('M', 'F'),
                              'EX5', 'invalid gender')
    validator.add_value_check('age_years', number_range_inclusive(0, 120, int),
                              'EX6', 'invalid age in years')
    validator.add_value_check('date_inclusion', datetime_string('%Y-%m-%d'),
                              'EX7', 'invalid date')

    # a more complicated record check
    def check_age_variables(r):
        """Raise RecordError 'EX8' unless age_months lies within age_years."""
        age_years = int(r['age_years'])
        age_months = int(r['age_months'])
        # The age in months must fall inside the year named by age_years:
        # [age_years * 12, age_years * 12 + 11]. The previous test used
        # `age_months % age_years < 12`, which divides by zero for
        # age_years == 0 and places no upper bound on age_months.
        valid = age_years * 12 <= age_months < (age_years + 1) * 12
        if not valid:
            raise RecordError('EX8', 'invalid age variables')
    validator.add_record_check(check_age_variables)

    return validator
Beispiel #6
0
    raise ValueError("Invalid GFSM revenue value: " + gfsm)

def gfsmExpenseValidator(gfsm):
    """
    Validator function.
    Checks if a string is a valid GFSM 2001 expenditure value.

    The value must consist solely of one of the digit codes accepted by
    the pattern below (all start with "2").

    :param gfsm: the candidate code, as a string
    :returns: ``gfsm`` unchanged when it is valid
    :raises ValueError: when ``gfsm`` is not an accepted code
    """
    # Anchored with \Z rather than $: `$` also matches just before a
    # trailing newline, which let values such as "21\n" through.
    p = r"^2(1(1(1|2)?|2(1|2)?)?|2|3|4(1|2|3)?|5(1|2)?|6(1(1|2)?|2(1|2)?|3(1|2)?)?|7(1(1|2)?|2(1|2)?|3(1|2)?)?|8(1(1|2|3|4)?|2(1|2)?)?)?\Z"
    if re.search(p, gfsm):
        return gfsm
    raise ValueError("Invalid GFSM expense value: " + gfsm)

# Value check for "type" fields: built from the four allowed type strings
# listed here.
typeValidator = cv.enumeration(
    *("personnel", "non-personnel recurrent", "capital", "other")
)

field_validators = {
    # Special fields.
    "cofog": cofogValidator,
    "gfsmExpense": gfsmExpenseValidator,
    "gfsmRevenue": gfsmRevenueValidator,
    "type": typeValidator,
    # Ordinary ol' fields.
    "admin": str,
    "adminID": str,
    "adminOrgId": str,
    "amount": float,
    "amountAdjusted": float,