def field_required(header, schema, column_validators):
    """
    Report required fields whose column does not appear in the header.

    Nothing is checked when the schema's ``exactFields`` option is truthy.
    Otherwise one ValidationError is yielded per missing required column:
    first for the entries under ``column_validators['columns']``, then for
    every required entry under ``column_validators['unfoundfields']``.
    """
    if schema.get('exactFields', defaults.EXACTFIELDS):
        return

    known_columns = column_validators['columns']
    for info in known_columns.values():
        is_required = info['field_schema'].get('required', defaults.FIELDS_REQUIRED)
        if is_required and info['column'] not in header:
            yield exceptions.ValidationError(message="{0} is a required field".format(info['column']))

    # Entries under 'unfoundfields' are reported without consulting the header.
    unfound = column_validators['unfoundfields']
    for info in unfound.values():
        is_required = info['field_schema'].get('required', defaults.FIELDS_REQUIRED)
        if is_required:
            yield exceptions.ValidationError(message="{0} is a required field".format(info['column']))
def minfields(header, schema, column_validators):
    """
    Yield a ValidationError when the header has fewer columns than
    ``schema['minFields']``; yield nothing otherwise.

    ``column_validators`` is accepted for signature parity and is not used.
    """
    lower_bound = schema['minFields']
    too_few = len(header) < lower_bound
    if not too_few:
        return
    yield exceptions.ValidationError(
        message="Number of column(s) is less than minFields of {0}".format(lower_bound)
    )
def maxfields(header, schema, column_validators):
    """
    Yield a ValidationError when the header has more columns than
    ``schema['maxFields']``; yield nothing otherwise.

    ``column_validators`` is accepted for signature parity and is not used.
    """
    upper_bound = schema['maxFields']
    too_many = len(header) > upper_bound
    if not too_many:
        return
    yield exceptions.ValidationError(
        message="Number of column(s) is greater than maxFields of {0}".format(upper_bound)
    )
def field_nullable(cell, schema, field_schema):
    """
    Yield a ValidationError when the cell value is None and the field is not
    explicitly nullable (``field_schema['nullable'] is True``).

    The error carries the field name (if any) and ``cell['row']``.
    """
    # Only the literal True disables the check; the cell is not read in that case.
    if field_schema['nullable'] is not True and cell['value'] is None:
        yield exceptions.ValidationError(
            message="Illegal null value",
            column=field_schema.get('name'),
            row=cell['row'],
        )
def dependencies(header, schema, column_validators):
    """
    Check ``schema["dependencies"]`` against the header.

    For every column that is present in the header, yield one ValidationError
    per dependent column that is absent from the header. Columns that are not
    in the header impose no requirement.
    """
    for column, dependents in schema["dependencies"].items():
        if column in header:
            absent = (name for name in dependents if name not in header)
            for dependent in absent:
                yield exceptions.ValidationError(
                    message=f"Field {column} is provided while {dependent} is not in header"
                )
def field_required(header, schema, column_validators):
    """
    Required is declared on a field (or a definition) but is checked against the header.

    Nothing is checked when the schema's ``exactFields`` option is truthy.
    Otherwise one ValidationError is yielded per required column missing from
    the header, followed by one per required entry under ``unfoundfields``.
    """
    if schema.get("exactFields", defaults.EXACTFIELDS):
        return

    known_columns = column_validators["columns"]
    for info in known_columns.values():
        is_required = info["field_schema"].get("required", defaults.FIELDS_REQUIRED)
        if is_required and info["column_name"] not in header:
            yield exceptions.ValidationError(message="{0} is a required field".format(info["column_name"]))

    # Entries under "unfoundfields" are reported without consulting the header.
    unfound = column_validators["unfoundfields"]
    for info in unfound.values():
        is_required = info["field_schema"].get("required", defaults.FIELDS_REQUIRED)
        if is_required:
            yield exceptions.ValidationError(message="{0} is a required field".format(info["column_name"]))
def field_nullable(cell, schema, field_schema):
    """
    Yield a ValidationError when the cell value is None and the field is not
    explicitly nullable (``field_schema["nullable"] is True``).

    The error carries the field name (if any) and ``cell["row_number"]``.
    """
    # Only the literal True disables the check; the cell is not read in that case.
    if field_schema["nullable"] is not True and cell["value"] is None:
        yield exceptions.ValidationError(
            message="Illegal null value",
            column=field_schema.get("name"),
            row_number=cell["row_number"],
        )
def dependencies(header, schema, column_validators):
    """
    Check ``schema['dependencies']`` against the header.

    For every column that is present in the header, yield one ValidationError
    per dependent column that is absent from the header. Columns that are not
    in the header impose no requirement.

    ``column_validators`` is accepted for signature parity and is not used.
    """
    for column, dependents in schema['dependencies'].items():
        if column not in header:
            continue
        for dependent in dependents:
            if dependent in header:
                continue
            # The second placeholder must be {1}: the original repeated {0},
            # so the message named the provided column twice and never the
            # missing dependent.
            yield exceptions.ValidationError(
                message="Field {0} is provided while {1} is not in header".format(column, dependent)
            )
def number_of_fields(row, row_number, header_length):
    """
    RFC 4180 - Section 2. Definition of the CSV Format
    Point 2 - too many ending line break
    Point 4 - extra comma in some lines
    Point 6 - wrongly quoted causing wrong field number in a line

    Make sure each line contains the same number of fields.

    Yields a ValidationError tagged with ``row_number`` when ``len(row)``
    differs from ``header_length``; yields nothing otherwise.
    """
    field_count = len(row)
    if field_count != header_length:
        # The original reported "Illegal null value" here, which describes a
        # different validator; report the actual count mismatch instead.
        yield exceptions.ValidationError(
            message="Number of fields {0} is different from header length of {1}".format(
                field_count, header_length
            ),
            row_number=row_number,
        )
def field_enum(cell, schema, field_schema):
    """
    Yield a ValidationError when the cell value is not one of the values
    listed in ``field_schema['enum']``.

    The error carries the field name (if any) and ``cell['row']``.
    """
    allowed = field_schema['enum']
    if cell['value'] in allowed:
        return
    yield exceptions.ValidationError(
        message="Value {0} is not in enum of {1}".format(cell['value'], allowed),
        column=field_schema.get('name'),
        row=cell['row'],
    )
def field_enum(cell, schema, field_schema):
    """
    Yield a ValidationError when the cell value is not one of the values
    listed in ``field_schema["enum"]``.

    The error carries the field name (if any) and ``cell["row_number"]``.
    """
    enum = field_schema["enum"]
    # Test membership on the enum directly. Building set(enum) on every call
    # costs a full pass anyway (so it buys no speed) and raises TypeError when
    # the enum contains unhashable members such as lists.
    if cell["value"] not in enum:
        yield exceptions.ValidationError(
            message="Value {0} is not in enum of {1}".format(cell["value"], enum),
            column=field_schema.get("name"),
            row_number=cell["row_number"],
        )
def additionalfields(header, schema, column_validators):
    """
    Reject header columns that the schema does not define.

    Nothing is checked when ``schema['additionalFields'] is True``. Otherwise
    every header column whose name is not a ``fields[].name`` must match (via
    ``re.match``) at least one regex key of ``patternFields``; a
    ValidationError is yielded for each column that matches none.
    """
    if schema['additionalFields'] is True:
        return

    declared_names = {field.get('name') for field in schema.get('fields', defaults.FIELDS)}
    for extra_field in set(header) - declared_names:
        patterns = schema.get('patternFields', defaults.PATTERNFIELDS).keys()
        # Every pattern is tried, even after a hit.
        hits = [regex for regex in patterns if re.match(regex, extra_field)]
        if not hits:
            yield exceptions.ValidationError(message="Field {0} is not defined".format(extra_field))
def field_type(cell, schema, field_schema):
    """
    Validate the cell value against the field's type and store the mapper's
    value back into the cell.

    Yields a ValidationError (with field name and ``cell['row']``) when the
    mapper's ``validate`` returns exactly ``False``. Whether or not validation
    failed, ``cell['value']`` is then replaced with ``mapper.value``.
    """
    # type is the default validator and fields.type may be absent, hence the default value.
    # This validator must run before other field validators (excluding $ref),
    # since it transforms the value type in cell.
    mapper_factory = types.TYPE_MAPPER[field_schema.get('type', defaults.FIELDS_TYPE)]
    mapper = mapper_factory(field_schema=field_schema)

    outcome = mapper.validate(value=cell['value'])
    if outcome is False:
        yield exceptions.ValidationError(
            message="Value {0} does not satisfy the type or format".format(cell['value']),
            column=field_schema.get('name'),
            row=cell['row'],
        )

    # Runs after the (optional) yield, i.e. once the generator is resumed.
    cell['value'] = mapper.value
def field_multipleof(cell, schema, field_schema):
    """
    Yield a ValidationError when the cell value is not a multiple of
    ``field_schema['multipleOf']`` (i.e. ``value % multipleOf != 0``).

    A None value is skipped. The error carries the field name (if any) and
    ``cell['row']``.
    """
    value = cell['value']
    if value is None:
        return

    divisor = field_schema['multipleOf']
    if value % divisor != 0:
        yield exceptions.ValidationError(
            message="Value {0} is not multiple of {1}".format(value, divisor),
            column=field_schema.get('name'),
            row=cell['row'],
        )
def field_maxlength(cell, schema, field_schema):
    """
    Yield a ValidationError when the cell value is longer than
    ``field_schema["maxLength"]``.

    A None value is skipped. The error carries the field name (if any) and
    ``cell["row_number"]``.
    """
    if cell["value"] is None:
        return

    maxlength = field_schema["maxLength"]
    if maxlength < len(cell["value"]):
        # The message names maxLength; the original said "minLength", which
        # pointed readers at the wrong constraint.
        yield exceptions.ValidationError(
            message="Value {0} is longer than maxLength of {1}".format(cell["value"], maxlength),
            column=field_schema.get("name"),
            row_number=cell["row_number"],
        )
def field_maxlength(cell, schema, field_schema):
    """
    Yield a ValidationError when the cell value is longer than
    ``field_schema['maxLength']``.

    A None value is skipped. The error carries the field name (if any) and
    ``cell['row']``.
    """
    value = cell['value']
    if value is None:
        return

    limit = field_schema['maxLength']
    too_long = limit < len(value)
    if not too_long:
        return
    yield exceptions.ValidationError(
        message="Value {0} is longer than maxLength of {1}".format(value, limit),
        column=field_schema.get('name'),
        row=cell['row'],
    )
def field_multipleof(cell, schema, field_schema):
    """
    Yield a ValidationError when the cell value is not a multiple of
    ``field_schema["multipleOf"]`` (i.e. ``value % multipleOf != 0``).

    A None value is skipped. The error carries the field name (if any) and
    ``cell["row_number"]``.
    """
    value = cell["value"]
    if value is None:
        return

    divisor = field_schema["multipleOf"]
    if value % divisor != 0:
        yield exceptions.ValidationError(
            message="Value {0} is not multiple of {1}".format(value, divisor),
            column=field_schema.get("name"),
            row_number=cell["row_number"],
        )
def field_minimum(cell, schema, field_schema):
    """
    Yield a ValidationError when the cell value violates
    ``field_schema["minimum"]``.

    With a truthy ``exclusiveMinimum`` the value must be strictly greater than
    the minimum; otherwise a value equal to the minimum is accepted. A None
    value is skipped. The error carries the field name (if any) and
    ``cell["row_number"]``.
    """
    if cell["value"] is None:
        return

    minimum = field_schema["minimum"]
    exclusiveminimum = field_schema.get("exclusiveMinimum", defaults.FIELDS_EXCLUSIVEMININUM)

    # The original had these two comparisons swapped: a value equal to the
    # minimum was rejected in inclusive mode and accepted in exclusive mode,
    # contradicting both the messages and the meaning of exclusiveMinimum.
    if exclusiveminimum:
        failed = minimum >= cell["value"]
        compare = "less than or equal to"
    else:
        failed = minimum > cell["value"]
        compare = "less than"

    if failed:
        yield exceptions.ValidationError(
            message="Value {0} is {1} minimum of {2}".format(cell["value"], compare, minimum),
            column=field_schema.get("name"),
            row_number=cell["row_number"],
        )
def field_minimum(cell, schema, field_schema):
    """
    Yield a ValidationError when the cell value violates
    ``field_schema['minimum']``.

    With a truthy ``exclusiveMinimum`` the value must be strictly greater than
    the minimum; otherwise a value equal to the minimum is accepted. A None
    value is skipped. The error carries the field name (if any) and
    ``cell['row']``.
    """
    if cell['value'] is None:
        return

    minimum = field_schema['minimum']
    exclusiveminimum = field_schema.get('exclusiveMinimum', defaults.FIELDS_EXCLUSIVEMININUM)

    # The original had these two comparisons swapped: a value equal to the
    # minimum was rejected in inclusive mode and accepted in exclusive mode,
    # contradicting both the messages and the meaning of exclusiveMinimum.
    if exclusiveminimum:
        failed = minimum >= cell['value']
        compare = "less than or equal to"
    else:
        failed = minimum > cell['value']
        compare = "less than"

    if failed:
        yield exceptions.ValidationError(
            message="Value {0} is {1} minimum of {2}".format(cell['value'], compare, minimum),
            column=field_schema.get('name'),
            row=cell['row'],
        )
def exactfields(header, schema, column_validators):
    """
    Redo the whole column_validators from fields by order, in order to pass
    validators to columns with same name correctly.

    Does nothing when ``schema['exactFields']`` is falsy. Otherwise yields a
    ValidationError if the list of ``fields[].name`` differs from the header,
    then clears ``column_validators['columns']`` and refills it, keyed by
    column index, pairing each header column with the field at that position.
    """
    if not schema['exactFields']:
        return

    expected_names = [field.get('name') for field in schema.get('fields', defaults.FIELDS)]
    if expected_names != header:
        yield exceptions.ValidationError(message="Column name is different to fields.name in schema")

    # The rebuild happens even after a mismatch was reported.
    # NOTE(review): fields are indexed by header position, so a header longer
    # than the fields sequence would presumably raise IndexError - confirm.
    rebuilt = column_validators['columns']
    rebuilt.clear()
    fields = schema.get('fields', defaults.FIELDS)
    for position, column in enumerate(header):
        field_schema = fields[position]
        column_info = {'field_schema': field_schema, 'column': column}
        _utilities.find_row_validators(column_info=column_info, field_schema=field_schema)
        rebuilt[position] = column_info