def _handle_func(self, value, rule, path, done=None): """ Helper function that should check if func is specified for this rule and then handle it for all cases in a generic way. """ func = rule.func # func keyword is not defined so nothing to do if not func: return found_method = False for extension in self.loaded_extensions: method = getattr(extension, func, None) if method: found_method = True # No exception will should be caught. If one is raised it should bubble up all the way. ret = method(value, rule, path) # If False or None or some other object that is interpreted as False if not ret: raise CoreError( u"Error when running extension function : {}".format( func)) # Only run the first matched function. Sinc loading order is determined # it should be easy to determine which file is used before others break if not found_method: raise CoreError( u"Did not find method '{}' in any loaded extension file". format(func))
def _validate_assert(self, rule, value, path): if not self.allow_assertions: raise CoreError('To allow usage of keyword "assert" you must use cli flag "--allow-assertions" or set the keyword "allow_assert" in Core class') # Small hack to make strings work as a value. if isinstance(value, str): assert_value_str = '"{0}"'.format(value) else: assert_value_str = '{0}'.format(value) assertion_string = "val = {0}; assert {1}".format(assert_value_str, rule.assertion) try: exec(assertion_string, {}, {}) except AssertionError: self.errors.append(SchemaError.SchemaErrorEntry( msg=u"Value: '{0}' assertion expression failed ({1})".format(value, rule.assertion), path=path, value=value, )) return except Exception as err: error_class = err.__class__.__name__ detail = err.args[0] cl, exc, tb = sys.exc_info() line_number = traceback.extract_tb(tb)[-1][1] raise Exception("Unknown error during assertion\n{0}\n{1}\n{2}\n{3}\n{4}\n{5}".format( error_class, detail, cl, exc, tb, line_number, ))
def _validate_range(self, max_, min_, max_ex, min_ex, value, path, prefix):
    """
    Validate that value is within range values.

    :param max_: Inclusive upper bound, or None when not checked.
    :param min_: Inclusive lower bound, or None when not checked.
    :param max_ex: Exclusive upper bound, or None when not checked.
    :param min_ex: Exclusive lower bound, or None when not checked.
    :param value: Numeric value to compare against the bounds.
    :param path: Path inside the document, used in error reporting.
    :param prefix: Type name rendered into the error message.
    :raises CoreError: if value is neither an int nor a float.
    """
    # Floats are accepted as well, so the error message now says so
    # (previously it claimed only integers were valid).
    if not isinstance(value, (int, float)):
        raise CoreError("Value must be a integer or float type")

    log.debug(
        u"Validate range : %s : %s : %s : %s : %s : %s",
        max_,
        min_,
        max_ex,
        min_ex,
        value,
        path,
    )

    if max_ is not None and max_ < value:
        self.errors.append(
            SchemaError.SchemaErrorEntry(
                msg=
                u"Type '{prefix}' has size of '{value}', greater than max limit '{max_}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                max_=max_))

    if min_ is not None and min_ > value:
        self.errors.append(
            SchemaError.SchemaErrorEntry(
                msg=
                u"Type '{prefix}' has size of '{value}', less than min limit '{min_}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                min_=min_))

    if max_ex is not None and max_ex <= value:
        self.errors.append(
            SchemaError.SchemaErrorEntry(
                msg=
                u"Type '{prefix}' has size of '{value}', greater than or equals to max limit(exclusive) '{max_ex}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                max_ex=max_ex))

    if min_ex is not None and min_ex >= value:
        self.errors.append(
            SchemaError.SchemaErrorEntry(
                msg=
                u"Type '{prefix}' has size of '{value}', less than or equals to min limit(exclusive) '{min_ex}'. Path: '{path}'",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                prefix=prefix,
                min_ex=min_ex))
def __init__(self, source_file=None, schema_file=None, source_data=None, schema_data=None):
    """
    Load source and schema from file paths or from already-parsed data.

    File arguments take precedence over the data arguments; at least one
    source input and one schema input must end up loaded or CoreError is
    raised.

    :param source_file: Path to a .json or .yaml file with the data to validate.
    :param schema_file: Path to a .json or .yaml file with the schema.
    :param source_data: Already parsed data (used when no source file is given).
    :param schema_data: Already parsed schema (used when no schema file is given).
    """
    Log.debug("source_file: {}".format(source_file))
    Log.debug("schema_file: {}".format(schema_file))
    Log.debug("source_data: {}".format(source_data))
    Log.debug("schema_data: {}".format(schema_data))

    self.source = None
    self.schema = None
    self.validation_errors = None
    self.root_rule = None

    if source_file is not None:
        if not os.path.exists(source_file):
            raise CoreError(
                "Provided source_file do not exists on disk: {}".format(
                    source_file))

        with open(source_file, "r") as stream:
            # The file format is decided from the file extension only.
            if source_file.endswith(".json"):
                self.source = json.load(stream)
            elif source_file.endswith(".yaml"):
                # NOTE(review): yaml.load() without an explicit Loader is
                # unsafe on untrusted input -- confirm the intended loader.
                self.source = yaml.load(stream)
            else:
                raise CoreError(
                    "Unable to load source_file. Unknown file format of specified file path: {}"
                    .format(source_file))

    if schema_file is not None:
        if not os.path.exists(schema_file):
            raise CoreError("Provided source_file do not exists on disk")

        with open(schema_file, "r") as stream:
            if schema_file.endswith(".json"):
                self.schema = json.load(stream)
            elif schema_file.endswith(".yaml"):
                # NOTE(review): same unsafe yaml.load() concern as above.
                self.schema = yaml.load(stream)
            else:
                raise CoreError(
                    "Unable to load source_file. Unknown file format of specified file path: {}"
                    .format(schema_file))

    # Nothing was loaded so try the source_data variable
    if self.source is None:
        Log.debug("No source file loaded, trying source data variable")
        self.source = source_data
    if self.schema is None:
        Log.debug("No schema file loaded, trying schema data variable")
        self.schema = schema_data

    # Test if anything was loaded
    if self.source is None:
        raise CoreError("No source file/data was loaded")
    if self.schema is None:
        raise CoreError("No schema file/data was loaded")
def _validate_length(self, rule, value, path, prefix):
    """
    Validate the length of a string value against min/max limits.

    :param rule: Dict-like rule carrying 'max', 'min', 'max-ex', 'min-ex'.
    :param value: Value to check; must pass is_string().
    :param path: Path inside the document, used in error reporting.
    :param prefix: Type name used in the rendered error message.
    :raises CoreError: if value is not a string type.
    """
    if not is_string(value):
        raise CoreError("Value: '{0}' must be a 'str' type for length check to work".format(value))

    value_length = len(str(value))
    max_, min_, max_ex, min_ex = rule.get('max'), rule.get('min'), rule.get('max-ex'), rule.get('min-ex')

    # Bug fix: previously logged the builtins 'max'/'min' instead of the
    # configured 'max_'/'min_' limits.
    log.debug(
        u"Validate length : %s : %s : %s : %s : %s : %s",
        max_, min_, max_ex, min_ex, value, path,
    )

    # Report value_length (the length actually compared) instead of
    # recomputing len(value), which could differ for non-str string types.
    if max_ is not None and max_ < value_length:
        self.errors.append(SchemaError.SchemaErrorEntry(
            msg=u"Value: '{value_str}' has length of '{value}', greater than max limit '{max_}'. Path: '{path}'",
            value_str=value,
            path=path,
            value=value_length,
            prefix=prefix,
            max_=max_))

    # Bug fix: the min / min-ex messages used to say "greater than" even
    # though they fire when the value is too short.
    if min_ is not None and min_ > value_length:
        self.errors.append(SchemaError.SchemaErrorEntry(
            msg=u"Value: '{value_str}' has length of '{value}', less than min limit '{min_}'. Path: '{path}'",
            value_str=value,
            path=path,
            value=value_length,
            prefix=prefix,
            min_=min_))

    if max_ex is not None and max_ex <= value_length:
        self.errors.append(SchemaError.SchemaErrorEntry(
            msg=u"Value: '{value_str}' has length of '{value}', greater than or equals to max_ex limit '{max_ex}'. Path: '{path}'",
            value_str=value,
            path=path,
            value=value_length,
            prefix=prefix,
            max_ex=max_ex))

    if min_ex is not None and min_ex >= value_length:
        self.errors.append(SchemaError.SchemaErrorEntry(
            msg=u"Value: '{value_str}' has length of '{value}', less than or equals to min_ex limit '{min_ex}'. Path: '{path}'",
            value_str=value,
            path=path,
            value=value_length,
            prefix=prefix,
            min_ex=min_ex))
def _validate(self, value, rule, path, errors, done):
    """
    Dispatch validation of ``value`` to the include, sequence, mapping or
    scalar handler depending on what kind of rule this is.
    """
    log.debug("{}".format(rule))
    log.debug("Core validate")
    log.debug(" ? Rule: {}".format(rule._type))
    log.debug(" ? Seq: {}".format(rule._sequence))
    log.debug(" ? Map: {}".format(rule._mapping))

    # A required rule can never be satisfied when no document was loaded.
    if rule._required and self.source is None:
        raise CoreError("required.novalue : {}".format(path))

    log.debug(" ? ValidateRule: {}".format(rule))

    # Pick exactly one handler; scalar is the fallback for plain values.
    if rule._include_name is not None:
        handler = self._validate_include
    elif rule._sequence is not None:
        handler = self._validate_sequence
    elif rule._mapping is not None or rule._allowempty_map:
        handler = self._validate_mapping
    else:
        handler = self._validate_scalar

    handler(value, rule, path, errors, done=None)
def _load_extensions(self):
    """
    Load all extension files so their functions can be invoked via the
    'func' keyword in the schema.

    Relative paths are resolved to absolute paths before loading.

    :raises CoreError: if any configured extension file is missing on disk.
    """
    # Local import keeps the module surface unchanged for existing callers.
    import importlib.util

    log.debug(u"loading all extensions : %s", self.extensions)

    self.loaded_extensions = []

    for f in self.extensions:
        if not os.path.isabs(f):
            f = os.path.abspath(f)

        if not os.path.exists(f):
            raise CoreError(u"Extension file: {0} not found on disk".format(f))

        # Bug fix: imp.load_source() is deprecated and removed in python
        # 3.12, and loading every extension under the module name "" made
        # later extensions shadow earlier ones in sys.modules. Load through
        # importlib with the file stem as the module name instead.
        module_name = os.path.splitext(os.path.basename(f))[0]
        spec = importlib.util.spec_from_file_location(module_name, f)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        self.loaded_extensions.append(module)

    log.debug(self.loaded_extensions)
    log.debug([dir(m) for m in self.loaded_extensions])
def _validate_scalar_type(self, value, t, path):
    """
    Validate that ``value`` matches the scalar type named ``t``.

    :param value: Data value to type check.
    :param t: Scalar type name; must be a key in the ``tt`` type table.
    :param path: Path inside the document, used in error reporting.
    :returns: True when the value matches the type, False otherwise.
    :raises CoreError: when ``t`` is not a known scalar type.
    """
    log.debug(u" # Core scalar: validating scalar type : %s", t)
    log.debug(u" # Core scalar: scalar type: %s", type(value))

    try:
        if not tt[t](value):
            self.errors.append(SchemaError.SchemaErrorEntry(
                msg=u"Value '{value}' is not of type '{scalar_type}'. Path: '{path}'",
                path=path,
                # Bug fix: unicode() is a python2-only builtin and raised
                # NameError on python3; use nativestr() like the other
                # validators in this file do.
                value=nativestr(value) if tt['str'](value) else value,
                scalar_type=t))
            return False
        return True
    except KeyError as e:
        # Type not found in valid types mapping
        log.debug(e)
        raise CoreError(u"Unknown type check: {0!s} : {1!s} : {2!s}".format(path, value, t))
def _validate(self, value, rule, path, done):
    """
    Route ``value`` to the include, sequence, mapping or scalar validator
    depending on what kind of rule this is.
    """
    log.debug(u"Core validate")
    log.debug(u" ? Rule: %s", rule)
    log.debug(u" ? Rule_type: %s", rule.type)
    log.debug(u" ? Seq: %s", rule.sequence)
    log.debug(u" ? Map: %s", rule.mapping)
    log.debug(u" ? Done: %s", done)

    # A required rule can never be satisfied when no document was loaded.
    if rule.required and self.source is None:
        raise CoreError(u"required.novalue : {}".format(path))

    log.debug(u" ? ValidateRule: %s", rule)

    # Pick exactly one handler; scalar is the fallback for plain values.
    if rule.include_name is not None:
        dispatch = self._validate_include
    elif rule.sequence is not None:
        dispatch = self._validate_sequence
    elif rule.mapping is not None or rule.allowempty_map:
        dispatch = self._validate_mapping
    else:
        dispatch = self._validate_scalar

    dispatch(value, rule, path, done=None)
def _validate_scalar(self, value, rule, path, errors=None, done=None):
    """
    Validate a scalar value against enum, type, pattern and range rules.

    :param value: Scalar data value to validate.
    :param rule: Rule object describing the constraints.
    :param path: Path inside the document, used in error reporting.
    :param errors: List that error strings are appended to; a fresh list is
                   created when omitted.
    :param done: Unused here; kept for signature parity with other validators.
    :raises CoreError: if a range check is requested on a non scalar value.
    """
    # Bug fix: the previous default 'errors=[]' was a shared mutable
    # default, leaking errors between unrelated validations.
    if errors is None:
        errors = []

    Log.debug("Validate scalar")
    Log.debug(" # {}".format(value))
    Log.debug(" # {}".format(rule))
    Log.debug(" # {}".format(rule._type))
    Log.debug(" # {}".format(path))

    if rule._enum is not None:
        if value not in rule._enum:
            errors.append("enum.notexists : {} : {}".format(value, path))

    # Set default value
    if rule._default and value is None:
        value = rule._default

    self._validate_scalar_type(value, rule._type, errors, path)

    if value is None:
        return

    if rule._pattern is not None:
        res = re.match(rule._pattern, str(value))
        if res is None:  # Not matching
            errors.append("pattern.unmatch : {} --> {} : {}".format(
                rule._pattern, value, path))

    if rule._range is not None:
        if not isScalar(value):
            raise CoreError("value is not a valid scalar")

        r = rule._range

        # For sized values (e.g. str) the range applies to the length.
        try:
            v = len(value)
            value = v
        except Exception:
            pass

        self._validate_range(r.get("max", None), r.get("min", None),
                             r.get("max-ex", None), r.get("min-ex", None),
                             errors, value, path, "scalar")
def __init__(self, source_file=None, schema_files=None, source_data=None, schema_data=None, extensions=None, strict_rule_validation=False, fix_ruby_style_regex=False, allow_assertions=False, file_encoding=None, schema_file_obj=None, data_file_obj=None):
    """
    Load source and schema from file objects, file paths or already parsed
    data, register extra yaml constructors and load extension files.

    :param source_file: Path to a .json/.yaml/.yml file with the data to validate.
    :param schema_files: List of schema file paths that are merged into one schema.
    :param source_data: Already parsed data to validate (fallback when no file given).
    :param schema_data: Already parsed schema (fallback when no files given).
    :param extensions:
        List of paths to python files that should be imported and available via 'func' keywork.
        This list of extensions can be set manually or they should be provided by the `--extension`
        flag from the cli. This list should not contain files specified by the `extensions` list
        keyword that can be defined at the top level of the schema.
    :param strict_rule_validation: When True, unknown rule keywords are rejected.
    :param fix_ruby_style_regex: Enable compatibility handling of ruby style regexes.
    :param allow_assertions: Must be True for schemas that use the 'assert' keyword.
    :param file_encoding: Encoding passed to open() when reading files.
    :param schema_file_obj: File-like object to read the schema from.
    :param data_file_obj: File-like object to read the source data from.
    """
    # Replace the mutable default arguments per call so no state is shared
    # between Core instances.
    if schema_files is None:
        schema_files = []
    if extensions is None:
        extensions = []

    log.debug(u"source_file: %s", source_file)
    log.debug(u"schema_file: %s", schema_files)
    log.debug(u"source_data: %s", source_data)
    log.debug(u"schema_data: %s", schema_data)
    log.debug(u"extension files: %s", extensions)

    self.source = None
    self.schema = None
    self.validation_errors = None
    self.validation_errors_exceptions = None
    self.root_rule = None
    self.extensions = extensions
    self.errors = []
    self.strict_rule_validation = strict_rule_validation
    self.fix_ruby_style_regex = fix_ruby_style_regex
    self.allow_assertions = allow_assertions

    # Patch in all the normal python types into the yaml load instance so we can use all the
    # internal python types in the yaml loading.
    yml.constructor.add_constructor('tag:yaml.org,2002:python/bool', Constructor.construct_yaml_bool)
    yml.constructor.add_constructor('tag:yaml.org,2002:python/complex', Constructor.construct_python_complex)
    yml.constructor.add_constructor('tag:yaml.org,2002:python/dict', Constructor.construct_yaml_map)
    yml.constructor.add_constructor('tag:yaml.org,2002:python/float', Constructor.construct_yaml_float)
    yml.constructor.add_constructor('tag:yaml.org,2002:python/int', Constructor.construct_yaml_int)
    yml.constructor.add_constructor('tag:yaml.org,2002:python/list', Constructor.construct_yaml_seq)
    yml.constructor.add_constructor('tag:yaml.org,2002:python/long', Constructor.construct_python_long)
    yml.constructor.add_constructor('tag:yaml.org,2002:python/none', Constructor.construct_yaml_null)
    yml.constructor.add_constructor('tag:yaml.org,2002:python/str', Constructor.construct_python_str)
    yml.constructor.add_constructor('tag:yaml.org,2002:python/tuple', Constructor.construct_python_tuple)
    yml.constructor.add_constructor('tag:yaml.org,2002:python/unicode', Constructor.construct_python_unicode)

    # File objects win over file paths; paths win over raw data arguments.
    if data_file_obj:
        try:
            self.source = yml.load(data_file_obj.read())
        except Exception as e:
            raise CoreError("Unable to load data_file_obj input")

    if schema_file_obj:
        try:
            self.schema = yml.load(schema_file_obj.read())
        except Exception as e:
            raise CoreError("Unable to load schema_file_obj")

    if source_file is not None:
        if not os.path.exists(source_file):
            raise CoreError(
                u"Provided source_file do not exists on disk: {0}".format(
                    source_file))

        with open(source_file, "r", encoding=file_encoding) as stream:
            # File format is decided from the file extension only.
            if source_file.endswith(".json"):
                self.source = json.load(stream)
            elif source_file.endswith(".yaml") or source_file.endswith(
                    '.yml'):
                self.source = yml.load(stream)
            else:
                raise CoreError(
                    u"Unable to load source_file. Unknown file format of specified file path: {0}"
                    .format(source_file))

    if not isinstance(schema_files, list):
        raise CoreError(u"schema_files must be of list type")

    # Merge all schema files into one single file for easy parsing
    if len(schema_files) > 0:
        schema_data = {}

        for f in schema_files:
            if not os.path.exists(f):
                raise CoreError(
                    u"Provided source_file do not exists on disk : {0}".
                    format(f))

            with open(f, "r", encoding=file_encoding) as stream:
                if f.endswith(".json"):
                    data = json.load(stream)
                elif f.endswith(".yaml") or f.endswith(".yml"):
                    data = yml.load(stream)
                    if not data:
                        raise CoreError(
                            u"No data loaded from file : {0}".format(f))
                else:
                    # NOTE(review): this message is never interpolated --
                    # the .format(f) call is missing, so '{0}' is printed
                    # literally. Verify and fix upstream.
                    raise CoreError(
                        u"Unable to load file : {0} : Unknown file format. Supported file endings is [.json, .yaml, .yml]"
                    )

            # A schema key may only be defined in one of the merged files.
            for key in data.keys():
                if key in schema_data.keys():
                    raise CoreError(
                        u"Parsed key : {0} : two times in schema files..."
                        .format(key))

            schema_data = dict(schema_data, **data)

        self.schema = schema_data

    # Nothing was loaded so try the source_data variable
    if self.source is None:
        log.debug(u"No source file loaded, trying source data variable")
        self.source = source_data
    if self.schema is None:
        log.debug(u"No schema file loaded, trying schema data variable")
        self.schema = schema_data

    # Test if anything was loaded
    if self.source is None:
        raise CoreError(u"No source file/data was loaded")
    if self.schema is None:
        raise CoreError(u"No schema file/data was loaded")

    # Merge any extensions defined in the schema with the provided list of extensions from the cli
    for f in self.schema.get('extensions', []):
        self.extensions.append(f)

    # NOTE(review): this condition looks inverted -- it only raises when
    # extensions is NOT a list AND every element is a str; a list with
    # non-str entries passes silently. Likely intended 'or not all(...)'.
    if not isinstance(self.extensions, list) and all(
            isinstance(e, str) for e in self.extensions):
        raise CoreError(
            u"Specified extensions must be a list of file paths")

    self._load_extensions()

    if self.strict_rule_validation:
        log.info("Using strict rule keywords validation...")
def _validate_scalar(self, value, rule, path, errors=None, done=None):
    """
    Validate a scalar value against enum/default/pattern/range/length
    constraints defined on the rule, then against the scalar type itself.

    :param value: Scalar data value to validate.
    :param rule: Rule object describing the constraints.
    :param path: Path inside the document, used in error reporting.
    :param errors: List that error strings are appended to; a fresh list is
                   created when omitted.
    :param done: Unused here; kept for signature parity with other validators.
    :raises CoreError: when the rule carries sequence/mapping definitions,
                       or when range/length checks get an unsupported value.
    """
    # Bug fix: the previous default 'errors=[]' was a shared mutable
    # default, leaking errors between unrelated validations.
    if errors is None:
        errors = []

    Log.debug("Validate scalar")
    Log.debug(" # {}".format(value))
    Log.debug(" # {}".format(rule))
    Log.debug(" # {}".format(rule._type))
    Log.debug(" # {}".format(path))

    # A scalar rule must not also define sequence or mapping structure.
    if not rule._sequence is None:
        raise CoreError("found sequence when validating for scalar")
    if not rule._mapping is None:
        raise CoreError("found mapping when validating for scalar")

    if rule._assert is not None:
        pass  # TODO: implement assertion prolly

    if rule._enum is not None:
        if value not in rule._enum:
            errors.append("enum.notexists : {} : {}".format(value, path))

    # Set default value
    if rule._default and value is None:
        value = rule._default

    if value is None:
        return

    if rule._pattern is not None:
        res = re.match(rule._pattern, str(value))
        if res is None:  # Not matching
            errors.append("pattern.unmatch : {} --> {} : {}".format(
                rule._pattern, value, path))

    if rule._range is not None:
        if not isScalar(value):
            raise CoreError("value is not a valid scalar")

        r = rule._range

        # The four bounds share identical compare/retry logic; the previous
        # copy-pasted versions all fell back to comparing r["max"] against
        # len(value), so the min/max-ex/min-ex length fallbacks were broken.
        self._check_scalar_range_bound(r, "max", "range.toolarge", "<", lambda limit, v: limit < v, value, errors, path)
        self._check_scalar_range_bound(r, "min", "range.toosmall", ">", lambda limit, v: limit > v, value, errors, path)
        self._check_scalar_range_bound(r, "max-ex", "range.tolarge-ex", "<=", lambda limit, v: limit <= v, value, errors, path)
        self._check_scalar_range_bound(r, "min-ex", "range.toosmall-ex", ">=", lambda limit, v: limit >= v, value, errors, path)

    if rule._length is not None:
        if not isinstance(value, str):
            raise CoreError("value is not a valid string type")

        l = rule._length
        L = len(value)

        if l.get("max", None) is not None and l["max"] < L:
            errors.append("length.toolong : {} < {} : {}".format(
                l["max"], L, path))
        if l.get("min", None) is not None and l["min"] > L:
            errors.append("length.tooshort : {} > {} : {}".format(
                l["min"], L, path))
        if l.get("max-ex", None) is not None and l["max-ex"] <= L:
            errors.append("length.toolong-ex : {} <= {} : {}".format(
                l["max-ex"], L, path))
        if l.get("min-ex", None) is not None and l["min-ex"] >= L:
            errors.append("length.tooshort-ex : {} >= {} : {}".format(
                l["min-ex"], L, path))

    self._validate_scalar_type(value, rule._type, errors, path)


def _check_scalar_range_bound(self, r, key, label, symbol, failed, value, errors, path):
    """
    Check one range bound of a scalar rule.

    First compares the bound against int(value); when that conversion or
    comparison raises (e.g. int vs str on python3) it retries against
    len(value); if that also fails the exception itself is recorded.

    :param r: Range dict taken from the rule.
    :param key: Bound key in ``r`` ('max', 'min', 'max-ex' or 'min-ex').
    :param label: Error label prefix, e.g. 'range.toolarge'.
    :param symbol: Comparison symbol rendered into the error string.
    :param failed: Callable (limit, value) -> True when the bound is violated.
    :param value: Original scalar value (rendered verbatim in errors).
    :param errors: List that error strings are appended to.
    :param path: Path inside the document, used in error reporting.
    """
    limit = r.get(key, None)
    if limit is None:
        return

    try:
        if failed(limit, int(value)):
            errors.append("{0} : {1} {2} {3} : {4}".format(
                label, limit, symbol, value, path))
    except Exception:
        # In python3 there can be issues when comparing for example int < str.
        # Try to apply len() but if that fails then report it as an exception
        try:
            if failed(limit, len(value)):
                errors.append("{0} : {1} {2} {3} : {4}".format(
                    label, limit, symbol, value, path))
        except Exception as e:
            errors.append("EXCEPTION: range.{0} :: {1} {2} {3}".format(
                e, limit, symbol, value))
def _validate_scalar(self, value, rule, path, done=None):
    """
    Validate a scalar value: run any 'func' extension, then check the
    enum, type, pattern, range and timestamp constraints from the rule.

    :param value: Scalar data value to validate.
    :param rule: Rule object describing the constraints.
    :param path: Path inside the document, used in error reporting.
    :param done: Bookkeeping argument propagated from the recursive walk.
    :raises CoreError: if a range check is requested on a non scalar value.
    """
    log.debug(u"Validate scalar")
    log.debug(u" # %s", value)
    log.debug(u" # %s", rule)
    log.debug(u" # %s", rule.type)
    log.debug(u" # %s", path)

    # Handle 'func' argument on this scalar
    self._handle_func(value, rule, path, done)

    if rule.enum is not None:
        if value not in rule.enum:
            self.errors.append(
                SchemaError.SchemaErrorEntry(
                    msg=u"Enum '{value}' does not exist. Path: '{path}'",
                    path=path,
                    value=nativestr(value) if tt['str'](value) else value,
                ))

    # Set default value
    if rule.default and value is None:
        value = rule.default

    self._validate_scalar_type(value, rule.type, path)

    if value is None:
        return

    if rule.pattern is not None:
        res = re.match(rule.pattern, str(value))
        if res is None:
            # Not matching
            self.errors.append(
                SchemaError.SchemaErrorEntry(
                    msg=
                    u"Value '{value}' does not match pattern '{pattern}'. Path: '{path}'",
                    path=path,
                    value=nativestr(str(value)),
                    # Consistency fix: use the public rule.pattern accessor
                    # like the rest of this method, not private rule._pattern.
                    pattern=rule.pattern))

    if rule.range is not None:
        if not is_scalar(value):
            raise CoreError(u"value is not a valid scalar")

        r = rule.range

        # For sized values (e.g. str) the range applies to the length.
        try:
            v = len(value)
            value = v
        except Exception:
            pass

        self._validate_range(
            r.get("max", None),
            r.get("min", None),
            r.get("max-ex", None),
            r.get("min-ex", None),
            value,
            path,
            "scalar",
        )

    # Validate timestamp
    if rule.type == "timestamp":
        self._validate_scalar_timestamp(value, path)
def __init__(self, source_file=None, schema_files=None, source_data=None, schema_data=None, extensions=None):
    """
    Load source and schema from file paths or already parsed data and load
    any extension files.

    :param source_file: Path to a .json/.yaml/.yml file with the data to validate.
    :param schema_files: List of schema file paths that are merged into one schema.
    :param source_data: Already parsed data to validate (fallback when no file given).
    :param schema_data: Already parsed schema (fallback when no files given).
    :param extensions:
        List of paths to python files that should be imported and available via 'func' keywork.
        This list of extensions can be set manually or they should be provided by the `--extension`
        flag from the cli. This list should not contain files specified by the `extensions` list
        keyword that can be defined at the top level of the schema.
    """
    # Bug fix: the defaults used to be the shared mutable literal [];
    # self.extensions is appended to below, so schema-defined extensions
    # leaked into every later Core instance.
    if schema_files is None:
        schema_files = []
    if extensions is None:
        extensions = []

    log.debug(u"source_file: %s", source_file)
    log.debug(u"schema_file: %s", schema_files)
    log.debug(u"source_data: %s", source_data)
    log.debug(u"schema_data: %s", schema_data)
    log.debug(u"extension files: %s", extensions)

    self.source = None
    self.schema = None
    self.validation_errors = None
    self.validation_errors_exceptions = None
    self.root_rule = None
    self.extensions = extensions
    self.errors = []

    if source_file is not None:
        if not os.path.exists(source_file):
            raise CoreError(
                u"Provided source_file do not exists on disk: {}".format(
                    source_file))

        with open(source_file, "r") as stream:
            # File format is decided from the file extension only.
            if source_file.endswith(".json"):
                try:
                    self.source = json.load(stream)
                except Exception:
                    raise CoreError(
                        u"Unable to load any data from source json file")
            elif source_file.endswith(".yaml") or source_file.endswith(
                    '.yml'):
                try:
                    self.source = yaml.load(stream)
                except Exception:
                    raise CoreError(
                        u"Unable to load any data from source yaml file")
            else:
                raise CoreError(
                    u"Unable to load source_file. Unknown file format of specified file path: {}"
                    .format(source_file))

    if not isinstance(schema_files, list):
        raise CoreError(u"schema_files must be of list type")

    # Merge all schema files into one single file for easy parsing
    if len(schema_files) > 0:
        schema_data = {}

        for f in schema_files:
            if not os.path.exists(f):
                raise CoreError(
                    u"Provided source_file do not exists on disk : {0}".
                    format(f))

            with open(f, "r") as stream:
                if f.endswith(".json"):
                    try:
                        data = json.load(stream)
                    except Exception:
                        raise CoreError(
                            u"No data loaded from file : {}".format(f))
                elif f.endswith(".yaml") or f.endswith(".yml"):
                    data = yaml.load(stream)
                    if not data:
                        raise CoreError(
                            u"No data loaded from file : {}".format(f))
                else:
                    raise CoreError(
                        u"Unable to load file : {} : Unknown file format. Supported file endings is [.json, .yaml, .yml]"
                    )

            # A schema key may only be defined in one of the merged files.
            for key in data.keys():
                if key in schema_data.keys():
                    raise CoreError(
                        u"Parsed key : {} : two times in schema files..."
                        .format(key))

            schema_data = dict(schema_data, **data)

        self.schema = schema_data

    # Nothing was loaded so try the source_data variable
    if self.source is None:
        log.debug(u"No source file loaded, trying source data variable")
        self.source = source_data
    if self.schema is None:
        log.debug(u"No schema file loaded, trying schema data variable")
        self.schema = schema_data

    # Test if anything was loaded
    if self.source is None:
        raise CoreError(u"No source file/data was loaded")
    if self.schema is None:
        raise CoreError(u"No schema file/data was loaded")

    # Merge any extensions defined in the schema with the provided list of extensions from the cli
    for f in self.schema.get('extensions', []):
        self.extensions.append(f)

    # Bug fix: the old condition ('not a list AND all strings') could never
    # trigger for list input; validate that extensions is a list of strings.
    if not isinstance(self.extensions, list) or not all(
            [isinstance(e, str) for e in self.extensions]):
        raise CoreError(
            u"Specified extensions must be a list of file paths")

    self._load_extensions()
def _validate_sequence(self, value, rule, path, errors=None, done=None):
    """
    Validate a sequence value against the single rule in rule._sequence,
    including recursive validation of every item, range checks on the
    sequence length, and unique/ident constraints.

    :param value: List data to validate (None is accepted and skipped).
    :param rule: Rule object whose _sequence holds exactly one item rule.
    :param path: Path inside the document, used in error reporting.
    :param errors: List that error strings are appended to; a fresh list is
                   created when omitted.
    :param done: Bookkeeping argument propagated to recursive validation.
    :raises CoreError: when the sequence rule does not hold exactly one item.
    """
    # Bug fix: the previous default 'errors=[]' was a shared mutable default.
    if errors is None:
        errors = []

    Log.debug("Core Validate sequence")
    Log.debug(" * Data: {}".format(value))
    Log.debug(" * Rule: {}".format(rule))
    Log.debug(" * RuleType: {}".format(rule._type))
    Log.debug(" * Path: {}".format(path))
    Log.debug(" * Seq: {}".format(rule._sequence))
    Log.debug(" * Map: {}".format(rule._mapping))

    if not len(rule._sequence) == 1:
        raise CoreError(
            "only 1 item allowed in sequence rule : {}".format(path))

    if value is None:
        Log.debug("Core seq: sequence data is None")
        return

    r = rule._sequence[0]

    for i, item in enumerate(value):
        # Validate recursivley
        Log.debug("Core seq: validating recursivley: {}".format(r))
        self._validate(item, r, "{}/{}".format(path, i), errors, done)
    Log.debug("Core seq: validation recursivley done...")

    if rule._range is not None:
        rr = rule._range
        self._validate_range(rr.get("max", None), rr.get("min", None),
                             rr.get("max-ex", None), rr.get("min-ex", None),
                             errors, len(value), path, "seq")

    if r._type == "map":
        Log.debug("Found map inside sequence")
        mapping = r._mapping
        unique_keys = []

        # Bug fix: the loop variable used to be named 'rule', shadowing and
        # rebinding the parameter of this method.
        for k, key_rule in mapping.items():
            Log.debug("Key: {}".format(k))
            Log.debug("Rule: {}".format(key_rule))

            if key_rule._unique or key_rule._ident:
                unique_keys.append(k)

        if len(unique_keys) > 0:
            for v in unique_keys:
                table = {}
                # Bug fix: the old code never recorded seen values and never
                # advanced its index, so duplicates were never detected; the
                # error message also formatted the wrong variables. Mirror
                # the working scalar-unique branch below instead.
                for j, V in enumerate(value):
                    val = V[v]

                    if val is None:
                        continue

                    if val in table:
                        curr_path = "{}/{}/{}".format(path, j, v)
                        prev_path = "{}/{}/{}".format(path, table[val], v)
                        errors.append(
                            "value.notunique :: value: {} : {} : {}".format(
                                val, curr_path, prev_path))
                    else:
                        table[val] = j
    elif r._unique:
        Log.debug("Found unique value in sequence")
        table = {}
        for j, val in enumerate(value):
            if val is None:
                continue

            if val in table:
                curr_path = "{}/{}".format(path, j)
                prev_path = "{}/{}".format(path, table[val])
                errors.append(
                    "value.notunique :: value: {} : {} : {}".format(
                        val, curr_path, prev_path))
            else:
                table[val] = j
def __init__(self, source_file=None, schema_files=None, source_data=None, schema_data=None):
    """
    Load the data to validate and the schema from files or parsed data.

    :param source_file: Path to a .json/.yaml/.yml file with the data to validate.
    :param schema_files: List of schema file paths that are merged into one schema.
    :param source_data: Already parsed data to validate (fallback when no file given).
    :param schema_data: Already parsed schema (fallback when no files given).
    """
    # Bug fix: the default used to be the shared mutable literal [].
    if schema_files is None:
        schema_files = []

    Log.debug("source_file: {}".format(source_file))
    Log.debug("schema_file: {}".format(schema_files))
    Log.debug("source_data: {}".format(source_data))
    Log.debug("schema_data: {}".format(schema_data))

    self.source = None
    self.schema = None
    self.validation_errors = None
    self.root_rule = None

    if source_file is not None:
        if not os.path.exists(source_file):
            raise CoreError(
                "Provided source_file do not exists on disk: {}".format(
                    source_file))

        with open(source_file, "r") as stream:
            # File format is decided from the file extension only.
            if source_file.endswith(".json"):
                try:
                    self.source = json.load(stream)
                except Exception:
                    raise CoreError(
                        "Unable to load any data from source json file")
            elif source_file.endswith(".yaml") or source_file.endswith(
                    '.yml'):
                try:
                    self.source = yaml.load(stream)
                except Exception:
                    raise CoreError(
                        "Unable to load any data from source yaml file")
            else:
                raise CoreError(
                    "Unable to load source_file. Unknown file format of specified file path: {}"
                    .format(source_file))

    if not isinstance(schema_files, list):
        raise CoreError("schema_files must be of list type")

    # Merge all schema files into one signel file for easy parsing
    if len(schema_files) > 0:
        schema_data = {}

        for f in schema_files:
            if not os.path.exists(f):
                raise CoreError(
                    "Provided source_file do not exists on disk : {0}".
                    format(f))

            with open(f, "r") as stream:
                if f.endswith(".json"):
                    try:
                        data = json.load(stream)
                    except Exception:
                        raise CoreError(
                            "No data loaded from file : {}".format(f))
                elif f.endswith(".yaml") or f.endswith(".yml"):
                    data = yaml.load(stream)
                    if not data:
                        raise CoreError(
                            "No data loaded from file : {}".format(f))
                else:
                    raise CoreError(
                        "Unable to load file : {} : Unknown file format. Supported file endings is [.json, .yaml, .yml]"
                    )

            # A schema key may only be defined in one of the merged files.
            for key in data.keys():
                if key in schema_data.keys():
                    raise CoreError(
                        "Parsed key : {} : two times in schema files..."
                        .format(key))

            schema_data = dict(schema_data, **data)

        self.schema = schema_data

    # Nothing was loaded so try the source_data variable
    if self.source is None:
        Log.debug("No source file loaded, trying source data variable")
        self.source = source_data
    if self.schema is None:
        Log.debug("No schema file loaded, trying schema data variable")
        self.schema = schema_data

    # Test if anything was loaded
    if self.source is None:
        raise CoreError("No source file/data was loaded")
    if self.schema is None:
        raise CoreError("No schema file/data was loaded")
def _validate_sequence(self, value, rule, path, done=None):
    """
    Validate a list value against every rule in rule.sequence.

    Each item is validated by a throw-away sub-Core object so errors can be
    tracked per (item, rule) pair and combined according to rule.matching
    ('any', 'all' or '*'). Unique/ident constraints are collected separately
    and de-duplicated before being reported.
    """
    log.debug(u"Core Validate sequence")
    log.debug(u" Sequence : Data: %s", value)
    log.debug(u" Sequence : Rule: %s", rule)
    log.debug(u" Sequence : RuleType: %s", rule.type)
    log.debug(u" Sequence : Path: %s", path)
    log.debug(u" Sequence : Seq: %s", rule.sequence)
    log.debug(u" Sequence : Map: %s", rule.mapping)

    if len(rule.sequence) <= 0:
        raise CoreError(
            u"Sequence must contains atleast one item : {0}".format(path))

    if value is None:
        log.debug(u" * Core seq: sequence data is None")
        return

    if not isinstance(value, list):
        # Non-list data is a schema error, not an exception.
        if isinstance(value, str):
            value = value.encode('unicode_escape')
        self.errors.append(
            SchemaError.SchemaErrorEntry(
                u"Value '{value}' is not a list. Value path: '{path}'",
                path,
                value,
            ))
        return

    # Handle 'func' argument on this sequence
    self._handle_func(value, rule, path, done)

    ok_values = []
    error_tracker = []

    unique_errors = {}
    map_unique_errors = {}

    for i, item in enumerate(value):
        processed = []

        for r in rule.sequence:
            tmp_errors = []

            try:
                # Create a sub core object to enable error tracking that do not
                # collide with this Core objects errors
                tmp_core = Core(source_data={}, schema_data={})
                tmp_core.fix_ruby_style_regex = self.fix_ruby_style_regex
                tmp_core.allow_assertions = self.allow_assertions
                tmp_core.strict_rule_validation = self.strict_rule_validation
                tmp_core.loaded_extensions = self.loaded_extensions
                tmp_core._validate(item, r, "{0}/{1}".format(path, i), done)
                tmp_errors = tmp_core.errors
            except NotMappingError:
                # For example: If one type was specified as 'map' but data
                # was 'str' a exception will be thrown but we should ignore it
                pass
            except NotSequenceError:
                # For example: If one type was specified as 'seq' but data
                # was 'str' a exception will be thrown but we shold ignore it
                pass

            processed.append(tmp_errors)

            if r.type == "map":
                log.debug(u" * Found map inside sequence")
                unique_keys = []

                if r.mapping is None:
                    log.debug(u" + No rule to apply, prolly because of allowempty: True")
                    return

                # Collect every mapping key flagged unique or ident.
                for k, _rule in r.mapping.items():
                    log.debug(u" * Key: %s", k)
                    log.debug(u" * Rule: %s", _rule)

                    if _rule.unique or _rule.ident:
                        unique_keys.append(k)

                if len(unique_keys) > 0:
                    for v in unique_keys:
                        table = {}
                        for j, V in enumerate(value):
                            # If key do not exists it should be ignored by unique because that is not a broken constraint
                            val = V.get(v, None)

                            if val is None:
                                continue

                            if val in table:
                                curr_path = "{0}/{1}/{2}".format(path, j, v)
                                prev_path = "{0}/{1}/{2}".format(path, table[val], v)
                                # Keyed on repr so the same duplicate is only reported once.
                                s = SchemaError.SchemaErrorEntry(
                                    msg=u"Value '{duplicate}' is not unique. Previous path: '{prev_path}'. Path: '{path}'",
                                    path=curr_path,
                                    value=value,
                                    duplicate=val,
                                    prev_path=prev_path,
                                )
                                map_unique_errors[s.__repr__()] = s
                            else:
                                table[val] = j
            elif r.unique:
                log.debug(u" * Found unique value in sequence")
                table = {}
                for j, val in enumerate(value):
                    if val is None:
                        continue

                    if val in table:
                        curr_path = "{0}/{1}".format(path, j)
                        prev_path = "{0}/{1}".format(path, table[val])
                        s = SchemaError.SchemaErrorEntry(
                            msg=u"Value '{duplicate}' is not unique. Previous path: '{prev_path}'. Path: '{path}'",
                            path=curr_path,
                            value=value,
                            duplicate=val,
                            prev_path=prev_path,
                        )
                        unique_errors[s.__repr__()] = s
                    else:
                        table[val] = j

        error_tracker.append(processed)
        no_errors = []
        for _errors in processed:
            no_errors.append(len(_errors) == 0)

        if rule.matching == "any":
            log.debug(u" * any rule %s", True in no_errors)
            ok_values.append(True in no_errors)
        elif rule.matching == "all":
            # NOTE(review): .format() on a string without placeholders is a
            # no-op, so the computed all() value is never logged -- verify.
            log.debug(u" * all rule".format(all(no_errors)))
            ok_values.append(all(no_errors))
        elif rule.matching == "*":
            # NOTE(review): extra log argument without a %s placeholder --
            # logging will swallow the formatting error; verify intent.
            log.debug(u" * star rule", "...")
            ok_values.append(True)

    # NOTE(review): iterating the dicts yields the repr *keys*, so the
    # strings (not the SchemaErrorEntry objects) are appended -- verify
    # whether unique_errors[_error] was intended here.
    for _error in unique_errors:
        self.errors.append(_error)

    for _error in map_unique_errors:
        self.errors.append(_error)

    log.debug(u" * ok : %s", ok_values)

    # All values must pass the validation, otherwise add the parsed errors
    # to the global error list and throw up some error.
    if not all(ok_values):
        # Ignore checking for '*' type because it should allways go through
        if rule.matching == "any":
            log.debug(
                u" * Value: %s did not validate against one or more sequence schemas",
                value)
        elif rule.matching == "all":
            log.debug(
                u" * Value: %s did not validate against all possible sequence schemas",
                value)

        for i, is_ok in enumerate(ok_values):
            if not is_ok:
                for error in error_tracker[i]:
                    for e in error:
                        self.errors.append(e)

    log.debug(u" * Core seq: validation recursivley done...")

    if rule.range is not None:
        rr = rule.range

        self._validate_range(
            rr.get("max"),
            rr.get("min"),
            rr.get("max-ex"),
            rr.get("min-ex"),
            len(value),
            path,
            "seq",
        )
def __init__(
        self,
        source_file=None,
        schema_files=None,
        source_data=None,
        schema_data=None,
        extensions=None,
        strict_rule_validation=False,
        fix_ruby_style_regex=False,
        allow_assertions=False,
):
    """
    Set up a validation core from files and/or already parsed data.

    :param source_file: Path to a YAML file containing the data to validate.
    :param schema_files: List of paths to YAML schema files that are merged
                         into one schema. Duplicate top level keys between
                         files raise CoreError.
    :param source_data: Parsed source data; only used when no source_file
                        was loaded.
    :param schema_data: Parsed schema data; only used when no schema_files
                        were loaded.
    :param extensions: List of paths to python files that should be imported
                       and available via 'func' keywork. This list of
                       extensions can be set manually or they should be
                       provided by the `--extension` flag from the cli.
                       Files specified by the `extensions` keyword at the
                       top level of the schema are appended to this list.
    :param strict_rule_validation: When True, unknown rule keywords fail.
    :param fix_ruby_style_regex: When True, surrounding slashes are trimmed
                                 from ruby style /regex/ patterns.
    :param allow_assertions: When True, the 'assert' schema keyword is
                            allowed.

    :raises CoreError: For missing files, wrong argument types, duplicated
                       schema keys, or when no source/schema was loaded.
    """
    import yaml
    from pykwalify.errors import CoreError
    import logging

    log = logging.getLogger(__name__)
    # NOTE(review): this silences logging for the WHOLE process, not just
    # this module. Kept as-is to preserve existing behaviour -- confirm
    # this is intentional.
    logging.disable(logging.CRITICAL)

    if schema_files is None:
        schema_files = []
    if extensions is None:
        extensions = []

    log.debug(u"source_file: %s", source_file)
    log.debug(u"schema_file: %s", schema_files)
    log.debug(u"source_data: %s", source_data)
    log.debug(u"schema_data: %s", schema_data)
    log.debug(u"extension files: %s", extensions)

    self.source = None
    self.schema = None
    self.validation_errors = None
    self.validation_errors_exceptions = None
    self.root_rule = None
    self.extensions = extensions
    self.errors = []
    self.strict_rule_validation = strict_rule_validation
    self.fix_ruby_style_regex = fix_ruby_style_regex
    self.allow_assertions = allow_assertions

    if source_file is not None:
        if not os.path.exists(source_file):
            raise CoreError(
                u"Provided source_file do not exists on disk: {0}".format(
                    source_file))
        with open(source_file, "r") as stream:
            self.source = yaml.safe_load(stream)

    if not isinstance(schema_files, list):
        raise CoreError(u"schema_files must be of list type")

    # Merge all schema files into one single schema dict for easy parsing
    if len(schema_files) > 0:
        schema_data = {}
        for f in schema_files:
            if not os.path.exists(f):
                # BUGFIX: the message used to say 'source_file' even though
                # the missing file is a schema file.
                raise CoreError(
                    u"Provided schema file do not exists on disk : {0}".format(f))

            with open(f, "r") as stream:
                data = yaml.safe_load(stream)
                if not data:
                    raise CoreError(
                        u"No data loaded from file : {0}".format(f))

            for key in data.keys():
                if key in schema_data.keys():
                    raise CoreError(
                        u"Parsed key : {0} : two times in schema files..."
                        .format(key))

            schema_data = dict(schema_data, **data)

        self.schema = schema_data

    # Nothing was loaded so try the source_data variable
    if self.source is None:
        log.debug(u"No source file loaded, trying source data variable")
        self.source = source_data
    if self.schema is None:
        log.debug(u"No schema file loaded, trying schema data variable")
        self.schema = schema_data

    # Test if anything was loaded
    if self.source is None:
        raise CoreError(u"No source file/data was loaded")
    if self.schema is None:
        raise CoreError(u"No schema file/data was loaded")

    # Merge any extensions defined in the schema with the provided list of extensions from the cli
    for f in self.schema.get('extensions', []):
        self.extensions.append(f)

    # BUGFIX: the original condition used `and`, so the check could raise
    # only for a non-list whose elements were all strings -- i.e. it never
    # enforced anything. The intent is to reject anything that is not a
    # list of string file paths.
    if not isinstance(self.extensions, list) or not all(
            isinstance(e, str) for e in self.extensions):
        raise CoreError(
            u"Specified extensions must be a list of file paths")

    self._load_extensions()

    if self.strict_rule_validation:
        log.info("Using strict rule keywords validation...")
def _validate_scalar(self, value, rule, path, errors, done=None):
    """
    Validate a single scalar ``value`` against ``rule``, appending any
    failures to the ``errors`` list.
    """
    log.debug("Validate scalar")
    for detail in (value, rule, rule._type, path):
        log.debug(" # {}".format(detail))

    # Handle 'func' argument on this scalar
    self._handle_func(value, rule, path, errors, done)

    enum = rule._enum
    if enum is not None and value not in enum:
        errors.append(
            SchemaError.SchemaErrorEntry(
                msg="Enum '{value}' does not exist. Path: '{path}'",
                path=path,
                value=value))

    # Fall back to the rule's default when no value was supplied
    if value is None and rule._default:
        value = rule._default

    self._validate_scalar_type(value, rule._type, errors, path)

    if value is None:
        # Nothing further can be checked against an absent value
        return

    if rule._pattern is not None:
        if re.match(rule._pattern, str(value)) is None:  # Not matching
            errors.append(
                SchemaError.SchemaErrorEntry(
                    msg="Value '{value}' does not match pattern '{pattern}'. Path: '{path}'",
                    path=path,
                    value=value,
                    pattern=rule._pattern))

    if rule._range is not None:
        if not is_scalar(value):
            raise CoreError("value is not a valid scalar")

        bounds = rule._range
        # Sized values (e.g. strings) are range-checked by their length;
        # non-sized values fall through untouched.
        try:
            value = len(value)
        except Exception:
            pass

        self._validate_range(
            bounds.get("max"),
            bounds.get("min"),
            bounds.get("max-ex"),
            bounds.get("min-ex"),
            errors,
            value,
            path,
            "scalar",
        )

    # Validate timestamp
    if rule._type == "timestamp":
        # parse("") will give a valid date but it should not be
        # considered a valid timestamp
        if value.strip() == "":
            errors.append("timestamp.empty : {} : {}".format(value, path))
        else:
            try:
                parse(value)
                # If it can be parsed then it is valid
            except Exception:
                errors.append("timestamp.invalid : {} : {}".format(
                    value, path))
def _validate_sequence(self, value, rule, path, errors, done=None):
    """
    Validate ``value`` as a sequence against every sub-schema in
    ``rule._sequence``, appending failures to ``errors``.

    :param value: The list to validate; None is a no-op.
    :param rule: Rule object whose _sequence describes allowed item schemas.
    :param path: Slash separated path used in error reporting.
    :param errors: Mutable list collecting SchemaErrorEntry objects/strings.
    :param done: Opaque recursion guard forwarded to self._validate.
    :raises CoreError: If the rule defines an empty sequence schema.
    :raises NotSequenceError: If ``value`` is not a list.
    """
    log.debug("Core Validate sequence")
    log.debug(" * Data: {}".format(value))
    log.debug(" * Rule: {}".format(rule))
    log.debug(" * RuleType: {}".format(rule._type))
    log.debug(" * Path: {}".format(path))
    log.debug(" * Seq: {}".format(rule._sequence))
    log.debug(" * Map: {}".format(rule._mapping))

    if len(rule._sequence) <= 0:
        raise CoreError(
            "Sequence must contains atleast one item : {}".format(path))

    if value is None:
        log.debug("Core seq: sequence data is None")
        return

    if not isinstance(value, list):
        raise NotSequenceError(
            "Value: {} is not of a sequence type".format(value))

    # Handle 'func' argument on this sequence
    self._handle_func(value, rule, path, errors, done)

    ok_values = []
    error_tracker = []

    unique_errors = {}
    map_unique_errors = {}

    for i, item in enumerate(value):
        processed = []

        for r in rule._sequence:
            tmp_errors = []

            try:
                self._validate(item, r, "{}/{}".format(path, i),
                               tmp_errors, done)
            except NotMappingError:
                # For example: If one type was specified as 'map' but data
                # was 'str' a exception will be thrown but we should ignore it
                pass
            except NotSequenceError:
                # For example: If one type was specified as 'seq' but data
                # was 'str' a exception will be thrown but we shold ignore it
                pass

            processed.append(tmp_errors)

            if r._type == "map":
                log.debug("Found map inside sequence")
                unique_keys = []
                for k, _rule in r._mapping.items():
                    log.debug("Key: {}".format(k))
                    log.debug("Rule: {}".format(_rule))

                    if _rule._unique or _rule._ident:
                        unique_keys.append(k)

                if len(unique_keys) > 0:
                    for v in unique_keys:
                        table = {}
                        for j, V in enumerate(value):
                            # BUGFIX: V[v] raised KeyError when an item did
                            # not carry the unique key; a missing key must
                            # simply be ignored by the unique constraint.
                            val = V.get(v, None)
                            if val is None:
                                continue

                            if val in table:
                                curr_path = "{}/{}/{}".format(path, j, v)
                                prev_path = "{}/{}/{}".format(
                                    path, table[val], v)
                                s = SchemaError.SchemaErrorEntry(
                                    msg="Value '{duplicate}' is not unique. Previous path: '{prev_path}'. Path: '{path}'",
                                    path=curr_path,
                                    value=value,
                                    duplicate=val,
                                    prev_path=prev_path,
                                )
                                map_unique_errors[s.__repr__()] = s
                            else:
                                table[val] = j
            elif r._unique:
                log.debug("Found unique value in sequence")
                table = {}
                for j, val in enumerate(value):
                    if val is None:
                        continue

                    if val in table:
                        curr_path = "{}/{}".format(path, j)
                        prev_path = "{}/{}".format(path, table[val])
                        s = SchemaError.SchemaErrorEntry(
                            msg="Value '{duplicate}' is not unique. Previous path: '{prev_path}'. Path: '{path}'",
                            path=curr_path,
                            value=value,
                            duplicate=val,
                            prev_path=prev_path,
                        )
                        unique_errors[s.__repr__()] = s
                    else:
                        table[val] = j

        error_tracker.append(processed)

        no_errors = [len(_errors) == 0 for _errors in processed]

        if rule._matching == "any":
            log.debug("any rule {}".format(True in no_errors))
            ok_values.append(True in no_errors)
        elif rule._matching == "all":
            # BUGFIX: the log string had no placeholder, so the computed
            # result was silently dropped from the message.
            log.debug("all rule {}".format(all(no_errors)))
            ok_values.append(all(no_errors))
        elif rule._matching == "*":
            # BUGFIX: a stray second positional argument made this call
            # raise a formatting error when debug logging was emitted.
            log.debug("star rule")
            ok_values.append(True)

    # The dicts are keyed on repr() purely for de-duplication; report the
    # SchemaErrorEntry values, not the repr-string keys.
    for _error in unique_errors.values():
        errors.append(_error)
    for _error in map_unique_errors.values():
        errors.append(_error)

    log.debug("ok : {}".format(ok_values))

    # All values must pass the validation, otherwise add the parsed errors
    # to the global error list and throw up some error.
    if not all(ok_values):
        # Ignore checking for '*' type because it should allways go through
        if rule._matching == "any":
            log.debug(
                "Value: {0} did not validate against one or more sequence schemas"
                .format(value))
        elif rule._matching == "all":
            log.debug(
                "Value: {0} did not validate against all possible sequence schemas"
                .format(value))

        # BUGFIX: only propagate errors from items that actually failed;
        # the original loop also re-reported errors from items that were
        # ultimately accepted (e.g. matching one of several 'any' schemas).
        for i, is_ok in enumerate(ok_values):
            if not is_ok:
                for error in error_tracker[i]:
                    for e in error:
                        errors.append(e)

    log.debug("Core seq: validation recursivley done...")

    if rule._range is not None:
        rr = rule._range
        self._validate_range(
            rr.get("max", None),
            rr.get("min", None),
            rr.get("max-ex", None),
            rr.get("min-ex", None),
            errors,
            len(value),
            path,
            "seq",
        )
def _validate_scalar(self, value, rule, path, done=None):
    """
    Validate a scalar ``value`` against ``rule``, recording any failures
    on ``self.errors``.

    :returns: True early when ``value`` is None (nothing to validate),
              otherwise None.
    :raises CoreError: When a range/date rule is applied to a non-scalar.
    """
    log.debug(u"Validate scalar")
    log.debug(u" Scalar : Value : %s", value)
    log.debug(u" Scalar : Rule : %s", rule)
    log.debug(u" Scalar : RuleType : %s", rule.type)
    log.debug(u" Scalar : Path %s", path)

    # Handle 'func' argument on this scalar
    self._handle_func(value, rule, path, done)

    if rule.assertion is not None:
        self._validate_assert(rule, value, path)

    if value is None:
        return True

    if rule.enum is not None and value not in rule.enum:
        self.errors.append(
            SchemaError.SchemaErrorEntry(
                msg=u"Enum '{value}' does not exist. Path: '{path}' Enum: {enum_values}",
                path=path,
                value=nativestr(value) if tt['str'](value) else value,
                enum_values=rule.enum,
            ))

    # Set default value
    # NOTE(review): dead code -- value cannot be None here because of the
    # early `return True` above; kept to preserve the original structure.
    if rule.default and value is None:
        value = rule.default

    if not self._validate_scalar_type(value, rule.type, path):
        return

    if value is None:
        return

    if rule.pattern is not None:
        #
        # Try to trim away the surrounding slashes around ruby style /<regex>/ if they are defined.
        # This is a quirk from ruby that they define regex patterns with surrounding slashes.
        # Docs on how ruby regex works can be found here: https://ruby-doc.org/core-2.4.0/Regexp.html
        # The original ruby implementation uses this code to validate patterns
        #     unless value.to_s =~ rule.regexp
        # Because python does not work with surrounding slashes we have to trim them away in order to make the regex work
        #
        if rule.pattern.startswith('/') and rule.pattern.endswith(
                '/') and self.fix_ruby_style_regex:
            rule.pattern = rule.pattern[1:-1]
            log.debug(
                "Trimming slashes around ruby style regex. New pattern value: '{0}'"
                .format(rule.pattern))

        try:
            # BUGFIX: the log message was missing the closing quote around
            # the second placeholder.
            log.debug("Matching pattern '{0}' to regex '{1}'".format(
                rule.pattern, value))
            res = re.match(rule.pattern, value, re.UNICODE)
        except TypeError:
            res = None

        if res is None:  # Not matching
            self.errors.append(
                SchemaError.SchemaErrorEntry(
                    msg=u"Value '{value}' does not match pattern '{pattern}'. Path: '{path}'",
                    path=path,
                    value=nativestr(str(value)),
                    # BUGFIX: this code path uses the public accessor
                    # (rule.pattern) everywhere else; rule._pattern was an
                    # internal-name slip.
                    pattern=rule.pattern))
        else:
            log.debug("Pattern matched...")

    if rule.range is not None:
        if not is_scalar(value):
            raise CoreError(u"value is not a valid scalar")

        r = rule.range
        # Sized values (e.g. strings) are range-checked by their length;
        # non-sized values fall through untouched.
        try:
            v = len(value)
            value = v
        except Exception:
            pass

        self._validate_range(
            r.get("max"),
            r.get("min"),
            r.get("max-ex"),
            r.get("min-ex"),
            value,
            path,
            "scalar",
        )

    if rule.length is not None:
        self._validate_length(
            rule.length,
            value,
            path,
            'scalar',
        )

    # Validate timestamp
    if rule.type == "timestamp":
        self._validate_scalar_timestamp(value, path)

    if rule.type == "date":
        if not is_scalar(value):
            raise CoreError(u'value is not a valid scalar')
        date_format = rule.format
        self._validate_scalar_date(value, date_format, path)
def _validate_mapping(self, value, rule, path, errors=None, done=None):
    """
    Validate ``value`` as a mapping against ``rule``, appending any
    failures to ``errors``.

    :param value: The data mapping (dict) to validate; None is a no-op.
    :param rule: Rule object describing the expected mapping.
    :param path: Slash separated path used in error reporting.
    :param errors: List collecting error strings; a fresh list is created
                   when omitted.
    :param done: Opaque recursion guard forwarded to self._validate.
    :raises CoreError: If the rule's mapping is not a dict object.
    """
    # BUGFIX: errors previously defaulted to a mutable [] that was shared
    # between every call, so errors silently accumulated across
    # invocations that relied on the default.
    if errors is None:
        errors = []

    # BUGFIX: these calls used a capitalised 'Log' name that is not
    # defined anywhere; every other validator in this module logs via the
    # module level 'log'.
    log.debug("Validate mapping")
    log.debug(" + Data: {}".format(value))
    log.debug(" + Rule: {}".format(rule))
    log.debug(" + RuleType: {}".format(rule._type))
    log.debug(" + Path: {}".format(path))
    log.debug(" + Seq: {}".format(rule._sequence))
    log.debug(" + Map: {}".format(rule._mapping))

    if rule._mapping is None:
        log.debug(" + No rule to apply, prolly because of allowempty: True")
        return

    if not isinstance(rule._mapping, dict):
        raise CoreError("mapping is not a valid dict object")

    if value is None:
        log.debug(" + Value is None, returning...")
        return

    m = rule._mapping
    log.debug(" + RuleMapping: {}".format(m))

    # Report missing required keys and inject rule defaults before
    # validating the individual entries.
    for k, rr in m.items():
        if rr._required and k not in value:
            errors.append("required.nokey : {} : {}".format(k, path))
        if k not in value and rr._default is not None:
            value[k] = rr._default

    for k, v in value.items():
        r = m.get(k, None)
        log.debug(" + m: {}".format(m))
        log.debug(" + rr: {} {}".format(k, v))
        log.debug(" + r: {}".format(r))

        regex_mappings = [
            (regex_rule, re.match(regex_rule._map_regex_rule, str(k)))
            for regex_rule in rule._regex_mappings
        ]
        log.debug(" + Mapping Regex matches: {}".format(regex_mappings))

        if rule._pattern:
            # This is the global regex pattern specified at the same level
            # as the mapping: and type: map keys
            res = re.match(rule._pattern, str(k))
            log.debug("Matching regexPattern: {} with value: {}".format(
                rule._pattern, k))
            if res is None:  # Not matching
                errors.append("pattern.unmatch : {} --> {} : {}".format(
                    rule._pattern, k, path))
        # NOTE(review): tuples are always truthy, so any(regex_mappings)
        # triggers whenever regex mappings EXIST, even if none matched k;
        # such keys then escape the key.undefined check below. Kept as-is
        # to preserve behaviour -- confirm intent.
        elif any(regex_mappings):
            # Found atleast one that matches a mapping regex
            for mm in regex_mappings:
                if mm[1]:
                    # BUGFIX: 'patter' typo in the log message.
                    log.debug(" + Matching regex pattern: {}".format(mm[0]))
                    self._validate(v, mm[0], "{}/{}".format(path, k),
                                   errors, done)
        elif r is None:
            if not rule._allowempty_map:
                errors.append("key.undefined : {} : {}".format(k, path))
        else:
            if not r._schema:
                # validate recursively
                log.debug("Core Map: validate recursively: {}".format(r))
                self._validate(v, r, "{}/{}".format(path, k), errors, done)
            else:
                # BUGFIX: this printed straight to stdout; use the logger
                # like every other code path in this validator.
                log.debug(" * Something is ignored Oo : {}".format(r))