def _validate_custom_reports(self, custom_reports_data: List[dict]):
    """Validate custom report definitions against the packaged JSON schema.

    The schema is loaded from ``custom_reports_schema.json`` inside the
    ``source_googleanalytics_singer`` package.

    :param custom_reports_data: custom report definitions to check.
    :raises Exception: when the data violates the schema; the message
        contains every validation error message joined by ``"; "``.
    """
    custom_reports_schema = json.loads(
        pkgutil.get_data("source_googleanalytics_singer", "custom_reports_schema.json")
    )
    # Build the validator once and walk the errors a single time: an empty
    # error list is exactly what ``is_valid`` would have reported as valid.
    validator = Draft4Validator(custom_reports_schema)
    error_messages = [
        error.message for error in validator.iter_errors(custom_reports_data)
    ]
    if error_messages:
        raise Exception(
            "An error occurred during custom_reports data validation: "
            + "; ".join(error_messages)
        )
def validate_parameter(value, param, message_prefix):
    """Convert ``value`` according to ``param`` and validate the result.

    ``param`` is copied before use. A definition with a ``'type'`` key is
    handled as a primitive or an array; one without it is handled as a JSON
    object validated against ``param['schema']``.

    :param value: raw input, or ``None`` when nothing was supplied.
    :param param: parameter definition (dict).
    :param message_prefix: text placed at the start of error messages.
    :returns: the converted value, or ``None`` when ``value`` is ``None``
        and the parameter is not required.
    :raises InvalidArgument: for a missing required value, an unknown
        parameter type, or input rejected by the validator.
    """
    param = deepcopy(param)
    primitive_type = 'primitive' if 'type' in param else 'object'

    def _reject(exception):
        # Shared tail of both validation branches: log, then raise.
        message = '{0} has invalid input. Input {1} does not conform to ' \
                  'validators: {2}'.format(message_prefix, value,
                                           format_exception_message(exception))
        logger.error(message)
        raise InvalidArgument(message)

    if value is None:
        if param.get('required'):
            message = "In {0}: Missing {1} parameter '{2}'".format(
                message_prefix, primitive_type, param['name'])
            logger.error(message)
            raise InvalidArgument(message)
        return None

    if primitive_type == 'object':
        try:
            converted_value = convert_json(param, value, message_prefix)
            Draft4Validator(
                param['schema'],
                format_checker=draft4_format_checker).validate(converted_value)
        except ValidationError as exception:
            _reject(exception)
        return converted_value

    # From here on the concrete declared type drives the handling.
    primitive_type = param['type']
    if primitive_type in TYPE_MAP:
        return validate_primitive_parameter(
            value, param, primitive_type, message_prefix)

    if primitive_type != 'array':
        raise InvalidArgument(
            'In {0}: Unknown parameter type {1}'.format(
                message_prefix, primitive_type))

    try:
        converted_value = convert_array(param, value, message_prefix)
        if 'items' in param and param['items']['type'] in ('user', 'role'):
            handle_user_roles_validation(param['items'])
        Draft4Validator(
            param, format_checker=draft4_format_checker).validate(converted_value)
    except ValidationError as exception:
        _reject(exception)
    return converted_value
def validity_of_data_subtest(self, data, schema_name):
    """Assert that ``data`` validates against a packaged schema.

    The schema named ``schema_name`` is read from the
    ``sme_finance_application_schema`` package, and the validator's
    resolver store is passed through ``patch_store`` before validating.
    """
    raw_schema = resource_string('sme_finance_application_schema', schema_name)
    validator = Draft4Validator(json.loads(raw_schema.decode()))
    patch_store(validator.resolver.store)
    self.assertTrue(validator.is_valid(data))
def validate(self, **kwargs):
    """Validate data using schema with ``JSONResolver``.

    The schema is taken from ``self['$schema']``; a non-dict value is
    wrapped as ``{'$ref': value}``.

    :raises DepositValidationError: when validation errors are found, when
        the schema reference cannot be resolved, or when a ``KeyError`` is
        raised while preparing or running the validation.
    """
    try:
        schema = self['$schema']
        if not isinstance(schema, dict):
            schema = {'$ref': schema}
        resolver_cls = current_app.extensions[
            'invenio-records'].ref_resolver_cls
        validator = Draft4Validator(
            schema, resolver=resolver_cls.from_schema(schema))
        errors = [
            FieldError(list(error.path), str(error.message))
            for error in validator.iter_errors(self)
        ]
        if errors:
            raise DepositValidationError(None, errors=errors)
    except RefResolutionError:
        raise DepositValidationError('Schema with given url not found.')
    except KeyError:
        # Any KeyError raised above is reported as a missing schema field.
        raise DepositValidationError('Schema field is required.')
def validate(self, **kwargs):
    """Validate data using schema with ``JSONResolver``.

    :raises DepositValidationError: when ``'$schema'`` is missing or falsy,
        when the schema reference cannot be resolved, or when validation
        errors are found (they are attached as ``errors``).
    """
    has_schema = '$schema' in self and self['$schema']
    if not has_schema:
        raise DepositValidationError('You need to provide a valid schema.')

    try:
        schema = self['$schema']
        if not isinstance(schema, dict):
            schema = {'$ref': schema}
        resolver_cls = current_app.extensions[
            'invenio-records'].ref_resolver_cls
        validator = Draft4Validator(
            schema, resolver=resolver_cls.from_schema(schema))
        errors = [
            FieldError(list(error.path), str(error.message))
            for error in validator.iter_errors(self)
        ]
        if errors:
            raise DepositValidationError(None, errors=errors)
    except RefResolutionError:
        raise DepositValidationError('Schema {} not found.'.format(
            self['$schema']))
def validate_timestamp(self):
    """Validate ``self.data`` against the schema from ``jsonschema_timestamp()``.

    :returns: ``True`` when validation succeeds; ``False`` when anything
        raises, in which case the exception is appended to
        ``self.list_errors``.
    """
    try:
        Draft4Validator(jsonschema_timestamp()).validate(self.data)
        return True
    # ``except X as name`` parses on Python 2.6+ and Python 3; the former
    # ``except X, name`` spelling is a SyntaxError on Python 3.
    except Exception as error:
        self.list_errors.append(error)
        return False
def __init__(self, json_data, strict=False, live_schema=None):
    """Prepare the data and a validator for ``self.schema``.

    :param json_data: mapping-like input (anything with ``__getitem__``).
    :param strict: when false, the input is passed through
        ``self._filter_data`` using the schema's ``properties``; when true,
        it is stored unchanged.
    :param live_schema: optional schema supplied at runtime. It is used as
        the schema when ``self.schema`` is empty; otherwise its
        ``properties`` are merged in, and ``required`` lists are unioned
        when both schemas define one.
    :raises TypeError: if ``json_data`` has no ``__getitem__``.
    :raises NotImplementedError: if neither ``self.schema`` nor
        ``live_schema`` is available.
    """
    import copy  # function-scope import keeps this block self-contained

    self.live_schema = live_schema
    if not hasattr(json_data, '__getitem__'):
        raise TypeError('json_data must be a dict.')
    if (not self.schema) and (live_schema is None):
        raise NotImplementedError('schema not implemented!')

    if live_schema is not None:
        if not self.schema:
            self.schema = live_schema
        else:
            # ``self.schema`` is read here before any per-instance
            # assignment, so it may be shared (e.g. a class attribute).
            # Merge into a private copy so one instance's live schema does
            # not leak into later instances.
            self.schema = copy.deepcopy(self.schema)
            self.schema['properties'].update(live_schema['properties'])
            if "required" in self.schema and "required" in live_schema:
                self.schema['required'] = list(
                    set(self.schema['required']) | set(live_schema["required"])
                )

    Draft4Validator.check_schema(self.schema)

    self.data = {}
    if not strict:
        self._filter_data(json_data, self.schema['properties'], self.data)
    else:
        self.data = json_data
    self.validator = Draft4Validator(self.schema)
    self.errors = None
def make_validation(self):
    """
    Validate the retrieved experiments against the schema at ``self.schema_url``.

    :return: a ``(valid, invalid)`` pair of dictionaries keyed like the
        content returned by ``get_all_experiments``. ``invalid`` values are
        either the validator's return value or an ``"Unexpected error: ..."``
        string. If ``get_user_content_id`` yields an exception, an
        ``Exception`` instance is returned (not raised) instead.
    """
    user_accessible_ids = self.get_user_content_id()
    if isinstance(user_accessible_ids, Exception):
        return Exception("Error with client ID " + self.clientID)

    valid, invalid = {}, {}
    schema = json.loads(requests.get(self.schema_url).text)
    validator = Draft4Validator(
        schema, resolver=RefResolver(self.schema_url, schema, {}))
    content = self.get_all_experiments(self.item_number, user_accessible_ids)

    for key in content:
        experiment = self.preprocess_content(content[key])
        try:
            outcome = validator.validate(experiment)
            if outcome is not None:
                invalid[key] = outcome
            else:
                valid[key] = experiment
        except Exception as exc:
            invalid[key] = "Unexpected error: " + str(exc)
    return valid, invalid
def persist_messages(messages):
    """Validate incoming RECORD messages and hand them to storage.

    SCHEMA messages register a validator for their stream. RECORD messages
    are validated, flattened and passed to ``write_to_storagegrid``. Other
    message types are ignored.

    :param messages: iterable of raw message strings.
    :returns: ``state``, which this function never changes from ``None``.
    :raises Exception: if a record arrives before its stream's schema.
    """
    state = None
    schemas, validators = {}, {}

    for message in messages:
        try:
            parsed = singer.parse_message(message).asdict()
        except json.decoder.JSONDecodeError:
            logger.error("Unable to parse:\n{}".format(message))
            raise

        kind = parsed['type']
        if kind == 'SCHEMA':
            stream = parsed['stream']
            schemas[stream] = parsed['schema']
            validators[stream] = Draft4Validator(parsed['schema'])
            continue
        if kind != 'RECORD':
            continue

        stream = parsed['stream']
        if stream not in schemas:
            raise Exception(
                "A record for stream {} was encountered before a corresponding schema"
                .format(stream))
        validators[stream].validate(parsed['record'])
        write_to_storagegrid(flatten(parsed['record']))

    return state
def test_anyOf(self):
    """Check the error tree produced when no ``anyOf`` branch matches.

    A single top-level ``anyOf`` error is expected, whose context holds one
    child error per branch (``minimum`` and ``type``).
    """
    instance = 5
    schema = {"anyOf": [{"minimum": 20}, {"type": "string"}]}

    errors = list(Draft4Validator(schema).iter_errors(instance))
    self.assertEqual(len(errors), 1)
    e = errors[0]

    # Top-level error.
    self.assertEqual(e.validator, "anyOf")
    self.assertEqual(list(e.schema_path), ["anyOf"])
    self.assertEqual(e.validator_value, schema["anyOf"])
    self.assertEqual(e.instance, instance)
    self.assertEqual(e.schema, schema)
    self.assertEqual(list(e.path), [])
    self.assertEqual(len(e.context), 2)

    # Child errors, in sorted order: (branch index, failing keyword).
    expected_children = [(0, "minimum"), (1, "type")]
    for child, (index, keyword) in zip(sorted_errors(e.context),
                                       expected_children):
        branch = schema["anyOf"][index]
        self.assertEqual(child.validator, keyword)
        self.assertEqual(list(child.schema_path), [index, keyword])
        self.assertEqual(child.validator_value, branch[keyword])
        self.assertEqual(child.instance, instance)
        self.assertEqual(child.schema, branch)
        self.assertEqual(list(child.path), [])
        self.assertEqual(len(child.context), 0)
def process_messages(messages):
    """Validate RECORD messages against the schema announced for their stream.

    SCHEMA messages register a validator and the stream's key properties.
    RECORD messages are validated. STATE messages are only logged. Any
    other type is logged as a warning.

    :param messages: iterable of raw message strings.
    :returns: ``state``, which is never reassigned here and so is ``None``.
    :raises json.decoder.JSONDecodeError: if a message cannot be parsed.
    :raises Exception: if a record arrives before its stream's schema.
    """
    # NOTE(review): STATE messages are logged but never stored in ``state``;
    # confirm whether callers expect the last state value to be returned.
    state = None
    schemas = {}
    key_properties = {}
    validators = {}

    for message in messages:
        try:
            msg = singer.parse_message(message).asdict()
        except json.decoder.JSONDecodeError:
            logger.error("ERROR: Failed to parse message\n{}".format(message))
            # Falling through would read ``msg`` while it is undefined (first
            # message) or still holds the previous message, so stop here.
            raise

        message_type = msg['type']
        if message_type == 'RECORD':
            if msg['stream'] not in schemas:
                raise Exception(
                    "A record for stream {} was encountered before schema".
                    format(msg['stream']))
            validators[msg['stream']].validate(msg['record'])
        elif message_type == 'STATE':
            logger.debug('Setting state to {}'.format(msg['value']))
        elif message_type == 'SCHEMA':
            stream = msg['stream']
            schemas[stream] = msg['schema']
            validators[stream] = Draft4Validator(msg['schema'])
            key_properties[stream] = msg['key_properties']
        else:
            logger.warning("Unknown message type {} in message {}".format(
                msg['type'], msg))
    return state
def _validate_dataset(self, validator_schema, schema_version, dataset):
    """Validate ``dataset`` against a bundled schema and describe the errors.

    The schema file is chosen by whether ``validator_schema`` equals
    ``'non-federal'`` and whether ``schema_version`` equals ``'1.1'``; it is
    loaded relative to this module's directory.

    :returns: an empty string when no errors are found, otherwise one
        message with the dataset identifier, title, error count and each
        error rendered by ``self._validate_readable_msg``.
    """
    schema_files = {
        # (is non-federal, is version 1.1) -> schema path
        (True, True): 'pod_schema/non-federal-v1.1/dataset.json',
        (True, False): 'pod_schema/non-federal/single_entry.json',
        (False, True): 'pod_schema/federal-v1.1/dataset.json',
        (False, False): 'pod_schema/single_entry.json',
    }
    file_path = schema_files[(validator_schema == 'non-federal',
                              schema_version == '1.1')]

    with open(os.path.join(os.path.dirname(__file__), file_path)) as json_file:
        schema = json.load(json_file)

    checker = Draft4Validator(schema, format_checker=FormatChecker())
    count = 0
    pieces = [";"]
    for count, error in enumerate(checker.iter_errors(dataset), start=1):
        pieces.append(" ### ERROR #" + str(count) + ": "
                      + self._validate_readable_msg(error) + "; ")
    # Stripping ";" and " " leaves an empty string when nothing was reported.
    msg = "".join(pieces).strip("; ")

    if msg:
        identifier_text = "Identifier: " + (dataset.get("identifier") or "Unknown")
        title_text = "Title: " + (dataset.get("title") or "Unknown")
        msg = (identifier_text + "; " + title_text + "; " + str(count)
               + " Error(s) Found. " + msg + ".")
    return msg
def validate_primitive_parameter(value, param, parameter_type, message_prefix, hide_input=False):
    """Convert ``value`` to ``parameter_type`` and validate it against ``param``.

    A copy of ``param`` with its ``'required'`` key removed is used as the
    schema.

    :param hide_input: when true, the raw input is left out of the
        validation error message.
    :returns: the converted value.
    :raises InvalidInput: if conversion fails with ``ValueError`` or
        ``TypeError``, or if the converted value is rejected by the
        validator.
    """
    try:
        converted_value = convert_primitive_type(value, parameter_type)
    except (ValueError, TypeError):
        message = '{0} has invalid input. ' \
                  'Input {1} could not be converted to type {2}'.format(
                      message_prefix, value, parameter_type)
        logger.error(message)
        raise InvalidInput(message)

    schema = deepcopy(param)
    schema.pop('required', None)
    try:
        Draft4Validator(
            schema, format_checker=draft4_format_checker).validate(converted_value)
    except ValidationError as exception:
        details = format_exception_message(exception)
        if hide_input:
            message = '{0} has invalid input. {1} does not conform to ' \
                      'validators: {2}'.format(message_prefix, parameter_type,
                                               details)
        else:
            message = '{0} has invalid input. ' \
                      'Input {1} with type {2} does not conform to ' \
                      'validators: {3}'.format(message_prefix, value,
                                               parameter_type, details)
        logger.error(message)
        raise InvalidInput(message)
    return converted_value
def dataset2dict(model_type, upload): ''' This method converts the supplied csv, or xml file upload(s) to a uniform dict object, using necessary converter utility functions. @upload, uploaded dataset(s). ''' # local variables list_error = [] converted = [] datasets = upload['dataset'] settings = upload['properties'] stream = settings.get('stream', None) list_model_type = current_app.config.get('MODEL_TYPE') try: # programmatic-interface if stream == 'True': dataset_type = settings['dataset_type'] # convert dataset(s) into extended list for dataset in datasets: # scrape url content if dataset_type == 'dataset_url': r = requests.get(dataset) instance = r.json()['dataset'] else: instance = [dataset] # validate against schema, and build converted list try: if model_type == list_model_type[0]: Draft4Validator(schema_svm()).validate(instance) elif model_type == list_model_type[1]: Draft4Validator(schema_svr()).validate(instance) converted.extend(instance) except Exception, error: msg = "Stream contains invalid syntax, with error: %s" % error converted.extend({'error': msg}) # web-interface else:
def persist_messages(messages, destination_path, compression_method=None):
    """Collect validated records and write them to a single parquet file.

    SCHEMA messages register a validator and key properties for their
    stream. RECORD messages are validated, flattened and buffered. STATE
    messages update the returned state. Other types are logged as warnings.
    The file name is built from the stream of the last record and the
    timestamp taken while handling the last message.

    :param messages: iterable of raw message strings.
    :param destination_path: directory for the parquet file (``~`` is
        expanded).
    :param compression_method: optional compression name, also used to pick
        the file name suffix.
    :returns: the current state (reset to ``None`` after each record).
    :raises Exception: on unparsable input or a record without a schema.
    """
    state = None
    schemas = {}
    key_properties = {}
    validators = {}
    records = []  # flattened records destined for the parquet file

    for raw_message in messages:
        try:
            message = singer.parse_message(raw_message).asdict()
        except json.decoder.JSONDecodeError:
            raise Exception("Unable to parse:\n{}".format(raw_message))

        timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
        message_type = message['type']

        if message_type == 'RECORD':
            if message['stream'] not in schemas:
                raise Exception("A record for stream {} was encountered before a corresponding schema".format(message['stream']))
            stream_name = message['stream']
            validators[stream_name].validate(message['record'])
            records.append(flatten(message['record']))
            state = None
        elif message_type == 'SCHEMA':
            stream = message['stream']
            schemas[stream] = message['schema']
            validators[stream] = Draft4Validator(message['schema'])
            key_properties[stream] = message['key_properties']
        elif message_type == 'STATE':
            LOGGER.debug('Setting state to {}'.format(message['value']))
            state = message['value']
        else:
            LOGGER.warning("Unknown message type {} in message {}".format(message['type'], message))

    if not records:
        # Nothing was retrieved, so no file is created.
        LOGGER.info("There were not any records retrieved.")
        return state

    dataframe = pd.DataFrame(records)
    filename = stream_name + '-' + timestamp + '.parquet'
    filepath = os.path.expanduser(os.path.join(destination_path, filename))

    parquet_options = {'engine': 'pyarrow'}
    if compression_method:
        # The mapping covers the compression names pandas accepts; per the
        # original note, pyarrow only allowed gzip when this was written.
        extension_mapping = {
            'gzip': '.gz',
            'bz2': '.bz2',
            'zip': '.zip',
            'xz': '.xz',
        }
        filepath = filepath + extension_mapping[compression_method]
        parquet_options['compression'] = compression_method
    dataframe.to_parquet(filepath, **parquet_options)
    return state
def validate(self, instance):
    """Validate ``instance`` against the JSON schema stored at ``self.file_path``.

    Every validation error is logged through a fresh ``Log`` object, in
    order of the errors' string form.

    :returns: ``True`` when no validation errors are found, else ``False``.
    """
    log = Log()
    with open(self.file_path, 'r', encoding='utf-8') as f:
        schema = json.load(f)
    validator = Draft4Validator(schema)

    # Collect the errors once: "no errors" is what ``is_valid`` reports, so
    # a second validation pass over the instance is unnecessary.
    errors = sorted(validator.iter_errors(instance), key=str)
    for error in errors:
        log.error(error)
    return not errors
def __init__(self):
    """Create validators for the ``File``, ``FileError`` and ``FileItem`` schemas.

    All validators share one resolver built from the ``item`` schema and
    one ``FormatChecker``. ``files`` and ``file_items`` start as empty sets.
    """
    self.validators = {}
    self.files = set()
    self.file_items = set()

    shared_options = {
        'resolver': RefResolver.from_schema(_json_loads('item')),
        'format_checker': FormatChecker(),
    }
    self.validators.update(
        (item, Draft4Validator(_json_loads(item), **shared_options))
        for item in ('File', 'FileError', 'FileItem')
    )
def test_settings_json_valid(settings_schema, settings_json, should_be_valid):
    """Check that a settings file's validity matches ``should_be_valid``.

    Both ``settings_schema`` and ``settings_json`` must provide
    ``open("r")`` and contain JSON.
    """
    with settings_schema.open("r") as schema_handle:
        validator = Draft4Validator(json.load(schema_handle))
    with settings_json.open("r") as settings_handle:
        settings = json.load(settings_handle)
    assert validator.is_valid(settings) == should_be_valid
def persist_lines(config, lines):
    """Validate a stream of JSON message lines and track the latest state.

    SCHEMA lines register a validator and key properties for their stream.
    RECORD lines are validated against that schema and reset the state to
    ``None``. STATE lines set the state.

    :param config: target configuration (not used by this function).
    :param lines: iterable of JSON-encoded message lines.
    :returns: the state after the last line, or ``None``.
    :raises Exception: for a missing ``type``/``stream``/``key_properties``
        key, a record seen before its schema, or an unknown message type.
    """
    state = None
    schemas = {}
    key_properties = {}
    validators = {}

    for line in lines:
        try:
            o = json.loads(line)
        except json.decoder.JSONDecodeError:
            logger.error("Unable to parse:\n{}".format(line))
            raise

        if 'type' not in o:
            raise Exception("Line is missing required key 'type': {}".format(line))
        t = o['type']

        if t not in ('RECORD', 'STATE', 'SCHEMA'):
            raise Exception("Unknown message type {} in message {}"
                            .format(o['type'], o))

        if t == 'STATE':
            logger.debug('Setting state to {}'.format(o['value']))
            state = o['value']
            continue

        # RECORD and SCHEMA both need a stream name.
        if 'stream' not in o:
            raise Exception("Line is missing required key 'stream': {}".format(line))
        stream = o['stream']

        if t == 'SCHEMA':
            schemas[stream] = o['schema']
            validators[stream] = Draft4Validator(o['schema'])
            if 'key_properties' not in o:
                raise Exception("key_properties field is required")
            key_properties[stream] = o['key_properties']
        else:
            if stream not in schemas:
                raise Exception("A record for stream {} was encountered before a corresponding schema".format(stream))
            validators[stream].validate(o['record'])
            # TODO: Process Record message here..
            state = None

    return state
def test_format_error_causes_become_validation_error_causes(self):
    """A format-check failure is exposed as the ValidationError's cause.

    The ``boom`` check is registered for the ``"boom"`` format with
    ``ValueError`` as its raised type; the resulting ``ValidationError``
    must carry ``BOOM`` as both ``cause`` and ``__cause__``.
    """
    checker = FormatChecker()
    register = checker.checks("boom", raises=ValueError)
    register(boom)

    validator = Draft4Validator({"format": "boom"}, format_checker=checker)
    with self.assertRaises(ValidationError) as cm:
        validator.validate("BOOM")

    raised = cm.exception
    for attribute in ("cause", "__cause__"):
        self.assertIs(getattr(raised, attribute), BOOM)
def test_format_error_causes_become_validation_error_causes(self):
    """The error raised by a format check becomes the error's ``__cause__``.

    ``self.fn`` is registered for the ``"foo"`` format and then set to
    raise a ``ValueError``; that same object must be the ``__cause__`` of
    the resulting ``ValidationError``.
    """
    checker = FormatChecker()
    register = checker.checks("foo", raises=ValueError)
    register(self.fn)

    cause = ValueError()
    self.fn.side_effect = cause

    validator = Draft4Validator({"format": "foo"}, format_checker=checker)
    with self.assertRaises(ValidationError) as cm:
        validator.validate("bar")

    self.assertIs(cm.exception.__cause__, cause)
def __init__(self, schema_uris, schemadir=None, resolve_local=True,
             resolve_remote=False, resolve_cache_expire=5):
    """Load the schemas and build one Draft 4 validator per schema URI.

    :param schema_uris: URIs of the schemas to validate against.
    :param schemadir: passed through to ``load_schemas``.
    :param resolve_local: stored on the instance and used to choose the
        debug log message.
    :param resolve_remote: when true, an HTTP session is created and the
        on-disk resolve cache may be loaded.
    :param resolve_cache_expire: cached entries are kept while their age
        in days is at most this value; the cache is only read when the
        value is positive.
    """
    if resolve_remote:
        self.http_session = retrying_http_session()
    store = load_schemas(schema_uris, schemadir)

    debug_messages = {
        # (resolve_local, resolve_remote) -> message
        (True, True): "Resolving URLs and locating local references",
        (True, False): "Locating local references",
        (False, True): "Resolving URLs",
    }
    debug_message = debug_messages.get(
        (bool(resolve_local), bool(resolve_remote)))
    if debug_message is not None:
        LOGGER.debug(debug_message)

    self.resolve_local = resolve_local
    self.resolve_remote = resolve_remote
    self.resolve_cache = {}
    self.resolve_cache_expire = resolve_cache_expire

    if resolve_remote and resolve_cache_expire > 0:
        try:
            with open(SchemaValidator.cache_path) as f:
                # NOTE(review): yaml.load is called without an explicit
                # Loader, which is unsafe on untrusted files; consider
                # yaml.safe_load if the cache only holds plain values.
                urls = yaml.load(f)
        except IOError:
            LOGGER.debug('No resolve cache available ({0})'.format(
                SchemaValidator.cache_path))
        else:
            now = datetime.datetime.now()
            self.resolve_cache.update(
                (url, stamp) for url, stamp in urls.items()
                if (now - stamp).days <= resolve_cache_expire)

    # Let date objects satisfy the "string" type alongside real strings;
    # ``basestring`` only exists on Python 2.
    try:
        string_types = (basestring, datetime.date)
    except NameError:
        string_types = (str, datetime.date)
    types = {u'string': string_types}

    # This replaces the 'uri' entry on jsonschema's module-level
    # draft4_format_checker object, not on a private copy.
    format_checker = jsonschema.draft4_format_checker
    format_checker.checkers['uri'] = (self.url_ref, ValueError)

    self.validators = {}
    for schema_uri in schema_uris:
        schema = store[schema_uri]
        self.validators[schema_uri] = Draft4Validator(
            schema,
            resolver=jsonschema.RefResolver(schema_uri, schema, store=store),
            types=types,
            format_checker=format_checker)
def validate_dataset(self, data_instance):
    """Validate ``data_instance`` against the schema from ``jsonschema_data()``.

    :returns: ``True`` when validation succeeds; ``False`` when anything
        raises, in which case a dict describing the failure (class, method
        and the exception as ``msg``) is appended to ``self.list_error``.
    """
    try:
        Draft4Validator(jsonschema_data()).validate(data_instance)
        return True
    # ``except X as name`` parses on Python 2.6+ and Python 3; the former
    # ``except X, name`` spelling is a SyntaxError on Python 3.
    except Exception as error:
        self.list_error.append({
            'class': 'Data_Iterator',
            'method': 'validate_dataset',
            'msg': error
        })
        return False
def load_config(filename):
    """
        Load the configuration file and test if version is supported.

        The parser is chosen by file extension (``.json``, ``.yml`` or
        ``.yaml``). The loaded data is validated against ``CONFIGSCHEMA``
        and every validation error is logged. A warning is logged when a
        truthy ``proxy_config`` entry is present.

        Raises ``LoadConfigException`` for an unknown extension, for schema
        validation errors, or for a header version outside the supported
        range.
    """
    extension = os.path.splitext(filename)[1]
    if extension == '.json':
        import json
        parse = json.load
    elif extension in ['.yml', '.yaml']:
        import yaml
        parse = yaml.safe_load
    else:
        raise LoadConfigException('Config file extension not recognized',
                                  filename)
    with open(filename, 'rb') as fds:
        config = parse(fds)

    error_count = 0
    for error in Draft4Validator(CONFIGSCHEMA).iter_errors(config):
        error_count += 1
        logging.error('Config file validation Error:\n%s', error)
    if error_count:
        raise LoadConfigException('Error(s) occured while validating the '
                                  'config file', filename)

    try:
        version_value = int(config['header']['version'])
    except ValueError:
        # Be compatible: version string '0.10' is equivalent to file
        # version 1. The schema has already checked this, so just set the
        # right version here.
        version_value = 1

    if not (__compatible_file_version__ <= version_value <= __file_version__):
        raise LoadConfigException(
            'This version of kas is compatible with '
            'version {} to {}, file has version {}'.format(
                __compatible_file_version__, __file_version__, version_value),
            filename)

    if config.get('proxy_config'):
        logging.warning('Obsolete proxy_config detected. '
                        'This has no effect and will be rejected soon.')

    return config
def get_validator_for_response(self, path, http_method, status_code):
    """Return the cached validator for a response schema, building it if needed.

    The schema is read from ``self.openapi_spec`` under the given path,
    method and status code, for ``self.DEFAULT_CONTENT_TYPE``. Validators
    are cached in ``self.validators`` by
    ``(path, http_method, status_code)``.
    """
    cache_key = (path, http_method, status_code)
    if cache_key not in self.validators:
        operation = self.openapi_spec["paths"][path][http_method]
        response = operation["responses"][status_code]
        schema = response["content"][self.DEFAULT_CONTENT_TYPE]["schema"]
        self.validators[cache_key] = Draft4Validator(
            schema, format_checker=self.format_checker)
    return self.validators[cache_key]
def process_schema(msg, validators, schemas, key_properties):
    """Register the schema, validator and key properties carried by ``msg``.

    The three dictionaries are updated in place, keyed by ``msg.stream``.
    The schema and validator are stored before ``key_properties`` is
    checked.

    :raises Exception: if ``msg.stream`` or ``msg.key_properties`` is falsy.
    """
    stream = msg.stream
    if not stream:
        raise Exception(
            "Line is missing required key 'stream': {}".format(msg))

    schemas[stream] = msg.schema
    validators[stream] = Draft4Validator(msg.schema)

    if not msg.key_properties:
        raise Exception("key_properties field is required")
    key_properties[stream] = msg.key_properties
def handle_schema(o, schemas, validators, key_properties, line):
    """Register the schema, validator and key properties of a SCHEMA message.

    The three dictionaries are updated in place, keyed by ``o['stream']``.
    The schema and validator are stored before ``key_properties`` is
    checked.

    :param o: decoded message.
    :param line: raw line, used only in the error message.
    :returns: the ``(schemas, validators, key_properties)`` tuple.
    :raises Exception: if ``'stream'`` or ``'key_properties'`` is missing.
    """
    if 'stream' not in o:
        raise Exception(
            "Line is missing required key 'stream': {}".format(line))

    stream, schema = o['stream'], o['schema']
    schemas[stream] = schema
    validators[stream] = Draft4Validator(schema)

    if 'key_properties' not in o:
        raise Exception("key_properties field is required")
    key_properties[stream] = o['key_properties']

    return schemas, validators, key_properties
def validate_label(self):
    '''@validate_label

    This method validates either the dependent variable (observation)
    label(s), or the independent variable (feature) label(s).

    ``self.data`` is wrapped as ``{'value': self.data}`` and validated
    against the schema from ``jsonschema_string()``. If anything raises,
    the exception's string form is appended to ``self.list_error``.

    '''
    try:
        Draft4Validator(jsonschema_string()).validate({'value': self.data})
    # ``except X as name`` parses on Python 2.6+ and Python 3; the former
    # ``except X, name`` spelling is a SyntaxError on Python 3.
    except Exception as error:
        self.list_error.append(str(error))
def __init__(self, port_id, validator=None, constructor=None,
             required=False, annotations=None, depth=0):
    """Store the port's settings and wrap its schema in a validator.

    :param port_id: stored as ``self.id``.
    :param validator: schema handed to ``Draft4Validator``.
    :param constructor: stored as ``self.constructor``; ``str`` is used
        when the argument is falsy.
    :param required: stored as given.
    :param annotations: stored as given.
    :param depth: stored as given.
    """
    # NOTE(review): with the default ``validator=None`` the validator is
    # built from ``None`` rather than a schema dict -- confirm this is
    # intended.
    schema_validator = Draft4Validator(validator)

    self.id = port_id
    self.validator = schema_validator
    self.required = required
    self.annotations = annotations
    self.constructor = constructor if constructor else str
    self.depth = depth
def get_validator_for_request_body(self, path, http_method):
    """Return the cached validator for a request body, building it if needed.

    The schema is read from ``self.openapi_spec`` for the given path and
    method, under ``self.DEFAULT_CONTENT_TYPE``. When the operation has no
    (or a falsy) ``requestBody``, ``None`` is cached and returned.
    Validators are cached in ``self.validators`` by ``(path, http_method)``.
    """
    cache_key = (path, http_method)
    if cache_key in self.validators:
        return self.validators[cache_key]

    operation = self.openapi_spec["paths"][path][http_method]
    request_body = operation.get("requestBody")
    if request_body:
        schema = request_body["content"][self.DEFAULT_CONTENT_TYPE]["schema"]
        validator = Draft4Validator(schema,
                                    format_checker=self.format_checker)
    else:
        validator = None

    self.validators[cache_key] = validator
    return self.validators[cache_key]