def validate_processor(self, processor):
    """ The OCR-D validator updates all parameters with default values. """
    if not isinstance(processor, Mapping):
        workflow_namespace.abort(
            400, f'Wrong parameter. Unknown processor "{processor}".')
    name = processor["name"]
    if name not in PROCESSOR_NAMES:
        workflow_namespace.abort(
            400, f'Wrong parameter. Unknown processor "{name}".')
    # Overlay the request on top of the configured action defaults.
    processor = merge_dicts(PROCESSORS_ACTION[name], processor)
    processor.setdefault("parameters", {})
    validator = ParameterValidator(PROCESSORS_CONFIG[name])
    report = validator.validate(processor["parameters"])
    if not report.is_valid:
        workflow_namespace.abort(
            400, f'Wrong parameter. '
            f'Error(s) while validating parameters "{processor["parameters"]}" '
            f'for processor "{name}" -> "{str(report.errors)}".'
        )
    return processor
def validate(self):
    """Check executable, input group and parameters; return the validation report."""
    if not which(self.executable):
        raise Exception("Executable not found in PATH: %s" % self.executable)
    if not self.input_file_grps:
        raise Exception("Task must have input file group")
    # TODO uncomment and adapt once OCR-D/spec#121 lands
    # # make implicit input/output groups explicit by defaulting to what is
    # # provided in ocrd-tool.json
    # actual_output_grps = [*self.ocrd_tool_json['output_file_grp']]
    # for i, grp in enumerate(self.output_file_grps):
    #     actual_output_grps[i] = grp
    # self.output_file_grps = actual_output_grps
    # actual_input_grps = [*self.ocrd_tool_json['input_file_grp']]
    # for i, grp in enumerate(self.input_file_grps):
    #     actual_input_grps[i] = grp
    # self.input_file_grps = actual_input_grps
    report = ParameterValidator(self.ocrd_tool_json).validate(self.parameters)
    if not report.is_valid:
        raise Exception(report.errors)
    # TODO remove once OCR-D/spec#121 lands
    if 'output_file_grp' in self.ocrd_tool_json and not self.output_file_grps:
        raise Exception(
            "Processor requires output_file_grp but none was provided.")
    return report
def __init__(self, workspace, ocrd_tool=None, parameter=None, input_file_grp="INPUT",
             output_file_grp="OUTPUT", page_id=None, dump_json=False, version=None):
    """
    Instantiate, but do not process.

    With ``dump_json`` true, only print ``ocrd_tool`` as JSON and return.
    Otherwise validate ``parameter`` against ``ocrd_tool`` and change into
    the workspace directory.

    Raises:
        Exception: if ``parameter`` does not validate against ``ocrd_tool``.
    """
    if parameter is None:
        parameter = {}
    if dump_json:
        print(json.dumps(ocrd_tool, indent=True))
        return
    self.ocrd_tool = ocrd_tool
    self.version = version
    self.workspace = workspace
    # FIXME HACK would be better to use pushd_popd(self.workspace.directory)
    # but there is no way to do that in process here since it's an
    # overridden method. chdir is almost always an anti-pattern.
    if self.workspace:
        # Remember the previous working directory so callers can restore it,
        # consistent with the other Processor.__init__ variant in this file.
        self.old_pwd = os.getcwd()
        os.chdir(self.workspace.directory)
    self.input_file_grp = input_file_grp
    self.output_file_grp = output_file_grp
    self.page_id = None if page_id == [] or page_id is None else page_id
    parameterValidator = ParameterValidator(ocrd_tool)
    report = parameterValidator.validate(parameter)
    if not report.is_valid:
        raise Exception("Invalid parameters %s" % report.errors)
    self.parameter = parameter
def __init__(
        self,
        workspace,
        ocrd_tool=None,
        parameter=None,
        input_file_grp="INPUT",
        output_file_grp="OUTPUT",
        page_id=None,
        dump_json=False,
        version=None
):
    """Set up the processor for a workspace; with ``dump_json``, only print the tool JSON."""
    parameter = {} if parameter is None else parameter
    if dump_json:
        # Emit the ocrd-tool description and stop — no processing setup.
        print(json.dumps(ocrd_tool, indent=True))
        return
    self.ocrd_tool = ocrd_tool
    self.version = version
    self.workspace = workspace
    self.input_file_grp = input_file_grp
    self.output_file_grp = output_file_grp
    # Normalize "no pages selected" ([] or None) to None.
    self.page_id = None if (page_id is None or page_id == []) else page_id
    validator = ParameterValidator(ocrd_tool)
    report = validator.validate(parameter)
    if not report.is_valid:
        raise Exception("Invalid parameters %s" % report.errors)
    self.parameter = parameter
def test_missing_required(self):
    """A parameter the schema marks as required must fail validation when absent."""
    schema = {
        "parameters": {
            "i-am-required": {"type": "number", "required": True},
        }
    }
    report = ParameterValidator(schema).validate({})
    self.assertFalse(report.is_valid)
    self.assertIn('is a required property', report.errors[0])
def ocrd_tool_tool_parse_params(ctx, parameter, json):
    """
    Parse parameters with fallback to defaults and output as shell-eval'able assignments to params var.
    """
    report = ParameterValidator(ctx.json['tools'][ctx.tool_name]).validate(parameter)
    if not report.is_valid:
        print(report.to_xml())
        sys.exit(1)
    if json:
        print(dumps(parameter))
        return
    # Shell output: one params[...] assignment per key.
    for key, value in parameter.items():
        print('params["%s"]="%s"' % (key, value))
def validate(self):
    """Validate this task: executable on PATH, input groups present, parameters well-formed."""
    if not which(self.executable):
        raise Exception("Executable not found in PATH: %s" % self.executable)
    if not self.input_file_grps:
        raise Exception("Task must have input file group")
    parameters = parse_json_string_or_file(self.parameter_path) if self.parameter_path else {}
    report = ParameterValidator(self.ocrd_tool_json).validate(parameters)
    if not report.is_valid:
        raise Exception(report.errors)
    if 'output_file_grp' in self.ocrd_tool_json and not self.output_file_grps:
        raise Exception(
            "Processor requires output_file_grp but none was provided.")
    return report
def validate_parameters(ocrd_tool, executable, param_json):
    '''
    Validate PARAM_JSON against parameter definition of EXECUTABLE in OCRD_TOOL
    '''
    with codecs.open(ocrd_tool, encoding='utf-8') as f:
        tool_json = loads(f.read())
    validator = ParameterValidator(tool_json['tools'][executable])
    _inform_of_result(validator.validate(parse_json_string_or_file(param_json)))
def ocrd_tool_tool_parse_params(ctx, parameters, json):
    """
    Parse parameters with fallback to defaults and output as shell-eval'able assignments to params var.
    """
    if parameters is None or parameters == "":
        parameters = {}
    else:
        # ``parameters`` names a JSON file to read.
        with open(parameters, 'r') as fh:
            parameters = loads(fh.read())
    report = ParameterValidator(ctx.json['tools'][ctx.tool_name]).validate(parameters)
    if not report.is_valid:
        print(report.to_xml())
        sys.exit(1)
    if json:
        print(dumps(parameters))
        return
    for key, value in parameters.items():
        print('params["%s"]="%s"' % (key, value))
def test_default_assignment(self):
    """Defaults get filled into the validated dict; non-required params without defaults stay absent."""
    schema = {
        "parameters": {
            "num-param": {"type": "number", "default": 1},
            "baz": {"type": "string", "required": True},
            'foo': {"required": False},
        }
    }
    given = {'baz': '23'}
    report = ParameterValidator(schema).validate(given)
    self.assertTrue(report.is_valid)
    # The validator mutates the input in place, adding the default.
    self.assertEqual(given, {'baz': '23', "num-param": 1})
def validate(self):
    """Run the processor CLI to obtain its ocrd-tool.json and validate this task against it."""
    if not which(self.executable):
        raise Exception("Executable not found in PATH: %s" % self.executable)
    if not self.input_file_grps:
        raise Exception("Task must have input file group")
    # Ask the processor itself for its tool description.
    dump = run([self.executable, '--dump-json'], stdout=PIPE, check=True,
               universal_newlines=True)
    ocrd_tool_json = json.loads(dump.stdout)
    parameters = parse_json_string_or_file(self.parameter_path) if self.parameter_path else {}
    report = ParameterValidator(ocrd_tool_json).validate(parameters)
    if not report.is_valid:
        raise Exception(report.errors)
    if 'output_file_grp' in ocrd_tool_json and not self.output_file_grps:
        raise Exception(
            "Processor requires output_file_grp but none was provided.")
    return True
def chain_data(self, json_data):
    """ Validate and prepare chain input.

    Marshals ``json_data`` against ``chain_model``, defaults missing
    parameters to empty dicts, checks that every processor is known, and
    validates each processor's parameters (the OCR-D validator also fills
    in default values in place).  Aborts the request with HTTP 400 on any
    validation failure.
    """
    data = marshal(data=json_data, fields=chain_model, skip_none=False)
    if data["parameters"] is None:
        data["parameters"] = {}
    # Should some checks be in the model itself?
    if data["processors"] is None:
        chain_namespace.abort(400, "Wrong parameter.",
                              status="Missing processors for chain.",
                              statusCode="400")
    for processor in data["processors"]:
        if processor not in PROCESSOR_NAMES:
            chain_namespace.abort(
                400, "Wrong parameter.",
                status="Unknown processor \"{}\".".format(processor),
                statusCode="400")
        # The OCR-D validator updates all parameters with default values.
        if processor not in data["parameters"].keys():
            data["parameters"][processor] = {}
        validator = ParameterValidator(PROCESSORS_CONFIG[processor])
        report = validator.validate(data["parameters"][processor])
        if not report.is_valid:
            chain_namespace.abort(
                400, "Wrong parameter.",
                # BUG FIX: the two implicitly concatenated literals were
                # missing a separating space ('..."{0}"for processor...').
                status="Error while validating parameters \"{0}\" "
                "for processor \"{1}\" -> \"{2}\".".format(
                    data["parameters"][processor], processor,
                    str(report.errors)),
                statusCode="400")
    return data
def __init__(
        self,
        workspace,
        ocrd_tool=None,
        parameter=None,
        # TODO OCR-D/core#274
        # input_file_grp=None,
        # output_file_grp=None,
        input_file_grp="INPUT",
        output_file_grp="OUTPUT",
        page_id=None,
        show_resource=None,
        list_resources=False,
        show_help=False,
        show_version=False,
        dump_json=False,
        version=None):
    """
    Instantiate, but do not process. Unless ``list_resources`` or
    ``show_resource`` or ``show_help`` or ``show_version`` or
    ``dump_json`` is true, setup for processing (parsing and
    validating parameters, entering the workspace directory).

    Args:
        workspace (:py:class:`~ocrd.Workspace`): The workspace to process. \
            Can be ``None`` even for processing (esp. on multiple workspaces), \
            but then needs to be set before running.
    Keyword Args:
        ocrd_tool (string): JSON of the ocrd-tool description for that processor. \
            Can be ``None`` for processing, but needs to be set before running.
        parameter (string): JSON of the runtime choices for ocrd-tool ``parameters``. \
            Can be ``None`` even for processing, but then needs to be set before running.
        input_file_grp (string): comma-separated list of METS ``fileGrp``s used for input.
        output_file_grp (string): comma-separated list of METS ``fileGrp``s used for output.
        page_id (string): comma-separated list of METS physical ``page`` IDs to process \
            (or empty for all pages).
        show_resource (string): If not ``None``, then instead of processing, resolve \
            given resource by name and print its contents to stdout.
        list_resources (boolean): If true, then instead of processing, find all installed \
            resource files in the search paths and print their path names.
        show_help (boolean): If true, then instead of processing, print a usage description \
            including the standard CLI and all of this processor's ocrd-tool parameters and \
            docstrings.
        show_version (boolean): If true, then instead of processing, print information on \
            this processor's version and OCR-D version. Exit afterwards.
        dump_json (boolean): If true, then instead of processing, print :py:attr:`ocrd_tool` \
            on stdout.
    """
    if parameter is None:
        parameter = {}
    if dump_json:
        # Print the tool description and stop — no processing setup happens.
        print(json.dumps(ocrd_tool, indent=True))
        return
    if list_resources:
        # Print every installed resource path matching the resource kinds
        # (files and/or directories) this processor supports, then stop.
        has_dirs, has_files = get_processor_resource_types(None, ocrd_tool)
        for res in list_all_resources(ocrd_tool['executable']):
            if Path(res).is_dir() and not has_dirs:
                continue
            if not Path(res).is_dir() and not has_files:
                continue
            print(res)
        return
    if show_resource:
        # Resolve the named resource and dump its contents to stdout, then stop.
        has_dirs, has_files = get_processor_resource_types(None, ocrd_tool)
        res_fname = list_resource_candidates(ocrd_tool['executable'], show_resource)
        if not res_fname:
            initLogging()
            logger = getLogger('ocrd.%s.__init__' % ocrd_tool['executable'])
            logger.error("Failed to resolve %s for processor %s" % (show_resource, ocrd_tool['executable']))
        else:
            fpath = Path(res_fname[0])
            if fpath.is_dir():
                # Directory resources are streamed as a gzipped tarball.
                with pushd_popd(fpath):
                    fileobj = io.BytesIO()
                    with tarfile.open(fileobj=fileobj, mode='w:gz') as tarball:
                        tarball.add('.')
                    fileobj.seek(0)
                    copyfileobj(fileobj, sys.stdout.buffer)
            else:
                # Plain file resources are written out verbatim.
                sys.stdout.buffer.write(fpath.read_bytes())
        return
    self.ocrd_tool = ocrd_tool
    if show_help:
        self.show_help()
        return
    self.version = version
    if show_version:
        self.show_version()
        return
    self.workspace = workspace
    # FIXME HACK would be better to use pushd_popd(self.workspace.directory)
    # but there is no way to do that in process here since it's an
    # overridden method. chdir is almost always an anti-pattern.
    if self.workspace:
        # Remember the previous cwd so it can be restored after processing.
        self.old_pwd = getcwd()
        os.chdir(self.workspace.directory)
    self.input_file_grp = input_file_grp
    self.output_file_grp = output_file_grp
    # Normalize "no pages selected" ([] or None) to None.
    self.page_id = None if page_id == [] or page_id is None else page_id
    parameterValidator = ParameterValidator(ocrd_tool)
    report = parameterValidator.validate(parameter)
    if not report.is_valid:
        raise Exception("Invalid parameters %s" % report.errors)
    self.parameter = parameter
def test_extraneous(self):
    """Properties not declared in the schema must be rejected."""
    report = ParameterValidator({"parameters": {}}).validate({"foo": 42})
    self.assertFalse(report.is_valid)
    self.assertIn("Additional properties are not allowed ('foo' was unexpected)",
                  report.errors[0])