def validate_processor(self, processor):
    """Validate a processor description and fill in its defaults.

    ``processor`` must be a mapping whose ``"name"`` is listed in
    ``PROCESSOR_NAMES``. It is merged with the matching entry of
    ``PROCESSORS_ACTION``, and its ``"parameters"`` (an empty dict when
    the key is absent) are checked by a ``ParameterValidator`` built from
    the matching entry of ``PROCESSORS_CONFIG``. The OCR-D validator
    updates all parameters with default values.

    Every rejection is reported through ``workflow_namespace.abort`` with
    status 400.
    NOTE(review): this code relies on ``abort`` not returning (presumably
    it raises an HTTP error) -- confirm against the namespace in use.

    Args:
        processor: The processor description to check.

    Returns:
        The merged processor description, including its ``"parameters"``.
    """
    if not isinstance(processor, Mapping):
        workflow_namespace.abort(
            400, f'Wrong parameter. Unknown processor "{processor}".')

    # Use ``.get`` so a mapping without a "name" key is rejected with the
    # same 400 response instead of escaping as an unhandled KeyError.
    name = processor.get("name")
    if name not in PROCESSOR_NAMES:
        workflow_namespace.abort(
            400, f'Wrong parameter. Unknown processor "{name}".')

    processor = merge_dicts(PROCESSORS_ACTION[name], processor)

    validator = ParameterValidator(PROCESSORS_CONFIG[processor["name"]])
    if "parameters" not in processor:
        processor["parameters"] = {}

    report = validator.validate(processor["parameters"])

    if not report.is_valid:
        workflow_namespace.abort(
            400, f'Wrong parameter. '
            f'Error(s) while validating parameters "{processor["parameters"]}" '
            f'for processor "{processor["name"]}" -> "{str(report.errors)}".'
        )

    return processor
Esempio n. 2
0
 def validate(self):
     """Check that this task is runnable and return the parameter report.

     The checks run in this order: the executable must be found by
     ``which``, at least one input file group must be set,
     ``self.parameters`` must validate against ``self.ocrd_tool_json``,
     and an output file group must be set when the tool description has
     an ``output_file_grp`` key.

     Returns:
         The report produced by ``ParameterValidator.validate``.

     Raises:
         Exception: on the first check that fails.
     """
     resolved = which(self.executable)
     if not resolved:
         raise Exception("Executable not found in PATH: %s" %
                         self.executable)
     if not self.input_file_grps:
         raise Exception("Task must have input file group")

     # TODO uncomment and adapt once OCR-D/spec#121 lands
     # # make implicit input/output groups explicit by defaulting to what is
     # # provided in ocrd-tool.json
     # actual_output_grps = [*self.ocrd_tool_json['output_file_grp']]
     # for i, grp in enumerate(self.output_file_grps):
     #     actual_output_grps[i] = grp
     # self.output_file_grps = actual_output_grps
     # actual_input_grps = [*self.ocrd_tool_json['input_file_grp']]
     # for i, grp in enumerate(self.input_file_grps):
     #     actual_input_grps[i] = grp
     # self.input_file_grps = actual_input_grps

     report = ParameterValidator(self.ocrd_tool_json).validate(
         self.parameters)
     if not report.is_valid:
         raise Exception(report.errors)

     # TODO remove once OCR-D/spec#121 lands
     output_declared = 'output_file_grp' in self.ocrd_tool_json
     if output_declared and not self.output_file_grps:
         raise Exception(
             "Processor requires output_file_grp but none was provided.")
     return report
Esempio n. 3
0
 def __init__(self,
              workspace,
              ocrd_tool=None,
              parameter=None,
              input_file_grp="INPUT",
              output_file_grp="OUTPUT",
              page_id=None,
              dump_json=False,
              version=None):
     """Set up the processor, or only dump its tool description.

     When ``dump_json`` is true, ``ocrd_tool`` is printed as JSON and the
     constructor returns without setting any attribute. Otherwise the
     arguments are stored on the instance, the process changes into
     ``workspace.directory`` (when ``workspace`` is truthy) and
     ``parameter`` is validated against ``ocrd_tool``.

     Raises:
         Exception: if the parameter report is not valid.
     """
     if dump_json:
         print(json.dumps(ocrd_tool, indent=True))
         return
     parameter = {} if parameter is None else parameter

     self.ocrd_tool = ocrd_tool
     self.version = version
     self.workspace = workspace
     # FIXME HACK would be better to use pushd_popd(self.workspace.directory)
     # but there is no way to do that in process here since it's an
     # overridden method. chdir is almost always an anti-pattern.
     if self.workspace:
         os.chdir(self.workspace.directory)
     self.input_file_grp = input_file_grp
     self.output_file_grp = output_file_grp

     # An empty list of page IDs is stored as None, just like None itself.
     if page_id == [] or page_id is None:
         self.page_id = None
     else:
         self.page_id = page_id

     report = ParameterValidator(ocrd_tool).validate(parameter)
     if not report.is_valid:
         raise Exception("Invalid parameters %s" % report.errors)
     self.parameter = parameter
Esempio n. 4
0
 def __init__(
         self,
         workspace,
         ocrd_tool=None,
         parameter=None,
         input_file_grp="INPUT",
         output_file_grp="OUTPUT",
         page_id=None,
         dump_json=False,
         version=None
 ):
     """Store the processor configuration after validating ``parameter``.

     When ``dump_json`` is true, ``ocrd_tool`` is printed as JSON and the
     constructor returns without setting any attribute.

     Raises:
         Exception: if ``parameter`` does not validate against
             ``ocrd_tool``.
     """
     if dump_json:
         print(json.dumps(ocrd_tool, indent=True))
         return
     if parameter is None:
         parameter = {}

     self.ocrd_tool = ocrd_tool
     self.version = version
     self.workspace = workspace
     self.input_file_grp = input_file_grp
     self.output_file_grp = output_file_grp

     # An empty list of page IDs is stored as None, just like None itself.
     no_pages = page_id == [] or page_id is None
     self.page_id = None if no_pages else page_id

     validation = ParameterValidator(ocrd_tool).validate(parameter)
     if not validation.is_valid:
         raise Exception("Invalid parameters %s" % validation.errors)
     self.parameter = parameter
Esempio n. 5
0
 def test_missing_required(self):
     """An empty object is rejected when a parameter is marked required."""
     tool = {
         "parameters": {
             "i-am-required": {
                 "type": "number",
                 "required": True
             },
         }
     }
     report = ParameterValidator(tool).validate({})
     self.assertFalse(report.is_valid)
     first_error = report.errors[0]
     self.assertIn('is a required property', first_error)
Esempio n. 6
0
def ocrd_tool_tool_parse_params(ctx, parameter, json):
    """
    Parse parameters with fallback to defaults and output as shell-eval'able assignments to params var.
    """
    # NOTE(review): the docstring above is kept verbatim because it may
    # double as command-line help text -- confirm before rewording it.
    tool_description = ctx.json['tools'][ctx.tool_name]
    report = ParameterValidator(tool_description).validate(parameter)
    if not report.is_valid:
        # Invalid parameters: print the report as XML and exit with status 1.
        print(report.to_xml())
        sys.exit(1)
    if json:
        print(dumps(parameter))
        return
    # One assignment per parameter, in the form params["key"]="value".
    for key, value in parameter.items():
        print('params["%s"]="%s"' % (key, value))
Esempio n. 7
0
 def validate(self):
     """Check executable, file groups and parameters of this task.

     Parameters are read from ``self.parameter_path`` with
     ``parse_json_string_or_file`` when that attribute is truthy;
     otherwise an empty dict is validated against
     ``self.ocrd_tool_json``.

     Returns:
         The report produced by ``ParameterValidator.validate``.

     Raises:
         Exception: if the executable is not found by ``which``, no input
             file group is set, the parameters are invalid, or the tool
             description has an ``output_file_grp`` key while no output
             file group is set.
     """
     if not which(self.executable):
         raise Exception("Executable not found in PATH: %s" %
                         self.executable)
     if not self.input_file_grps:
         raise Exception("Task must have input file group")

     if self.parameter_path:
         task_parameters = parse_json_string_or_file(self.parameter_path)
     else:
         task_parameters = {}

     validator = ParameterValidator(self.ocrd_tool_json)
     outcome = validator.validate(task_parameters)
     if not outcome.is_valid:
         raise Exception(outcome.errors)

     output_declared = 'output_file_grp' in self.ocrd_tool_json
     if output_declared and not self.output_file_grps:
         raise Exception(
             "Processor requires output_file_grp but none was provided.")
     return outcome
Esempio n. 8
0
def validate_parameters(ocrd_tool, executable, param_json):
    '''
    Validate PARAM_JSON against parameter definition of EXECUTABLE in OCRD_TOOL
    '''
    # NOTE(review): the docstring above is kept verbatim because it may
    # double as command-line help text -- confirm before rewording it.
    # ``ocrd_tool`` is a path: the file is read as UTF-8 and parsed as JSON.
    with codecs.open(ocrd_tool, encoding='utf-8') as tool_file:
        tool_description = loads(tool_file.read())
    validator = ParameterValidator(tool_description['tools'][executable])
    parameters = parse_json_string_or_file(param_json)
    report = validator.validate(parameters)
    _inform_of_result(report)
Esempio n. 9
0
def ocrd_tool_tool_parse_params(ctx, parameters, json):
    """
    Parse parameters with fallback to defaults and output as shell-eval'able assignments to params var.
    """
    # NOTE(review): the docstring above is kept verbatim because it may
    # double as command-line help text -- confirm before rewording it.
    #
    # ``parameters`` is either empty (None or "") or the path of a JSON
    # file. ``json`` selects the output form: one JSON document when
    # truthy, otherwise one params["key"]="value" line per parameter.
    if parameters is None or parameters == "":
        parameters = {}
    else:
        # Decode explicitly as UTF-8 so that parsing does not depend on
        # the locale's default encoding.
        with open(parameters, 'r', encoding='utf-8') as f:
            parameters = loads(f.read())
    parameterValidator = ParameterValidator(ctx.json['tools'][ctx.tool_name])
    report = parameterValidator.validate(parameters)
    if not report.is_valid:
        # Invalid parameters: print the report as XML and exit with status 1.
        print(report.to_xml())
        sys.exit(1)
    if json:
        print(dumps(parameters))
    else:
        for key, value in parameters.items():
            print('params["%s"]="%s"' % (key, value))
Esempio n. 10
0
 def test_default_assignment(self):
     """Validation adds declared defaults to the validated object.

     ``num-param`` has a default and is added; ``foo`` has none and is
     not.
     """
     tool = {
         "parameters": {
             "num-param": {
                 "type": "number",
                 "default": 1
             },
             "baz": {
                 "type": "string",
                 "required": True,
             },
             'foo': {
                 "required": False
             }
         }
     }
     given = {'baz': '23'}
     report = ParameterValidator(tool).validate(given)
     self.assertTrue(report.is_valid)
     expected = {'baz': '23', "num-param": 1}
     self.assertEqual(given, expected)
Esempio n. 11
0
 def validate(self):
     """Check this task, asking the executable for its tool description.

     After the executable and input file group checks, the executable is
     run with ``--dump-json`` and its standard output is parsed as JSON.
     Parameters read from ``self.parameter_path`` (an empty dict when
     that attribute is falsy) are validated against that description.

     Returns:
         ``True`` when every check passes.

     Raises:
         Exception: if the executable is not found by ``which``, no input
             file group is set, the parameters are invalid, or the
             description has an ``output_file_grp`` key while no output
             file group is set.
     """
     if not which(self.executable):
         raise Exception("Executable not found in PATH: %s" %
                         self.executable)
     if not self.input_file_grps:
         raise Exception("Task must have input file group")

     # check=True: run() raises if the executable exits non-zero.
     dump = run([self.executable, '--dump-json'],
                stdout=PIPE,
                check=True,
                universal_newlines=True)
     ocrd_tool_json = json.loads(dump.stdout)

     if self.parameter_path:
         task_parameters = parse_json_string_or_file(self.parameter_path)
     else:
         task_parameters = {}

     outcome = ParameterValidator(ocrd_tool_json).validate(task_parameters)
     if not outcome.is_valid:
         raise Exception(outcome.errors)

     output_declared = 'output_file_grp' in ocrd_tool_json
     if output_declared and not self.output_file_grps:
         raise Exception(
             "Processor requires output_file_grp but none was provided.")
     return True
Esempio n. 12
0
    def chain_data(self, json_data):
        """Validate and prepare chain input.

        ``json_data`` is marshalled with ``chain_model``. A missing
        ``"parameters"`` entry becomes an empty dict. Every entry of
        ``"processors"`` must be listed in ``PROCESSOR_NAMES``; its
        parameters (an empty dict when none were given) are checked by a
        ``ParameterValidator`` built from ``PROCESSORS_CONFIG``.

        Every rejection is reported through ``chain_namespace.abort``
        with status 400.
        NOTE(review): this code relies on ``abort`` not returning
        (presumably it raises an HTTP error) -- confirm.

        Returns:
            The marshalled data, with a parameter dict for each processor.
        """
        data = marshal(data=json_data, fields=chain_model, skip_none=False)

        if data["parameters"] is None:
            data["parameters"] = {}

        # Should some checks be in the model itself?
        if data["processors"] is None:
            chain_namespace.abort(400, "Wrong parameter.",
                                  status="Missing processors for chain.",
                                  statusCode="400")

        for processor in data["processors"]:
            if processor not in PROCESSOR_NAMES:
                chain_namespace.abort(
                    400, "Wrong parameter.",
                    status="Unknown processor \"{}\".".format(processor),
                    statusCode="400")

            # The OCR-D validator updates all parameters with default values.
            if processor not in data["parameters"]:
                data["parameters"][processor] = {}
            validator = ParameterValidator(PROCESSORS_CONFIG[processor])
            report = validator.validate(data["parameters"][processor])
            if not report.is_valid:
                # The first fragment ends with a space so that the
                # concatenated message reads '"..." for processor'.
                chain_namespace.abort(
                    400, "Wrong parameter.",
                    status="Error while validating parameters \"{0}\" "
                           "for processor \"{1}\" -> \"{2}\".".format(
                                data["parameters"][processor],
                                processor,
                                str(report.errors)),
                    statusCode="400")

        return data
Esempio n. 13
0
    def __init__(
            self,
            workspace,
            ocrd_tool=None,
            parameter=None,
            # TODO OCR-D/core#274
            # input_file_grp=None,
            # output_file_grp=None,
            input_file_grp="INPUT",
            output_file_grp="OUTPUT",
            page_id=None,
            show_resource=None,
            list_resources=False,
            show_help=False,
            show_version=False,
            dump_json=False,
            version=None):
        """
        Instantiate, but do not process.

        The informational modes are checked in this order, and the first
        one that applies returns early: ``dump_json``, ``list_resources``,
        ``show_resource``, ``show_help``, ``show_version``. If none
        applies, the instance is set up for processing: the arguments are
        stored, the process changes into the workspace directory and the
        parameters are validated.

        Args:
             workspace (:py:class:`~ocrd.Workspace`): The workspace to
                 process. Can be ``None`` even for processing (esp. on
                 multiple workspaces), but then needs to be set before
                 running.
        Keyword Args:
             ocrd_tool: The ocrd-tool description for that processor
                 (accessed as a mapping here, e.g. ``['executable']``).
             parameter: The runtime choices for the ocrd-tool
                 ``parameters``; ``None`` is treated as an empty dict.
             input_file_grp (string): comma-separated list of METS
                 ``fileGrp``s used for input.
             output_file_grp (string): comma-separated list of METS
                 ``fileGrp``s used for output.
             page_id (string): comma-separated list of METS physical
                 ``page`` IDs to process (or empty for all pages).
             show_resource (string): If truthy, then instead of
                 processing, resolve the given resource by name and write
                 its contents to stdout (a directory is written as a
                 gzipped tarball).
             list_resources (boolean): If true, then instead of
                 processing, print the path names of the resources found
                 for this executable.
             show_help (boolean): If true, then instead of processing,
                 call :py:meth:`show_help`.
             show_version (boolean): If true, then instead of processing,
                 call :py:meth:`show_version`.
             dump_json (boolean): If true, then instead of processing,
                 print ``ocrd_tool`` as JSON on stdout.
             version: Stored as :py:attr:`version`.

        Raises:
             Exception: if ``parameter`` does not validate against
                 ``ocrd_tool``.
        """
        if dump_json:
            print(json.dumps(ocrd_tool, indent=True))
            return

        if list_resources:
            has_dirs, has_files = get_processor_resource_types(None, ocrd_tool)
            for resource in list_all_resources(ocrd_tool['executable']):
                # Print a directory only if directories are accepted, and
                # a non-directory only if files are accepted.
                accepted = has_dirs if Path(resource).is_dir() else has_files
                if accepted:
                    print(resource)
            return

        if show_resource:
            # The result is not used in this branch; the call is kept so
            # that the behavior stays exactly as before.
            _has_dirs, _has_files = get_processor_resource_types(
                None, ocrd_tool)
            candidates = list_resource_candidates(ocrd_tool['executable'],
                                                  show_resource)
            if candidates:
                resource_path = Path(candidates[0])
                if not resource_path.is_dir():
                    sys.stdout.buffer.write(resource_path.read_bytes())
                else:
                    # Pack the directory into an in-memory .tar.gz and
                    # stream that to stdout.
                    with pushd_popd(resource_path):
                        archive = io.BytesIO()
                        with tarfile.open(fileobj=archive,
                                          mode='w:gz') as tarball:
                            tarball.add('.')
                        archive.seek(0)
                        copyfileobj(archive, sys.stdout.buffer)
            else:
                initLogging()
                logger = getLogger('ocrd.%s.__init__' %
                                   ocrd_tool['executable'])
                logger.error("Failed to resolve %s for processor %s" %
                             (show_resource, ocrd_tool['executable']))
            return

        # show_help() and show_version() run after the attributes they
        # may need have been assigned.
        self.ocrd_tool = ocrd_tool
        if show_help:
            self.show_help()
            return
        self.version = version
        if show_version:
            self.show_version()
            return

        self.workspace = workspace
        # FIXME HACK would be better to use pushd_popd(self.workspace.directory)
        # but there is no way to do that in process here since it's an
        # overridden method. chdir is almost always an anti-pattern.
        if self.workspace:
            self.old_pwd = getcwd()
            os.chdir(self.workspace.directory)
        self.input_file_grp = input_file_grp
        self.output_file_grp = output_file_grp

        # An empty list of page IDs is stored as None, just like None itself.
        if page_id == [] or page_id is None:
            self.page_id = None
        else:
            self.page_id = page_id

        if parameter is None:
            parameter = {}
        report = ParameterValidator(ocrd_tool).validate(parameter)
        if not report.is_valid:
            raise Exception("Invalid parameters %s" % report.errors)
        self.parameter = parameter
Esempio n. 14
0
 def test_extraneous(self):
     """A parameter that the tool does not declare is rejected."""
     tool = {"parameters": {}}
     undeclared = {"foo": 42}
     report = ParameterValidator(tool).validate(undeclared)
     self.assertFalse(report.is_valid)
     first_error = report.errors[0]
     self.assertIn("Additional properties are not allowed ('foo' was unexpected)", first_error)