Example #1
0
    def test_incomplete_params(self):
        """Check validation when only ``param1`` of two declared params is supplied.

        With two plain inputs the call is expected to raise
        ``ValidationError``; with two outputs (the first optional with a
        value) the very same call is expected to go through.
        """

        def _validate(compiled):
            # Same params for both scenarios: only `param1` is provided.
            return ops_params.validate_params(
                params={"param1": {"value": 1}},
                inputs=compiled.inputs,
                outputs=compiled.outputs,
                is_template=False,
            )

        # Scenario 1: declared as inputs -> `param2` is missing, must fail.
        inputs_spec = {
            "inputs": [
                {"name": "param1", "type": types.INT},
                {"name": "param2", "type": types.INT},
            ],
            "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
        }
        compiled_inputs = V1CompiledOperation.from_dict(inputs_spec)
        with self.assertRaises(ValidationError):
            _validate(compiled_inputs)

        # Scenario 2: declared as outputs -> no error is expected.
        outputs_spec = {
            "outputs": [
                {"name": "param1", "type": types.INT, "value": 12, "isOptional": True},
                {"name": "param2", "type": types.INT},
            ],
            "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
        }
        compiled_outputs = V1CompiledOperation.from_dict(outputs_spec)
        _validate(compiled_outputs)
Example #2
0
    def test_extra_params(self):
        """A param with no matching declaration is rejected for inputs and outputs.

        Each scenario declares only ``param1`` and passes both ``param1`` and
        ``param2``; ``ValidationError`` is expected in both cases.
        """
        # The two scenarios differ only in which section declares `param1`.
        for section in ("inputs", "outputs"):
            spec = {
                section: [{"name": "param1", "type": types.INT}],
                "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
            }
            compiled = V1CompiledOperation.from_dict(spec)
            with self.assertRaises(ValidationError):
                ops_params.validate_params(
                    params={"param1": {"value": 1}, "param2": {"value": 2}},
                    inputs=compiled.inputs,
                    outputs=compiled.outputs,
                    is_template=False,
                )
Example #3
0
 def test_job_refs_params(self):
     """A ``job.A`` reference param must fail when validated without a pipeline.

     ``param1`` points at ``outputs.foo`` of ``job.A`` while ``param9`` is a
     plain float; no context is passed, and ``ValidationError`` is expected.
     """
     ref_params = {
         "param1": {"ref": "job.A", "value": "outputs.foo"},
         "param9": {"value": 13.1},
     }
     compiled = V1CompiledOperation.from_dict(
         {
             "inputs": [
                 {"name": "param1", "type": types.INT},
                 {"name": "param9", "type": types.FLOAT},
             ],
             "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
         }
     )
     # Validation outside the context of a pipeline
     with self.assertRaises(ValidationError):
         ops_params.validate_params(
             params=ref_params,
             inputs=compiled.inputs,
             outputs=None,
             is_template=False,
         )
Example #4
0
    def test_param_validation_with_outputs(self):
        """Validate one param per declared output type, then with two omitted.

        First pass: every output gets a matching value and the validated
        params must serialize back to exactly the params that were passed.
        Second pass: ``param1`` and ``param2`` are left out; the call must
        still succeed and those two must come back with a ``None`` value.
        """
        config_dict = {
            "outputs": [
                {"name": "param1", "type": types.STR},
                {"name": "param2", "type": types.INT},
                {"name": "param3", "type": types.FLOAT},
                {"name": "param4", "type": types.BOOL},
                {"name": "param5", "type": types.DICT},
                {"name": "param6", "type": types.LIST},
                {"name": "param7", "type": types.GCS},
                {"name": "param8", "type": types.S3},
                {"name": "param9", "type": types.WASB},
                {"name": "param10", "type": types.PATH},
                {"name": "param11", "type": types.METRIC},
                {"name": "param12", "type": types.METADATA},
                {"name": "param13", "type": types.METADATA},
                {"name": "param14", "type": types.METADATA},
            ],
            "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
        }
        op = V1CompiledOperation.from_dict(config_dict)
        params = {
            "param1": {"value": "text"},
            "param2": {"value": 12},
            "param3": {"value": 13.3},
            "param4": {"value": False},
            "param5": {"value": {"foo": "bar"}},
            "param6": {"value": [1, 3, 45, 5]},
            "param7": {"value": "gs://bucket/path/to/blob/"},
            "param8": {"value": "s3://test/this/is/bad/key.txt"},
            # WASB URLs have the form
            # wasbs://<container>@<account>.blob.core.windows.net/<path>
            "param9": {"value": "wasbs://container@user.blob.core.windows.net/"},
            "param10": {"value": "/foo/bar"},
            "param11": {"value": 124.4},
            "param12": {"value": {"foo": 124.4}},
            "param13": {"value": {"foo": "bar"}},
            "param14": {"value": {"foo": ["foo", 124.4]}},
        }
        validated_params = ops_params.validate_params(
            params=params, inputs=None, outputs=op.outputs, is_template=False
        )
        assert params == {p.name: p.param.to_dict() for p in validated_params}

        # Passing missing params
        params.pop("param1")
        params.pop("param2")
        validated_params = ops_params.validate_params(
            params=params, inputs=None, outputs=op.outputs, is_template=False
        )

        # The omitted outputs are expected back with an explicit `None` value.
        params["param1"] = {"value": None}
        params["param2"] = {"value": None}
        assert params == {p.name: p.param.to_dict() for p in validated_params}
Example #5
0
 def test_experiment_and_job_refs_params(self):
     """Params referencing other runs by uuid validate and round-trip unchanged.

     ``param1``, ``param2`` and ``param11`` use ``runs.<uuid>`` references to
     an ``outputs.*`` value, while ``param9`` is a literal WASB URL. No context
     is passed; validation must succeed and the validated params must
     serialize back to the exact input mapping.
     """
     config_dict = {
         "inputs": [
             {"name": "param1", "type": types.INT},
             {"name": "param2", "type": types.FLOAT},
             {"name": "param9", "type": types.WASB},
             {"name": "param11", "type": types.METRIC},
         ],
         "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
     }
     op = V1CompiledOperation.from_dict(config_dict)
     params = {
         "param1": {
             "ref": "runs.64332180bfce46eba80a65caf73c5396",
             "value": "outputs.foo",
         },
         "param2": {
             "ref": "runs.0de53b5bf8b04a219d12a39c6b92bcce",
             "value": "outputs.foo",
         },
         # WASB URLs have the form
         # wasbs://<container>@<account>.blob.core.windows.net/<path>
         "param9": {"value": "wasbs://container@user.blob.core.windows.net/"},
         "param11": {
             "ref": "runs.fcc462d764104eb698d3cca509f34154",
             "value": "outputs.accuracy",
         },
     }
     validated_params = ops_params.validate_params(
         params=params, inputs=op.inputs, outputs=None, is_template=False
     )
     assert {p.name: p.param.to_dict() for p in validated_params} == params
 def validate_params(
     self, params=None, context=None, is_template=True, check_runs=False
 ) -> List[ParamSpec]:
     """Validate ``params`` against this object's inputs, outputs and matrix.

     Thin delegate to ``ops_params.validate_params``: ``params``, ``context``,
     ``is_template`` and ``check_runs`` are forwarded unchanged, and the
     result of that call is returned as is.
     """
     # Pieces taken from the instance rather than from the caller.
     own_sections = {
         "inputs": self.inputs,
         "outputs": self.outputs,
         "matrix": self.matrix,
     }
     return ops_params.validate_params(
         params=params,
         context=context,
         is_template=is_template,
         check_runs=check_runs,
         **own_sections,
     )
Example #7
0
 def validate_params(
     self,
     params: Dict = None,
     context: Dict = None,
     is_template: bool = True,
     check_runs: bool = False,
     parse_values: bool = False,
 ) -> List[ParamSpec]:
     """Validate ``params`` against this object's inputs, outputs and matrix.

     Thin delegate to ``ops_params.validate_params``: every argument is
     forwarded unchanged under the same keyword, and the result of that call
     is returned as is.
     """
     # Pieces taken from the instance rather than from the caller.
     own_sections = {
         "inputs": self.inputs,
         "outputs": self.outputs,
         "matrix": self.matrix,
     }
     return ops_params.validate_params(
         params=params,
         context=context,
         is_template=is_template,
         check_runs=check_runs,
         parse_values=parse_values,
         **own_sections,
     )
Example #8
0
    def test_required_input_no_param_only_validated_on_run(self):
        """Only a missing *input* param makes validation fail.

        ``param1`` is always supplied and ``param10`` never is. When
        ``param10`` is declared as an input, ``ValidationError`` is expected;
        when it is declared as an output (alone or next to an input), the
        call is expected to pass.
        """

        def _validate(compiled):
            # Every scenario passes the same single param.
            return ops_params.validate_params(
                params={"param1": {"value": "text"}},
                inputs=compiled.inputs,
                outputs=compiled.outputs,
                is_template=False,
            )

        # Inputs
        only_inputs = V1CompiledOperation.from_dict(
            {
                "inputs": [
                    {"name": "param1", "type": types.STR},
                    {"name": "param10", "type": types.PATH},
                ],
                "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
            }
        )
        with self.assertRaises(ValidationError):
            _validate(only_inputs)

        # Outputs
        only_outputs = V1CompiledOperation.from_dict(
            {
                "outputs": [
                    {"name": "param1", "type": types.STR},
                    {"name": "param10", "type": types.PATH},
                ],
                "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
            }
        )
        _validate(only_outputs)

        # IO
        inputs_and_outputs = V1CompiledOperation.from_dict(
            {
                "inputs": [{"name": "param1", "type": types.STR}],
                "outputs": [{"name": "param10", "type": types.PATH}],
                "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
            }
        )
        _validate(inputs_and_outputs)
Example #9
0
    def test_param_validation_with_mismatched_outputs(self):
        """Output params of the wrong type are rejected; matching ones pass.

        For each declared output (INT, STR, WASB) one well-typed value is
        validated first and must pass, then every listed ill-typed value must
        raise ``ValidationError``.
        """

        def _compile(name, iotype):
            # A compiled operation declaring a single output of the given type.
            return V1CompiledOperation.from_dict(
                {
                    "outputs": [{"name": name, "type": iotype}],
                    "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
                }
            )

        def _validate(config, name, value):
            return ops_params.validate_params(
                params={name: {"value": value}},
                inputs=config.inputs,
                outputs=config.outputs,
                is_template=False,
            )

        # (output name, declared type, accepted value, rejected values)
        cases = (
            (
                "param1",
                types.INT,
                1,
                ("text", 12.1, {"foo": "bar"}, "gs://bucket/path/to/blob/"),
            ),
            (
                "param2",
                types.STR,
                "text",
                (1, False, {"foo": "bar"}, ["gs://bucket/path/to/blob/"]),
            ),
            (
                "param7",
                types.WASB,
                # WASB URLs have the form
                # wasbs://<container>@<account>.blob.core.windows.net/<path>
                "wasbs://container@user.blob.core.windows.net/",
                ("gs://bucket/path/to/blob/", "s3://test/this/is/bad/key.txt", 1),
            ),
        )

        for name, iotype, accepted, rejected_values in cases:
            config = _compile(name, iotype)
            # Passing correct param
            _validate(config, name, accepted)
            # Passing wrong type
            for rejected in rejected_values:
                with self.assertRaises(
                    ValidationError,
                    msg="{} should reject {!r}".format(name, rejected),
                ):
                    _validate(config, name, rejected)
Example #10
0
    def process_components(self, inputs=None):
        """Register op inputs/outputs in the DAG context and validate op params.

        The method works in three passes:

        1. index ``self.components`` by name;
        2. for every op, resolve its component's inputs/outputs and expose
           them in ``self._context`` under ``ops.<op>.inputs.<name>`` /
           ``ops.<op>.outputs.<name>``, together with a fixed set of generic
           entries (``inputs``, ``outputs``, ``status``, ``name``, ``uuid``,
           ``project_name``, ``project_uuid``, ``iteration``);
        3. validate every op's params against that context through
           ``ops_params.validate_params``.

        Ops with a hub reference are skipped in passes 2 and 3.

        Args:
            inputs: optional iterable of DAG-level inputs; each one is exposed
                in the context as ``dag.inputs.<name>``.

        Raises:
            PolyaxonSchemaError: if the pipeline has no operations, declares
                two components with the same name, has two DAG-referencing ops
                with the same name, references a component name that is not
                defined, has a url/path reference that cannot be resolved, or
                has an op without any template.
        """
        from polyaxon.polyaxonfile.check import collect_references

        for dag_input in inputs or []:
            self._context["dag.inputs.{}".format(dag_input.name)] = dag_input

        if not self.operations:
            raise PolyaxonSchemaError(
                "Pipeline is not valid, it has no ops to validate components.")

        for component in self.components or []:
            component_name = component.name
            if component_name in self._components_by_names:
                raise PolyaxonSchemaError(
                    "Pipeline has multiple components with the same name `{}`".
                    format(component_name))
            self._components_by_names[component_name] = component

        # We allow to resolve name, status, project, all outputs/inputs, iteration.
        # Each row is (entry name, io type, factory for a fresh default value);
        # a factory is used so that no mutable default is shared between ops.
        generic_entries = (
            ("inputs", types.DICT, dict),
            ("outputs", types.DICT, dict),
            ("status", types.STR, str),
            ("name", types.STR, str),
            ("uuid", types.STR, str),
            ("project_name", types.STR, str),
            ("project_uuid", types.STR, str),
            ("iteration", types.STR, str),
        )

        for op in self.operations:
            op_name = op.name
            if op.has_url_reference or op.has_path_reference:
                try:
                    op = collect_references(op, self._path_context)
                except Exception as e:
                    # Chain the original error so its traceback stays attached
                    # as the explicit cause of the schema error.
                    raise PolyaxonSchemaError(
                        "Pipeline op with name `{}` requires a component with ref `{}`, "
                        "the reference could not be resolved. Error: {}".
                        format(op_name, op.hub_ref or op.url_ref
                               or op.path_ref, e)) from e
            elif op.has_hub_reference:
                continue

            if op.has_component_reference:
                op_outputs = op.component.outputs
                op_inputs = op.component.inputs
            elif op.has_dag_reference:
                component_ref_name = op.dag_ref
                if op_name in self._op_component_mapping:
                    raise PolyaxonSchemaError(
                        "Pipeline has multiple ops with the same name `{}`".
                        format(op_name))
                if component_ref_name not in self._components_by_names:
                    raise PolyaxonSchemaError(
                        "Pipeline op with name `{}` requires a component with name `{}`, "
                        "which is not defined on this pipeline.".format(
                            op_name, component_ref_name))
                self._op_component_mapping[op_name] = component_ref_name
                referenced = self._components_by_names[component_ref_name]
                op_outputs = referenced.outputs
                op_inputs = referenced.inputs
            else:
                raise PolyaxonSchemaError(
                    "Pipeline op has no template field `{}`".format(op_name))

            for output in op_outputs or []:
                self._context["ops.{}.outputs.{}".format(
                    op_name, output.name)] = output

            for cinput in op_inputs or []:
                self._context["ops.{}.inputs.{}".format(
                    op_name, cinput.name)] = cinput

            for entry_name, entry_type, default_factory in generic_entries:
                self._context["ops.{}.{}".format(op_name, entry_name)] = V1IO(
                    name=entry_name,
                    iotype=entry_type,
                    value=default_factory(),
                    is_optional=True)

        for op in self.operations:
            if op.has_hub_reference:
                continue
            elif op.has_component_reference:
                component_ref = op.template.name
                op_outputs = op.template.outputs
                op_inputs = op.template.inputs
            elif op.has_dag_reference:
                component_ref = op.template.name
                op_outputs = self._components_by_names[component_ref].outputs
                op_inputs = self._components_by_names[component_ref].inputs
            else:
                raise PolyaxonSchemaError(
                    "Pipeline op has no template field `{}`".format(op.name))
            ops_params.validate_params(
                params=op.params,
                inputs=op_inputs,
                outputs=op_outputs,
                context=self._context,
                parallel=op.parallel,
                is_template=False,
                check_runs=False,
                extra_info="<op {}>.<component {}>".format(
                    op.name, component_ref),
            )
Example #11
0
    def process_components(self,
                           inputs=None,
                           ignore_hub_validation: bool = False):
        """Register op inputs/outputs in the DAG context and validate op params.

        The context first receives ``dag.name``, ``dag.uuid`` and one
        ``dag.inputs.<name>`` entry per DAG input. Components are then indexed
        by name, every op's inputs/outputs (plus artifacts, globals and the
        section-level entries) are registered under ``ops.<op>.*``, and
        finally each op's params are validated with
        ``ops_params.validate_params``.

        `ignore_hub_validation` is currently used for ignoring validation
        during tests with hub_ref: when it is true, ops with a hub reference
        are skipped; otherwise they raise like any op without a definition.

        Raises:
            PolyaxonSchemaError: if the pipeline has no operations, declares
                two components with the same name, has two DAG-referencing ops
                with the same name, references a component name that is not
                defined, or has an op without a usable definition.
        """
        for dag_field in ("name", "uuid"):
            self._context["dag.{}".format(dag_field)] = V1IO(
                name=dag_field, type=types.STR, value="", is_optional=True)
        for dag_input in inputs or []:
            self._context["dag.inputs.{}".format(dag_input.name)] = dag_input

        if not self.operations:
            raise PolyaxonSchemaError(
                "Pipeline is not valid, it has no ops to validate components.")

        for declared in self.components or []:
            declared_name = declared.name
            if declared_name in self._components_by_names:
                raise PolyaxonSchemaError(
                    "Pipeline has multiple components with the same name `{}`".
                    format(declared_name))
            self._components_by_names[declared_name] = declared

        for op in self.operations:
            op_name = op.name
            if op.has_component_reference:
                op_outputs = op.component.outputs
                op_inputs = op.component.inputs
            elif op.has_dag_reference:
                dag_ref = op.dag_ref
                if op_name in self._op_component_mapping:
                    raise PolyaxonSchemaError(
                        "Pipeline has multiple ops with the same name `{}`".
                        format(op_name))
                if dag_ref not in self._components_by_names:
                    raise PolyaxonSchemaError(
                        "Pipeline op with name `{}` requires a component with name `{}`, "
                        "which is not defined on this pipeline.".format(
                            op_name, dag_ref))
                self._op_component_mapping[op_name] = dag_ref
                referenced = self._components_by_names[dag_ref]
                op_outputs = referenced.outputs
                op_inputs = referenced.inputs
            elif op.has_hub_reference and ignore_hub_validation:
                continue
            else:
                raise PolyaxonSchemaError(
                    "Pipeline op has no definition field `{}`".format(op_name))

            # Outputs are registered before inputs; an artifacts-typed io is
            # additionally exposed under the op's `artifacts` section.
            for section, ios in (("outputs", op_outputs), ("inputs", op_inputs)):
                for io in ios or ():
                    self._context["ops.{}.{}.{}".format(
                        op_name, section, io.name)] = io
                    if io.type == types.ARTIFACTS:
                        self._context["ops.{}.artifacts.{}".format(
                            op_name, io.name)] = io

            for global_key in contexts_sections.GLOBALS_CONTEXTS:
                self._context["ops.{}.globals.{}".format(
                    op_name, global_key)] = V1IO(
                        name=global_key,
                        type=types.STR,
                        value="",
                        is_optional=True)

            # We allow to resolve name, status, project, all outputs/inputs, iteration
            # Rows are (context section, io name, io type, default value); the
            # tuple is rebuilt for each op so the dict defaults are never shared.
            section_entries = (
                (contexts_sections.INPUTS, "inputs", types.DICT, {}),
                (contexts_sections.OUTPUTS, "outputs", types.DICT, {}),
                (contexts_sections.GLOBALS, "globals", types.STR, ""),
                (contexts_sections.ARTIFACTS, "artifacts", types.STR, ""),
                (contexts_sections.INPUTS_OUTPUTS, "io", types.STR, {}),
            )
            for section, io_name, io_type, default in section_entries:
                self._context["ops.{}.{}".format(op_name, section)] = V1IO(
                    name=io_name,
                    type=io_type,
                    value=default,
                    is_optional=True)

        for op in self.operations:
            if op.has_component_reference:
                component_ref = op.definition.name
                op_outputs = op.definition.outputs
                op_inputs = op.definition.inputs
            elif op.has_dag_reference:
                component_ref = op.definition.name
                referenced = self._components_by_names[component_ref]
                op_outputs = referenced.outputs
                op_inputs = referenced.inputs
            elif op.has_hub_reference and ignore_hub_validation:
                continue
            else:
                raise PolyaxonSchemaError(
                    "Pipeline op has no definition field `{}`".format(op.name))
            ops_params.validate_params(
                params=op.params,
                inputs=op_inputs,
                outputs=op_outputs,
                context=self._context,
                matrix=op.matrix,
                joins=op.joins,
                is_template=False,
                check_runs=False,
                extra_info="<op {}>.<component {}>".format(
                    op.name, component_ref),
            )