def test_incomplete_params(self):
    """Check how a partially supplied set of params is handled.

    Only `param1` is ever passed. When `param2` is declared as an input the
    validation is expected to raise `ValidationError`; when both names are
    declared as outputs the same call is expected to go through.
    """

    def compile_operation(section, io_specs):
        # Build a minimal job operation declaring `io_specs` under `section`.
        return V1CompiledOperation.from_dict(
            {
                section: io_specs,
                "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
            }
        )

    def validate_param1_only(operation):
        return ops_params.validate_params(
            params={"param1": {"value": 1}},
            inputs=operation.inputs,
            outputs=operation.outputs,
            is_template=False,
        )

    # Inputs: `param2` has no value, so validation must fail.
    with_inputs = compile_operation(
        "inputs",
        [
            {"name": "param1", "type": types.INT},
            {"name": "param2", "type": types.INT},
        ],
    )
    with self.assertRaises(ValidationError):
        validate_param1_only(with_inputs)

    # Outputs: leaving `param2` without a value must not raise.
    with_outputs = compile_operation(
        "outputs",
        [
            {"name": "param1", "type": types.INT, "value": 12, "isOptional": True},
            {"name": "param2", "type": types.INT},
        ],
    )
    validate_param1_only(with_outputs)
def test_extra_params(self):
    """Passing a param that is not declared must raise `ValidationError`.

    The same scenario is exercised twice: once with `param1` declared as an
    input and once with it declared as an output. In both cases the extra
    `param2` is expected to be rejected.
    """
    # inputs first, then outputs
    for io_section in ("inputs", "outputs"):
        operation = V1CompiledOperation.from_dict(
            {
                io_section: [{"name": "param1", "type": types.INT}],
                "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
            }
        )
        with self.assertRaises(ValidationError):
            ops_params.validate_params(
                params={"param1": {"value": 1}, "param2": {"value": 2}},
                inputs=operation.inputs,
                outputs=operation.outputs,
                is_template=False,
            )
def test_job_refs_params(self):
    """A param carrying a `job.A` ref must be rejected when no context is given."""
    job_ref_params = {
        "param1": {"ref": "job.A", "value": "outputs.foo"},
        "param9": {"value": 13.1},
    }
    operation = V1CompiledOperation.from_dict(
        {
            "inputs": [
                {"name": "param1", "type": types.INT},
                {"name": "param9", "type": types.FLOAT},
            ],
            "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
        }
    )

    # Validation outside the context of a pipeline
    with self.assertRaises(ValidationError):
        ops_params.validate_params(
            params=job_ref_params,
            inputs=operation.inputs,
            outputs=None,
            is_template=False,
        )
def test_param_validation_with_outputs(self):
    """Validate one value per supported output type, then drop two of them.

    First pass: every declared output receives a value and the validated
    params are expected to serialize back to exactly what was passed.
    Second pass: `param1` and `param2` are omitted and are expected to come
    back with a `None` value instead of raising.
    """
    config_dict = {
        "outputs": [
            {"name": "param1", "type": types.STR},
            {"name": "param2", "type": types.INT},
            {"name": "param3", "type": types.FLOAT},
            {"name": "param4", "type": types.BOOL},
            {"name": "param5", "type": types.DICT},
            {"name": "param6", "type": types.LIST},
            {"name": "param7", "type": types.GCS},
            {"name": "param8", "type": types.S3},
            {"name": "param9", "type": types.WASB},
            {"name": "param10", "type": types.PATH},
            {"name": "param11", "type": types.METRIC},
            {"name": "param12", "type": types.METADATA},
            {"name": "param13", "type": types.METADATA},
            {"name": "param14", "type": types.METADATA},
        ],
        "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
    }
    op = V1CompiledOperation.from_dict(config_dict)
    params = {
        "param1": {"value": "text"},
        "param2": {"value": 12},
        "param3": {"value": 13.3},
        "param4": {"value": False},
        "param5": {"value": {"foo": "bar"}},
        "param6": {"value": [1, 3, 45, 5]},
        "param7": {"value": "gs://bucket/path/to/blob/"},
        "param8": {"value": "s3://test/this/is/bad/key.txt"},
        # A WASB value needs the `<container>@<account>.blob.core.windows.net`
        # authority; the previous fixture was not a well-formed WASB URL.
        "param9": {"value": "wasbs://container@user.blob.core.windows.net/"},
        "param10": {"value": "/foo/bar"},
        "param11": {"value": 124.4},
        "param12": {"value": {"foo": 124.4}},
        "param13": {"value": {"foo": "bar"}},
        "param14": {"value": {"foo": ["foo", 124.4]}},
    }
    validated_params = ops_params.validate_params(
        params=params, inputs=None, outputs=op.outputs, is_template=False
    )
    assert params == {p.name: p.param.to_dict() for p in validated_params}

    # Passing missing params
    params.pop("param1")
    params.pop("param2")
    validated_params = ops_params.validate_params(
        params=params, inputs=None, outputs=op.outputs, is_template=False
    )
    # The omitted outputs are expected back with an empty value.
    params["param1"] = {"value": None}
    params["param2"] = {"value": None}
    assert params == {p.name: p.param.to_dict() for p in validated_params}
def test_experiment_and_job_refs_params(self):
    """Params that reference `runs.<uuid>` outputs validate without a context.

    Three inputs are fed from run references and one (`param9`) from a plain
    WASB value; the validated params are expected to serialize back to the
    exact dictionary that was passed in.
    """
    config_dict = {
        "inputs": [
            {"name": "param1", "type": types.INT},
            {"name": "param2", "type": types.FLOAT},
            {"name": "param9", "type": types.WASB},
            {"name": "param11", "type": types.METRIC},
        ],
        "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
    }
    op = V1CompiledOperation.from_dict(config_dict)
    params = {
        "param1": {
            "ref": "runs.64332180bfce46eba80a65caf73c5396",
            "value": "outputs.foo",
        },
        "param2": {
            "ref": "runs.0de53b5bf8b04a219d12a39c6b92bcce",
            "value": "outputs.foo",
        },
        # A WASB value needs the `<container>@<account>.blob.core.windows.net`
        # authority; the previous fixture was not a well-formed WASB URL.
        "param9": {"value": "wasbs://container@user.blob.core.windows.net/"},
        "param11": {
            "ref": "runs.fcc462d764104eb698d3cca509f34154",
            "value": "outputs.accuracy",
        },
    }
    validated_params = ops_params.validate_params(
        params=params, inputs=op.inputs, outputs=None, is_template=False
    )
    assert {p.name: p.param.to_dict() for p in validated_params} == params
def validate_params(self, params=None, context=None, is_template=True, check_runs=False) -> List[ParamSpec]:
    """Validate `params` against this object's inputs, outputs and matrix.

    Thin wrapper: the arguments are forwarded unchanged to
    `ops_params.validate_params` together with `self.inputs`, `self.outputs`
    and `self.matrix`, and its result is returned as-is.
    """
    validation_kwargs = dict(
        inputs=self.inputs,
        outputs=self.outputs,
        params=params,
        matrix=self.matrix,
        context=context,
        is_template=is_template,
        check_runs=check_runs,
    )
    return ops_params.validate_params(**validation_kwargs)
def validate_params(
    self,
    params: Dict = None,
    context: Dict = None,
    is_template: bool = True,
    check_runs: bool = False,
    parse_values: bool = False,
) -> List[ParamSpec]:
    """Validate `params` against this object's inputs, outputs and matrix.

    Thin wrapper: the arguments are forwarded unchanged to
    `ops_params.validate_params` together with `self.inputs`, `self.outputs`
    and `self.matrix`, and its result is returned as-is.
    """
    validation_kwargs = dict(
        inputs=self.inputs,
        outputs=self.outputs,
        params=params,
        matrix=self.matrix,
        context=context,
        is_template=is_template,
        check_runs=check_runs,
        parse_values=parse_values,
    )
    return ops_params.validate_params(**validation_kwargs)
def test_required_input_no_param_only_validated_on_run(self):
    """A value-less IO is only an error when it is declared as an input.

    `param1` always receives a value and `param10` never does. The call is
    expected to raise when `param10` is an input, and to succeed when
    `param10` is an output (alone or next to a satisfied input).
    """

    def compile_operation(io_sections):
        # Minimal job operation carrying the given inputs/outputs sections.
        spec = dict(io_sections)
        spec["run"] = {"kind": V1RunKind.JOB, "container": {"image": "test"}}
        return V1CompiledOperation.from_dict(spec)

    def validate_param1_only(operation):
        return ops_params.validate_params(
            params={"param1": {"value": "text"}},
            inputs=operation.inputs,
            outputs=operation.outputs,
            is_template=False,
        )

    # Inputs
    both_as_inputs = compile_operation(
        {
            "inputs": [
                {"name": "param1", "type": types.STR},
                {"name": "param10", "type": types.PATH},
            ]
        }
    )
    with self.assertRaises(ValidationError):
        validate_param1_only(both_as_inputs)

    # Outputs
    both_as_outputs = compile_operation(
        {
            "outputs": [
                {"name": "param1", "type": types.STR},
                {"name": "param10", "type": types.PATH},
            ]
        }
    )
    validate_param1_only(both_as_outputs)

    # IO
    input_and_output = compile_operation(
        {
            "inputs": [{"name": "param1", "type": types.STR}],
            "outputs": [{"name": "param10", "type": types.PATH}],
        }
    )
    validate_param1_only(input_and_output)
def test_param_validation_with_mismatched_outputs(self):
    """Values whose type does not match the declared output type are rejected.

    For each of three single-output operations (INT, STR, WASB) one matching
    value is expected to validate and a list of mismatching values is
    expected to raise `ValidationError`, one call per value.
    """

    def check_output(name, io_type, valid_value, invalid_values):
        operation = V1CompiledOperation.from_dict(
            {
                "outputs": [{"name": name, "type": io_type}],
                "run": {"kind": V1RunKind.JOB, "container": {"image": "test"}},
            }
        )

        def validate(value):
            return ops_params.validate_params(
                params={name: {"value": value}},
                inputs=operation.inputs,
                outputs=operation.outputs,
                is_template=False,
            )

        # Passing correct param
        validate(valid_value)
        # Passing wrong type
        for invalid_value in invalid_values:
            with self.assertRaises(ValidationError):
                validate(invalid_value)

    check_output(
        "param1",
        types.INT,
        valid_value=1,
        invalid_values=["text", 12.1, {"foo": "bar"}, "gs://bucket/path/to/blob/"],
    )
    check_output(
        "param2",
        types.STR,
        valid_value="text",
        invalid_values=[1, False, {"foo": "bar"}, ["gs://bucket/path/to/blob/"]],
    )
    check_output(
        "param7",
        types.WASB,
        # A WASB value needs the `<container>@<account>.blob.core.windows.net`
        # authority; the previous fixture was not a well-formed WASB URL.
        valid_value="wasbs://container@user.blob.core.windows.net/",
        invalid_values=[
            "gs://bucket/path/to/blob/",
            "s3://test/this/is/bad/key.txt",
            1,
        ],
    )
def process_components(self, inputs=None):
    """Populate the DAG context from its components/ops and validate op params.

    The method works in three passes:

    1. Register the DAG-level `inputs` under `dag.inputs.<name>` and index
       `self.components` by name in `self._components_by_names`.
    2. For every op (hub-referenced ops are skipped), register its inputs and
       outputs under `ops.<op>.inputs.<name>` / `ops.<op>.outputs.<name>`,
       plus a set of optional placeholder entries (`inputs`, `outputs`,
       `status`, `name`, `uuid`, `project_name`, `project_uuid`,
       `iteration`) so that params may reference them.
    3. Validate every non-hub op's params against the collected context with
       `ops_params.validate_params`.

    Args:
        inputs: optional iterable of DAG-level inputs; each item must expose
            a `name` attribute. `None` is treated as no inputs.

    Raises:
        PolyaxonSchemaError: if the pipeline has no operations, if two
            components share a name, if a url/path reference cannot be
            resolved, if two DAG-referencing ops share a name, if an op
            points to a component that is not defined on the pipeline, or
            if an op carries no usable reference at all.
    """
    from polyaxon.polyaxonfile.check import collect_references

    for dag_input in inputs or []:
        self._context["dag.inputs.{}".format(dag_input.name)] = dag_input

    if not self.operations:
        raise PolyaxonSchemaError(
            "Pipeline is not valid, it has no ops to validate components."
        )

    for component in self.components or []:
        component_name = component.name
        if component_name in self._components_by_names:
            raise PolyaxonSchemaError(
                "Pipeline has multiple components with the same name `{}`".format(
                    component_name
                )
            )
        self._components_by_names[component_name] = component

    for op in self.operations:
        op_name = op.name
        if op.has_url_reference or op.has_path_reference:
            try:
                # NOTE(review): the resolved op only replaces the loop
                # variable; `self.operations` still holds the unresolved op
                # when the validation pass below runs - confirm intended.
                op = collect_references(op, self._path_context)
            except Exception as e:
                # Chain the original error so the root cause stays visible
                # in the traceback.
                raise PolyaxonSchemaError(
                    "Pipeline op with name `{}` requires a component with ref `{}`, "
                    "the reference could not be resolved. Error: {}".format(
                        op_name, op.hub_ref or op.url_ref or op.path_ref, e
                    )
                ) from e
        elif op.has_hub_reference:
            # Hub components are not available here; nothing to register.
            continue

        if op.has_component_reference:
            op_outputs = op.component.outputs
            op_inputs = op.component.inputs
        elif op.has_dag_reference:
            component_ref_name = op.dag_ref
            if op_name in self._op_component_mapping:
                raise PolyaxonSchemaError(
                    "Pipeline has multiple ops with the same name `{}`".format(
                        op_name
                    )
                )
            if component_ref_name not in self._components_by_names:
                raise PolyaxonSchemaError(
                    "Pipeline op with name `{}` requires a component with name `{}`, "
                    "which is not defined on this pipeline.".format(
                        op_name, component_ref_name
                    )
                )
            self._op_component_mapping[op_name] = component_ref_name
            referenced_component = self._components_by_names[component_ref_name]
            op_outputs = referenced_component.outputs
            op_inputs = referenced_component.inputs
        else:
            raise PolyaxonSchemaError(
                "Pipeline op has no template field `{}`".format(op_name)
            )

        for output in op_outputs or []:
            self._context["ops.{}.outputs.{}".format(op_name, output.name)] = output
        for op_input in op_inputs or []:
            self._context["ops.{}.inputs.{}".format(op_name, op_input.name)] = op_input

        # We allow to resolve name, status, project, all outputs/inputs, iteration
        # (the table is rebuilt per op so every placeholder gets its own value).
        placeholders = (
            ("inputs", types.DICT, {}),
            ("outputs", types.DICT, {}),
            ("status", types.STR, ""),
            ("name", types.STR, ""),
            ("uuid", types.STR, ""),
            ("project_name", types.STR, ""),
            ("project_uuid", types.STR, ""),
            ("iteration", types.STR, ""),
        )
        for field, io_type, default in placeholders:
            self._context["ops.{}.{}".format(op_name, field)] = V1IO(
                name=field, iotype=io_type, value=default, is_optional=True
            )

    for op in self.operations:
        if op.has_hub_reference:
            continue
        elif op.has_component_reference:
            # NOTE(review): the registration pass above reads `op.component`
            # while this pass reads `op.template` - confirm both attributes
            # exist on the op and refer to the same object.
            component_ref = op.template.name
            op_outputs = op.template.outputs
            op_inputs = op.template.inputs
        elif op.has_dag_reference:
            component_ref = op.template.name
            op_outputs = self._components_by_names[component_ref].outputs
            op_inputs = self._components_by_names[component_ref].inputs
        else:
            raise PolyaxonSchemaError(
                "Pipeline op has no template field `{}`".format(op.name)
            )
        ops_params.validate_params(
            params=op.params,
            inputs=op_inputs,
            outputs=op_outputs,
            context=self._context,
            parallel=op.parallel,
            is_template=False,
            check_runs=False,
            extra_info="<op {}>.<component {}>".format(op.name, component_ref),
        )
def process_components(self, inputs=None, ignore_hub_validation: bool = False):
    """Populate the DAG context from its components/ops and validate op params.

    Registers `dag.name`, `dag.uuid` and the DAG-level `inputs`, indexes
    `self.components` by name, then for every op registers its inputs,
    outputs, artifacts-typed IO, global contexts and a few optional
    placeholder sections under `ops.<op>.*`. Finally each op's params are
    validated against that context with `ops_params.validate_params`.

    `ignore_hub_validation` is currently used for ignoring validation during
    tests with hub_ref: when it is true, hub-referenced ops are skipped; when
    it is false, such ops fall through to the "no definition" error.

    Raises:
        PolyaxonSchemaError: if there are no operations, if two components
            share a name, if two DAG-referencing ops share a name, if an op
            points to an undefined component, or if an op has no usable
            definition.
    """

    def optional_io(name, io_type, value):
        # Optional placeholder entry that params are allowed to reference.
        return V1IO(name=name, type=io_type, value=value, is_optional=True)

    dag_inputs = inputs or []
    self._context["dag.name"] = optional_io("name", types.STR, "")
    self._context["dag.uuid"] = optional_io("uuid", types.STR, "")
    for dag_input in dag_inputs:
        self._context["dag.inputs.{}".format(dag_input.name)] = dag_input

    if not self.operations:
        raise PolyaxonSchemaError(
            "Pipeline is not valid, it has no ops to validate components."
        )

    for component in self.components or []:
        name = component.name
        if name in self._components_by_names:
            raise PolyaxonSchemaError(
                "Pipeline has multiple components with the same name `{}`".format(name)
            )
        self._components_by_names[name] = component

    # First pass: expose everything each op makes resolvable.
    for op in self.operations:
        op_name = op.name
        if op.has_component_reference:
            op_outputs = op.component.outputs
            op_inputs = op.component.inputs
        elif op.has_dag_reference:
            component_ref_name = op.dag_ref
            if op_name in self._op_component_mapping:
                raise PolyaxonSchemaError(
                    "Pipeline has multiple ops with the same name `{}`".format(op_name)
                )
            if component_ref_name not in self._components_by_names:
                raise PolyaxonSchemaError(
                    "Pipeline op with name `{}` requires a component with name `{}`, "
                    "which is not defined on this pipeline.".format(
                        op_name, component_ref_name
                    )
                )
            self._op_component_mapping[op_name] = component_ref_name
            op_outputs = self._components_by_names[component_ref_name].outputs
            op_inputs = self._components_by_names[component_ref_name].inputs
        elif op.has_hub_reference and ignore_hub_validation:
            continue
        else:
            raise PolyaxonSchemaError(
                "Pipeline op has no definition field `{}`".format(op_name)
            )

        # Outputs are registered before inputs; artifacts-typed entries are
        # additionally exposed under `ops.<op>.artifacts.<name>`.
        io_groups = (
            ("ops.{}.outputs.{}", op_outputs),
            ("ops.{}.inputs.{}", op_inputs),
        )
        for key_template, io_items in io_groups:
            if not io_items:
                continue
            for io_item in io_items:
                self._context[key_template.format(op_name, io_item.name)] = io_item
                if io_item.type == types.ARTIFACTS:
                    self._context[
                        "ops.{}.artifacts.{}".format(op_name, io_item.name)
                    ] = io_item

        for g_context in contexts_sections.GLOBALS_CONTEXTS:
            self._context[
                "ops.{}.globals.{}".format(op_name, g_context)
            ] = optional_io(g_context, types.STR, "")

        # We allow to resolve name, status, project, all outputs/inputs, iteration
        # (the table is rebuilt per op so every placeholder gets its own value).
        section_placeholders = (
            (contexts_sections.INPUTS, "inputs", types.DICT, {}),
            (contexts_sections.OUTPUTS, "outputs", types.DICT, {}),
            (contexts_sections.GLOBALS, "globals", types.STR, ""),
            (contexts_sections.ARTIFACTS, "artifacts", types.STR, ""),
            (contexts_sections.INPUTS_OUTPUTS, "io", types.STR, {}),
        )
        for section, io_name, io_type, default in section_placeholders:
            self._context["ops.{}.{}".format(op_name, section)] = optional_io(
                io_name, io_type, default
            )

    # Second pass: validate each op's params against the full context.
    for op in self.operations:
        if op.has_component_reference:
            component_ref = op.definition.name
            op_outputs = op.definition.outputs
            op_inputs = op.definition.inputs
        elif op.has_dag_reference:
            component_ref = op.definition.name
            op_outputs = self._components_by_names[component_ref].outputs
            op_inputs = self._components_by_names[component_ref].inputs
        elif op.has_hub_reference and ignore_hub_validation:
            continue
        else:
            raise PolyaxonSchemaError(
                "Pipeline op has no definition field `{}`".format(op.name)
            )
        extra_info = "<op {}>.<component {}>".format(op.name, component_ref)
        ops_params.validate_params(
            params=op.params,
            inputs=op_inputs,
            outputs=op_outputs,
            context=self._context,
            matrix=op.matrix,
            joins=op.joins,
            is_template=False,
            check_runs=False,
            extra_info=extra_info,
        )