async def _validate_node_properties(
    self,
    pipeline_definition: PipelineDefinition,
    pipeline_type: str,
    pipeline_runtime: str,
    response: ValidationResponse,
) -> None:
    """
    Validates each node's structure for required fields/properties as well as their values
    :param pipeline_definition: the pipeline definition to be validated
    :param pipeline_type: name of the pipeline type e.g. kfp, airflow, generic
    :param pipeline_runtime: name of the pipeline runtime for execution e.g. kfp, airflow, local
    :param response: ValidationResponse containing the issue list to be updated
    """
    if pipeline_runtime:
        # don't check if incompatible pipeline type and runtime
        if not PipelineValidationManager._is_compatible_pipeline(pipeline_runtime, pipeline_type):
            return

    for pipeline in pipeline_definition.pipelines:
        for node in pipeline.nodes:
            if node.type == "execution_node":
                if Operation.is_generic_operation(node.op):
                    self._validate_generic_node_properties(
                        node=node, response=response, pipeline_runtime=pipeline_runtime
                    )
                # Validate runtime components against specific node properties in component registry
                else:
                    await self._validate_custom_component_node_properties(
                        node=node,
                        response=response,
                        pipeline_runtime=pipeline_runtime,
                        pipeline_definition=pipeline_definition,
                    )
def get_operation(self) -> Operation:
    self.env_vars = []
    if self.fail:  # NODE_FILENAME is required, so skip if triggering failure
        if "NODE_FILENAME" in os.environ:  # remove entry if present
            os.environ.pop("NODE_FILENAME")
    else:
        self.env_vars.append(f"NODE_FILENAME={self.filename}")
    if self.inputs:
        self.env_vars.append(f"INPUT_FILENAMES={';'.join(self.inputs)}")
    if self.outputs:
        self.env_vars.append(f"OUTPUT_FILENAMES={';'.join(self.outputs)}")

    # Add system-owned env var here with bogus or no value...
    self.env_vars.append("ELYRA_RUNTIME_ENV=bogus_runtime")

    return Operation(self.id,
                     'execution_node',
                     self.name,
                     self.classifier,
                     self.filename,
                     self.image_name or "NA",
                     dependencies=self.dependencies,
                     env_vars=self.env_vars,
                     inputs=self.inputs,
                     outputs=self.outputs,
                     parent_operations=self.parent_operations)
def test_collect_envs(processor):
    pipelines_test_file = 'elyra/pipeline/tests/resources/archive/test.ipynb'

    test_operation = Operation(id='this-is-a-test-id',
                               type='execution-node',
                               classifier='airflow',
                               name='test',
                               filename=pipelines_test_file,
                               runtime_image='tensorflow/tensorflow:latest')

    envs = processor._collect_envs(test_operation, cos_secret=None, cos_username='Alice', cos_password='secret')

    assert envs['ELYRA_RUNTIME_ENV'] == 'airflow'
    assert envs['AWS_ACCESS_KEY_ID'] == 'Alice'
    assert envs['AWS_SECRET_ACCESS_KEY'] == 'secret'
    assert envs['ELYRA_ENABLE_PIPELINE_INFO'] == 'True'
    assert 'ELYRA_WRITABLE_CONTAINER_DIR' not in envs

    # Repeat with non-None secret - ensure user and password envs are not present, but others are
    envs = processor._collect_envs(test_operation, cos_secret='secret', cos_username='Alice', cos_password='secret')

    assert envs['ELYRA_RUNTIME_ENV'] == 'airflow'
    assert 'AWS_ACCESS_KEY_ID' not in envs
    assert 'AWS_SECRET_ACCESS_KEY' not in envs
    assert envs['ELYRA_ENABLE_PIPELINE_INFO'] == 'True'
    assert 'ELYRA_WRITABLE_CONTAINER_DIR' not in envs
def test_fail_create_operation_missing_classifier():
    component_parameters = {
        "filename": "elyra/pipeline/tests/resources/archive/test.ipynb",
        "runtime_image": "tensorflow/tensorflow:latest",
    }
    with pytest.raises(TypeError):
        Operation(id="test-id", type="execution-node", name="test", component_params=component_parameters)
def test_collect_envs(processor):
    pipelines_test_file = 'elyra/pipeline/tests/resources/archive/test.ipynb'

    # add system-owned envs with bogus values to ensure they get set to system-derived values,
    # and include some user-provided edge cases
    operation_envs = [
        'ELYRA_RUNTIME_ENV="bogus_runtime"',
        'ELYRA_ENABLE_PIPELINE_INFO="bogus_pipeline"',
        'ELYRA_WRITABLE_CONTAINER_DIR=',  # simulate operation reference in pipeline
        'AWS_ACCESS_KEY_ID="bogus_key"',
        'AWS_SECRET_ACCESS_KEY="bogus_secret"',
        'USER_EMPTY_VALUE= ',
        'USER_TWO_EQUALS=KEY=value',
        'USER_NO_VALUE='
    ]
    test_operation = Operation(id='this-is-a-test-id',
                               type='execution-node',
                               classifier='airflow',
                               name='test',
                               filename=pipelines_test_file,
                               env_vars=operation_envs,
                               runtime_image='tensorflow/tensorflow:latest')

    envs = processor._collect_envs(test_operation, cos_secret=None, cos_username='Alice', cos_password='secret')

    assert envs['ELYRA_RUNTIME_ENV'] == 'airflow'
    assert envs['AWS_ACCESS_KEY_ID'] == 'Alice'
    assert envs['AWS_SECRET_ACCESS_KEY'] == 'secret'
    assert envs['ELYRA_ENABLE_PIPELINE_INFO'] == 'True'
    assert 'ELYRA_WRITABLE_CONTAINER_DIR' not in envs
    assert envs['USER_EMPTY_VALUE'] == ' '
    assert envs['USER_TWO_EQUALS'] == 'KEY=value'
    assert 'USER_NO_VALUE' not in envs

    # Repeat with non-None secret - ensure user and password envs are not present, but others are
    envs = processor._collect_envs(test_operation, cos_secret='secret', cos_username='Alice', cos_password='secret')

    assert envs['ELYRA_RUNTIME_ENV'] == 'airflow'
    assert 'AWS_ACCESS_KEY_ID' not in envs
    assert 'AWS_SECRET_ACCESS_KEY' not in envs
    assert envs['ELYRA_ENABLE_PIPELINE_INFO'] == 'True'
    assert 'ELYRA_WRITABLE_CONTAINER_DIR' not in envs
    assert envs['USER_EMPTY_VALUE'] == ' '
    assert envs['USER_TWO_EQUALS'] == 'KEY=value'
    assert 'USER_NO_VALUE' not in envs
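# A minimal, self-contained sketch of the env-var parsing rules the test above exercises.
# parse_env_list is a hypothetical stand-in, not the real _collect_envs implementation:
# it assumes each entry is split on the first '=' only (so 'USER_TWO_EQUALS=KEY=value'
# keeps 'KEY=value' as its value) and that entries with no value at all, such as
# 'USER_NO_VALUE=', are dropped, while whitespace-only values like ' ' are retained.
def parse_env_list(env_list):
    envs = {}
    for entry in env_list:
        if not entry:
            continue
        key, _, value = entry.partition("=")  # split on the first '=' only
        if value:  # skip entries with an empty value
            envs[key.strip()] = value
    return envs


assert parse_env_list(["USER_TWO_EQUALS=KEY=value", "USER_EMPTY_VALUE= ", "USER_NO_VALUE="]) == {
    "USER_TWO_EQUALS": "KEY=value",
    "USER_EMPTY_VALUE": " ",
}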
def propagate_pipeline_default_properties(self):
    """
    For any default pipeline properties set (e.g. runtime image, volume), propagate
    the values to any nodes that do not set their own value for that property.
    """
    # Convert any key-value list pipeline default properties to the KeyValueList type
    kv_properties = PipelineDefinition.get_kv_properties()
    self.primary_pipeline.convert_kv_properties(kv_properties)

    pipeline_default_properties = self.primary_pipeline.get_property(PIPELINE_DEFAULTS, {})
    for node in self.pipeline_nodes:
        if not Operation.is_generic_operation(node.op):
            continue

        # Convert any key-value list node properties to the KeyValueList type if not done already
        node.convert_kv_properties(kv_properties)

        for property_name, pipeline_default_value in pipeline_default_properties.items():
            if not pipeline_default_value:
                continue

            node_value = node.get_component_parameter(property_name)
            if not node_value:
                node.set_component_parameter(property_name, pipeline_default_value)
                continue

            if isinstance(pipeline_default_value, KeyValueList) and isinstance(node_value, KeyValueList):
                merged_list = KeyValueList.merge(node_value, pipeline_default_value)
                node.set_component_parameter(property_name, merged_list)

        if self.primary_pipeline.runtime_config != "local":
            node.remove_env_vars_with_matching_secrets()

        node.convert_data_class_properties()
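# A minimal sketch of the precedence implemented above, using plain dicts in place of
# PipelineDefinition and KeyValueList (the names and values below are illustrative only):
# a node keeps its own scalar value, inherits the pipeline default when it has none, and
# key-value style properties are merged with node-level entries taking precedence.
pipeline_defaults = {"runtime_image": "tensorflow/tensorflow:latest", "env_vars": {"A": "1", "B": "2"}}
node_params = {"runtime_image": "", "env_vars": {"B": "node", "C": "3"}}

for name, default in pipeline_defaults.items():
    current = node_params.get(name)
    if not current:
        node_params[name] = default  # node has no value: inherit the pipeline default
    elif isinstance(default, dict) and isinstance(current, dict):
        node_params[name] = {**default, **current}  # merge, node entries override defaults

assert node_params["runtime_image"] == "tensorflow/tensorflow:latest"
assert node_params["env_vars"] == {"A": "1", "B": "node", "C": "3"}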
def _create_pipeline_operation(self, node: Node, super_node: Node = None) -> Operation:
    """
    Creates a pipeline operation instance from the given node.
    The node and super_node are used to build the list of parent_operation_ids
    (links) to the node (operation dependencies).
    """
    parent_operations = PipelineParser._get_parent_operation_links(node.to_dict())  # parse links as dependencies
    if super_node:  # gather parent-links tied to the embedded node's inputs
        parent_operations.extend(PipelineParser._get_parent_operation_links(super_node.to_dict(), node.id))

    return Operation.create_instance(
        id=node.id,
        type=node.type,
        classifier=node.op,
        name=node.label,
        parent_operation_ids=parent_operations,
        component_params=node.get("component_parameters", {}),
    )
def get_operation(self) -> Operation:
    self.env_vars = []
    if self.fail:  # NODE_FILENAME is required, so skip if triggering failure
        os.environ.pop("NODE_FILENAME", None)  # remove entry that might already be present
    else:
        self.env_vars.append(f"NODE_FILENAME={self.filename}")
    if self.inputs:
        self.env_vars.append(f"INPUT_FILENAMES={';'.join(self.inputs)}")
    if self.outputs:
        self.env_vars.append(f"OUTPUT_FILENAMES={';'.join(self.outputs)}")

    return Operation(self.id,
                     'execution_node',
                     self.classifier,
                     self.filename,
                     self.image_name or "NA",
                     dependencies=self.dependencies,
                     env_vars=self.env_vars,
                     inputs=self.inputs,
                     outputs=self.outputs,
                     parent_operations=self.parent_operations)
def test_processing_filename_runtime_specific_component(monkeypatch, processor, sample_metadata, tmpdir):
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    absolute_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "resources", "components", "download_data.yaml")
    )

    # Read contents of given path -- read_component_definition() returns a
    # dictionary of component definition content indexed by path
    entry_data = reader.get_entry_data({"path": absolute_path}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="Download data",
        description="",
        op="download-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": absolute_path},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include single filename-based component for testing
    ComponentCache.instance()._component_cache[processor._type.name] = {
        "spoofed_catalog": {"components": {component_id: component}}
    }

    # Construct hypothetical operation for component
    operation_name = "Download data test"
    operation_params = {
        "url": "https://raw.githubusercontent.com/elyra-ai/elyra/master/tests/assets/helloworld.ipynb",
        "curl_options": "--location",
    }
    operation = Operation(
        id="download-data-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata)
    mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(
        id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="download_data.pipeline"
    )
    pipeline.operations[operation.id] = operation

    # Establish path and function to construct pipeline
    pipeline_path = os.path.join(tmpdir, "kfp_test.yaml")
    constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline")

    # TODO Check against both argo and tekton compilations
    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path)

    # Read contents of pipeline YAML
    with open(pipeline_path) as f:
        pipeline_yaml = yaml.safe_load(f.read())

    # Check the pipeline file contents for correctness
    pipeline_template = pipeline_yaml["spec"]["templates"][0]
    assert pipeline_template["metadata"]["annotations"]["pipelines.kubeflow.org/task_display_name"] == operation_name
    assert pipeline_template["container"]["command"][3] == operation_params["url"]
def test_scrub_list_function():
    env_variables_input = ["FOO=Bar", "BAR=Foo", None, ""]
    env_variables_output = ["FOO=Bar", "BAR=Foo"]

    assert Operation._scrub_list(env_variables_input) == env_variables_output
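# A minimal sketch of the behaviour the test above checks, assuming Operation._scrub_list
# simply drops falsy entries (None, '') while preserving the order of the remaining values.
# scrub_list below is a hypothetical stand-in used for illustration, not the real implementation.
def scrub_list(values):
    return [v for v in values if v]


assert scrub_list(["FOO=Bar", "BAR=Foo", None, ""]) == ["FOO=Bar", "BAR=Foo"]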
def test_cc_pipeline_component_no_input(monkeypatch, processor, component_cache, sample_metadata, tmpdir):
    """
    Verifies that cc_pipeline can handle KFP component definitions that don't include any inputs
    """
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    cpath = (Path(__file__).parent / ".." / "resources" / "components" / "kfp_test_operator_no_inputs.yaml").resolve()
    assert cpath.is_file()
    cpath = str(cpath)

    # Read contents of given path -- read_component_definition() returns a
    # dictionary of component definition content indexed by path
    entry_data = reader.get_entry_data({"path": cpath}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="No input data",
        description="",
        op="no-input-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": cpath},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include single filename-based component for testing
    component_cache._component_cache[processor._type.name] = {
        "spoofed_catalog": {"components": {component_id: component}}
    }

    # Construct hypothetical operation for component
    operation_name = "no-input-test"
    operation_params = {}
    operation = Operation(
        id="no-input-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata)
    mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(
        id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="no_input.pipeline"
    )
    pipeline.operations[operation.id] = operation

    constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline")
    pipeline_path = str(Path(tmpdir) / "no_inputs_test.yaml")

    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path)