Example #1
    async def _validate_node_properties(
        self,
        pipeline_definition: PipelineDefinition,
        pipeline_type: str,
        pipeline_runtime: str,
        response: ValidationResponse,
    ) -> None:
        """
        Validates each node's structure for required fields/properties as well as
        their values.
        :param pipeline_definition: the pipeline definition to be validated
        :param pipeline_type: name of the pipeline type, e.g. kfp, airflow, generic
        :param pipeline_runtime: name of the pipeline runtime used for execution, e.g. kfp, airflow, local
        :param response: ValidationResponse containing the issue list to be updated
        """
        if pipeline_runtime:
            # don't check if incompatible pipeline type and runtime
            if not PipelineValidationManager._is_compatible_pipeline(pipeline_runtime, pipeline_type):
                return

        for pipeline in pipeline_definition.pipelines:
            for node in pipeline.nodes:
                if node.type == "execution_node":
                    if Operation.is_generic_operation(node.op):
                        self._validate_generic_node_properties(
                            node=node, response=response, pipeline_runtime=pipeline_runtime
                        )
                    # Validate runtime components against specific node properties in component registry
                    else:
                        await self._validate_custom_component_node_properties(
                            node=node,
                            response=response,
                            pipeline_runtime=pipeline_runtime,
                            pipeline_definition=pipeline_definition,
                        )
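The early return above is the key subtlety: node-level property validation is skipped entirely when the declared pipeline type cannot run on the selected runtime. A minimal standalone sketch of that compatibility rule, assuming (this is an assumption, not Elyra's implementation) that a generic pipeline may run on any runtime while a runtime-specific pipeline must match the runtime exactly:

# Illustrative sketch only -- not PipelineValidationManager._is_compatible_pipeline itself.
def is_compatible_pipeline(pipeline_runtime: str, pipeline_type: str) -> bool:
    # Assumption: "generic" pipelines are runtime-agnostic; anything else must match.
    return pipeline_type == "generic" or pipeline_type == pipeline_runtime


assert is_compatible_pipeline("kfp", "generic")       # generic runs anywhere
assert is_compatible_pipeline("airflow", "airflow")   # matching runtime
assert not is_compatible_pipeline("kfp", "airflow")   # mismatch -> validation is skipped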
Example #2
    def get_operation(self) -> Operation:

        self.env_vars = []
        if self.fail:  # NODE_FILENAME is required, so skip if triggering failure
            if "NODE_FILENAME" in os.environ:  # remove entry if present
                os.environ.pop("NODE_FILENAME")
        else:
            self.env_vars.append(f"NODE_FILENAME={self.filename}")
        if self.inputs:
            self.env_vars.append(f"INPUT_FILENAMES={';'.join(self.inputs)}")
        if self.outputs:
            self.env_vars.append(f"OUTPUT_FILENAMES={';'.join(self.outputs)}")

        # Add system-owned here with bogus or no value...
        self.env_vars.append("ELYRA_RUNTIME_ENV=bogus_runtime")

        return Operation(self.id,
                         'execution_node',
                         self.name,
                         self.classifier,
                         self.filename,
                         self.image_name or "NA",
                         dependencies=self.dependencies,
                         env_vars=self.env_vars,
                         inputs=self.inputs,
                         outputs=self.outputs,
                         parent_operations=self.parent_operations)
Example #3
def test_collect_envs(processor):
    pipelines_test_file = 'elyra/pipeline/tests/resources/archive/test.ipynb'

    test_operation = Operation(id='this-is-a-test-id',
                               type='execution-node',
                               classifier='airflow',
                               name='test',
                               filename=pipelines_test_file,
                               runtime_image='tensorflow/tensorflow:latest')

    envs = processor._collect_envs(test_operation, cos_secret=None, cos_username='Alice', cos_password='secret')

    assert envs['ELYRA_RUNTIME_ENV'] == 'airflow'
    assert envs['AWS_ACCESS_KEY_ID'] == 'Alice'
    assert envs['AWS_SECRET_ACCESS_KEY'] == 'secret'
    assert envs['ELYRA_ENABLE_PIPELINE_INFO'] == 'True'
    assert 'ELYRA_WRITABLE_CONTAINER_DIR' not in envs

    # Repeat with non-None secret - ensure user and password envs are not present, but others are
    envs = processor._collect_envs(test_operation, cos_secret='secret', cos_username='Alice', cos_password='secret')

    assert envs['ELYRA_RUNTIME_ENV'] == 'airflow'
    assert 'AWS_ACCESS_KEY_ID' not in envs
    assert 'AWS_SECRET_ACCESS_KEY' not in envs
    assert envs['ELYRA_ENABLE_PIPELINE_INFO'] == 'True'
    assert 'ELYRA_WRITABLE_CONTAINER_DIR' not in envs
Example #4
def test_fail_create_operation_missing_classifier():
    component_parameters = {
        "filename": "elyra/pipeline/tests/resources/archive/test.ipynb",
        "runtime_image": "tensorflow/tensorflow:latest",
    }
    with pytest.raises(TypeError):
        Operation(id="test-id", type="execution-node", name="test", component_params=component_parameters)
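A sibling check one might add alongside this test, sketched under the assumption that `name` is likewise a required positional argument of `Operation`; the test below is hypothetical and simply mirrors the pattern above:

def test_fail_create_operation_missing_name():
    # Hypothetical variant (assumption): omitting the required 'name'
    # argument should also raise a TypeError.
    component_parameters = {
        "filename": "elyra/pipeline/tests/resources/archive/test.ipynb",
        "runtime_image": "tensorflow/tensorflow:latest",
    }
    with pytest.raises(TypeError):
        Operation(id="test-id", type="execution-node", classifier="airflow", component_params=component_parameters)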
Example #5
def test_collect_envs(processor):
    pipelines_test_file = 'elyra/pipeline/tests/resources/archive/test.ipynb'

    # add system-owned envs with bogus values to ensure they get set to system-derived values,
    # and include some user-provided edge cases
    operation_envs = [
        'ELYRA_RUNTIME_ENV="bogus_runtime"',
        'ELYRA_ENABLE_PIPELINE_INFO="bogus_pipeline"',
        'ELYRA_WRITABLE_CONTAINER_DIR=',  # simulate operation reference in pipeline
        'AWS_ACCESS_KEY_ID="bogus_key"',
        'AWS_SECRET_ACCESS_KEY="bogus_secret"',
        'USER_EMPTY_VALUE=  ',
        'USER_TWO_EQUALS=KEY=value',
        'USER_NO_VALUE='
    ]

    test_operation = Operation(id='this-is-a-test-id',
                               type='execution-node',
                               classifier='airflow',
                               name='test',
                               filename=pipelines_test_file,
                               env_vars=operation_envs,
                               runtime_image='tensorflow/tensorflow:latest')

    envs = processor._collect_envs(test_operation, cos_secret=None, cos_username='Alice', cos_password='secret')

    assert envs['ELYRA_RUNTIME_ENV'] == 'airflow'
    assert envs['AWS_ACCESS_KEY_ID'] == 'Alice'
    assert envs['AWS_SECRET_ACCESS_KEY'] == 'secret'
    assert envs['ELYRA_ENABLE_PIPELINE_INFO'] == 'True'
    assert 'ELYRA_WRITABLE_CONTAINER_DIR' not in envs
    assert envs['USER_EMPTY_VALUE'] == '  '
    assert envs['USER_TWO_EQUALS'] == 'KEY=value'
    assert 'USER_NO_VALUE' not in envs

    # Repeat with non-None secret - ensure user and password envs are not present, but others are
    envs = processor._collect_envs(test_operation, cos_secret='secret', cos_username='Alice', cos_password='secret')

    assert envs['ELYRA_RUNTIME_ENV'] == 'airflow'
    assert 'AWS_ACCESS_KEY_ID' not in envs
    assert 'AWS_SECRET_ACCESS_KEY' not in envs
    assert envs['ELYRA_ENABLE_PIPELINE_INFO'] == 'True'
    assert 'ELYRA_WRITABLE_CONTAINER_DIR' not in envs
    assert envs['USER_EMPTY_VALUE'] == '  '
    assert envs['USER_TWO_EQUALS'] == 'KEY=value'
    assert 'USER_NO_VALUE' not in envs
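The user-provided edge cases pin down how the env-var strings appear to be split: only the first '=' separates key from value, a whitespace-only value is preserved, and an entry with an empty value is dropped. A standalone sketch of that splitting behavior, using a hypothetical helper name rather than Elyra's internals:

def split_env_entries(entries):
    # Hypothetical illustration of the behavior the asserts above imply.
    result = {}
    for entry in entries:
        key, _, value = entry.partition("=")  # split on the first '=' only
        if value:  # drop entries with an empty value, e.g. 'USER_NO_VALUE='
            result[key] = value
    return result


envs = split_env_entries(["USER_TWO_EQUALS=KEY=value", "USER_EMPTY_VALUE=  ", "USER_NO_VALUE="])
assert envs["USER_TWO_EQUALS"] == "KEY=value"
assert envs["USER_EMPTY_VALUE"] == "  "
assert "USER_NO_VALUE" not in envs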
Example #6
    def propagate_pipeline_default_properties(self):
        """
        For any default pipeline properties set (e.g. runtime image, volume), propagate
        the values to any nodes that do not set their own value for that property.
        """
        # Convert any key-value list pipeline default properties to the KeyValueList type
        kv_properties = PipelineDefinition.get_kv_properties()
        self.primary_pipeline.convert_kv_properties(kv_properties)

        pipeline_default_properties = self.primary_pipeline.get_property(PIPELINE_DEFAULTS, {})
        for node in self.pipeline_nodes:
            if not Operation.is_generic_operation(node.op):
                continue

            # Convert any key-value list node properties to the KeyValueList type if not done already
            node.convert_kv_properties(kv_properties)

            for property_name, pipeline_default_value in pipeline_default_properties.items():
                if not pipeline_default_value:
                    continue

                node_value = node.get_component_parameter(property_name)
                if not node_value:
                    node.set_component_parameter(property_name,
                                                 pipeline_default_value)
                    continue

                if isinstance(pipeline_default_value, KeyValueList) and isinstance(node_value, KeyValueList):
                    merged_list = KeyValueList.merge(node_value, pipeline_default_value)
                    node.set_component_parameter(property_name, merged_list)

            if self.primary_pipeline.runtime_config != "local":
                node.remove_env_vars_with_matching_secrets()

            node.convert_data_class_properties()
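The precedence implied by this method: a node keeps any value it sets itself, inherits the pipeline default when it sets none, and for key-value list properties the two are merged with the node's own entries winning. A minimal sketch of that merge rule using plain dicts, since KeyValueList.merge itself is not shown here:

def merge_kv(node_values: dict, pipeline_defaults: dict) -> dict:
    # Sketch of the assumed precedence: start from the pipeline defaults,
    # then let node-level entries override them.
    merged = dict(pipeline_defaults)
    merged.update(node_values)
    return merged


defaults = {"ENV_A": "from-pipeline", "ENV_B": "from-pipeline"}
node_level = {"ENV_B": "from-node"}
assert merge_kv(node_level, defaults) == {"ENV_A": "from-pipeline", "ENV_B": "from-node"}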
Example #7
    def _create_pipeline_operation(self,
                                   node: Node,
                                   super_node: Node = None) -> Operation:
        """
        Creates a pipeline operation instance from the given node.
        The node and super_node are used to build the list of parent_operation_ids (links) to
        the node (operation dependencies).
        """
        parent_operations = PipelineParser._get_parent_operation_links(
            node.to_dict())  # parse links as dependencies
        if super_node:  # gather parent-links tied to embedded nodes inputs
            parent_operations.extend(
                PipelineParser._get_parent_operation_links(
                    super_node.to_dict(), node.id))

        return Operation.create_instance(
            id=node.id,
            type=node.type,
            classifier=node.op,
            name=node.label,
            parent_operation_ids=parent_operations,
            component_params=node.get("component_parameters", {}),
        )
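For context, a hedged sketch of what a direct call to the same factory might look like for a generic node; the identifiers and parameter values below are made up for illustration, and the expectation that generic operations carry 'filename' and 'runtime_image' in component_params is an assumption:

# Illustrative only -- values are hypothetical.
op = Operation.create_instance(
    id="node-2",
    type="execution_node",
    classifier="execute-notebook-node",  # a generic classifier
    name="Analyze data",
    parent_operation_ids=["node-1"],     # upstream links become operation dependencies
    component_params={"filename": "analyze.ipynb", "runtime_image": "tensorflow/tensorflow:latest"},
)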
Example #8
    def get_operation(self) -> Operation:

        self.env_vars = []
        if self.fail:  # NODE_FILENAME is required, so skip if triggering failure
            os.environ.pop("NODE_FILENAME", None)  # remove entry that might already be present
        else:
            self.env_vars.append(f"NODE_FILENAME={self.filename}")
        if self.inputs:
            self.env_vars.append(f"INPUT_FILENAMES={';'.join(self.inputs)}")
        if self.outputs:
            self.env_vars.append(f"OUTPUT_FILENAMES={';'.join(self.outputs)}")

        return Operation(self.id,
                         'execution_node',
                         self.classifier,
                         self.filename,
                         self.image_name or "NA",
                         dependencies=self.dependencies,
                         env_vars=self.env_vars,
                         inputs=self.inputs,
                         outputs=self.outputs,
                         parent_operations=self.parent_operations)
Example #9
def test_processing_filename_runtime_specific_component(
        monkeypatch, processor, sample_metadata, tmpdir):
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    absolute_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "resources",
                     "components", "download_data.yaml"))

    # Read contents of the given path -- get_entry_data() returns an object
    # whose `definition` attribute holds the component definition content
    entry_data = reader.get_entry_data({"path": absolute_path}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="Download data",
        description="",
        op="download-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": absolute_path},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include single filename-based component for testing
    ComponentCache.instance()._component_cache[processor._type.name] = {
        "spoofed_catalog": {
            "components": {
                component_id: component
            }
        }
    }

    # Construct hypothetical operation for component
    operation_name = "Download data test"
    operation_params = {
        "url": "https://raw.githubusercontent.com/elyra-ai/elyra/master/tests/assets/helloworld.ipynb",
        "curl_options": "--location",
    }
    operation = Operation(
        id="download-data-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="kfp",
                              metadata=sample_metadata)

    mocked_func = mock.Mock(return_value="default",
                            side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(id="pipeline-id",
                        name="kfp_test",
                        runtime="kfp",
                        runtime_config="test",
                        source="download_data.pipeline")
    pipeline.operations[operation.id] = operation

    # Establish path and function to construct pipeline
    pipeline_path = os.path.join(tmpdir, "kfp_test.yaml")
    constructed_pipeline_function = lambda: processor._cc_pipeline(
        pipeline=pipeline, pipeline_name="test_pipeline")

    # TODO Check against both argo and tekton compilations
    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function,
                                         pipeline_path)

    # Read contents of pipeline YAML
    with open(pipeline_path) as f:
        pipeline_yaml = yaml.safe_load(f.read())

    # Check the pipeline file contents for correctness
    pipeline_template = pipeline_yaml["spec"]["templates"][0]
    assert pipeline_template["metadata"]["annotations"]["pipelines.kubeflow.org/task_display_name"] == operation_name
    assert pipeline_template["container"]["command"][3] == operation_params["url"]
Example #10
def test_scrub_list_function():
    env_variables_input = ["FOO=Bar", "BAR=Foo", None, ""]
    env_variables_output = ["FOO=Bar", "BAR=Foo"]

    assert Operation._scrub_list(env_variables_input) == env_variables_output
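For reference, the behavior the assertion pins down can be expressed as a one-line filter; this is an illustrative equivalent, not the project's implementation:

def scrub_list(entries):
    # Illustrative equivalent of the behavior asserted above:
    # drop None and empty strings, keep everything else in order.
    return [e for e in entries if e]


assert scrub_list(["FOO=Bar", "BAR=Foo", None, ""]) == ["FOO=Bar", "BAR=Foo"]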
Example #11
def test_cc_pipeline_component_no_input(monkeypatch, processor,
                                        component_cache, sample_metadata,
                                        tmpdir):
    """
    Verifies that cc_pipeline can handle KFP component definitions that don't
    include any inputs
    """
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    cpath = (Path(__file__).parent / ".." / "resources" / "components" /
             "kfp_test_operator_no_inputs.yaml").resolve()
    assert cpath.is_file()
    cpath = str(cpath)

    # Read contents of the given path -- get_entry_data() returns an object
    # whose `definition` attribute holds the component definition content
    entry_data = reader.get_entry_data({"path": cpath}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="No input data",
        description="",
        op="no-input-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": cpath},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include single filename-based component for testing
    component_cache._component_cache[processor._type.name] = {
        "spoofed_catalog": {
            "components": {
                component_id: component
            }
        }
    }

    # Construct hypothetical operation for component
    operation_name = "no-input-test"
    operation_params = {}
    operation = Operation(
        id="no-input-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="kfp",
                              metadata=sample_metadata)

    mocked_func = mock.Mock(return_value="default",
                            side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(id="pipeline-id",
                        name="kfp_test",
                        runtime="kfp",
                        runtime_config="test",
                        source="no_input.pipeline")
    pipeline.operations[operation.id] = operation

    constructed_pipeline_function = lambda: processor._cc_pipeline(
        pipeline=pipeline, pipeline_name="test_pipeline")
    pipeline_path = str(Path(tmpdir) / "no_inputs_test.yaml")

    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function,
                                         pipeline_path)
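The test stops at compilation; a possible follow-up, sketched as an assumption (it relies on yaml being imported as in the earlier example and on the Argo compiler emitting a Workflow manifest):

    # Possible follow-up (assumption): confirm the compiler produced a parseable
    # Argo Workflow even though the component declared no inputs.
    with open(pipeline_path) as f:
        pipeline_yaml = yaml.safe_load(f.read())
    assert pipeline_yaml["kind"] == "Workflow"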