Example #1
    async def get(self, runtime_type):
        self.log.debug(
            f"Retrieving pipeline components for runtime type: {runtime_type}")

        processor_manager = PipelineProcessorManager.instance()
        if processor_manager.is_supported_runtime(runtime_type):
            # The endpoint path contains the shorthand version of a runtime (e.g., 'kfp',
            # 'airflow'). This case and its associated functions should eventually be removed
            # in favor of using the RuntimeProcessorType name in the request path.
            self.log.warning(
                f"Deprecation warning: when calling endpoint '{self.request.path}' "
                f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
                f"instead of shorthand name (e.g., 'kfp', 'airflow')")
            runtime_processor_type = processor_manager.get_runtime_type(
                runtime_type)
        elif processor_manager.is_supported_runtime_type(runtime_type):
            # The request path uses the appropriate RuntimeProcessorType name. Use this
            # to get the RuntimeProcessorType instance to pass to get_all_components
            runtime_processor_type = RuntimeProcessorType.get_instance_by_name(
                runtime_type)
        else:
            raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

        # Include generic components for all runtime types
        components: List[Component] = ComponentCache.get_generic_components()

        # Add additional runtime-type-specific components, if present
        components.extend(ComponentCache.instance().get_all_components(
            platform=runtime_processor_type))

        palette_json = ComponentCache.to_canvas_palette(components=components)

        self.set_status(200)
        self.set_header("Content-Type", "application/json")
        await self.finish(palette_json)
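Examples #1 and #6 duplicate this shorthand-versus-name resolution; Examples #10 and #11 below call a shared get_runtime_processor_type helper instead. A minimal sketch of that helper follows; the signature matches the call sites in Examples #10 and #11, but the body is an assumption that simply consolidates the branching duplicated here and in Example #6:

def get_runtime_processor_type(runtime_type, log, request_path):
    """Resolve a runtime type path segment to a RuntimeProcessorType, or None."""
    processor_manager = PipelineProcessorManager.instance()
    if processor_manager.is_supported_runtime(runtime_type):
        # Shorthand name (e.g., 'kfp', 'airflow'); deprecated in favor of the
        # RuntimeProcessorType name in the request path
        log.warning(
            f"Deprecation warning: when calling endpoint '{request_path}' "
            f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
            f"instead of shorthand name (e.g., 'kfp', 'airflow')")
        return processor_manager.get_runtime_type(runtime_type)
    if processor_manager.is_supported_runtime_type(runtime_type):
        return RuntimeProcessorType.get_instance_by_name(runtime_type)
    return None  # callers raise web.HTTPError(400)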
Example #2
    def get_component(self, component_id: str) -> Optional[Component]:
        """
        Retrieve runtime-specific component details if component_id is not one of the generic set
        """

        if component_id not in ("notebook", "python-script", "r-script"):
            return ComponentCache.instance().get_component(platform=self._type, component_id=component_id)

        return ComponentCache.get_generic_component(component_id)
Example #3
 def initialize_settings(self):
     self.log.info(f"Config {self.config}")
     # Instantiate singletons with appropriate parent to enable configurability, and convey
     # root_dir to the pipeline processor, validation, and registry singletons.
     PipelineProcessorRegistry.instance(root_dir=self.settings["server_root_dir"], parent=self)
     PipelineProcessorManager.instance(root_dir=self.settings["server_root_dir"], parent=self)
     PipelineValidationManager.instance(root_dir=self.settings["server_root_dir"], parent=self)
     FileMetadataCache.instance(parent=self)
     ComponentCache.instance(parent=self).load()
     SchemaManager.instance(parent=self)
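These are singletons in the traitlets SingletonConfigurable style, so the request handlers in the other examples can later retrieve the same objects without re-passing configuration. A small illustration of the assumed instance() semantics:

# Assumed behavior: repeated .instance() calls return the object created
# during initialize_settings(), not a new one
cache_a = ComponentCache.instance()
cache_b = ComponentCache.instance()
assert cache_a is cache_b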
Example #4
    def get_components(self) -> List[Component]:
        """
        Retrieve components common to all runtimes
        """
        components: List[Component] = ComponentCache.get_generic_components()

        # Retrieve runtime-specific components
        components.extend(ComponentCache.instance().get_all_components(platform=self._type))

        return components
Example #5
def component_cache(jp_environ):
    """
    Initialize a component cache
    """
    # Create new instance and load the cache
    component_cache = ComponentCache.instance(emulate_server_app=True)
    component_cache.load()

    yield component_cache
    component_cache.cache_manager.stop()
    ComponentCache.clear_instance()
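A hypothetical test consuming this fixture might look like the following; the test body is illustrative only:

def test_cache_is_populated(component_cache):
    # The fixture yields the loaded singleton, so instance() returns it
    assert component_cache is ComponentCache.instance()
    # Generic components ('notebook', 'python-script', 'r-script') are always available
    assert len(ComponentCache.get_generic_components()) > 0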
Example #6
    async def get(self, runtime_type, component_id):
        self.log.debug(
            f"Retrieving pipeline component properties for component: {component_id}"
        )

        if not component_id:
            raise web.HTTPError(400, "Missing component ID")

        processor_manager = PipelineProcessorManager.instance()
        if processor_manager.is_supported_runtime(runtime_type):
            # The endpoint path contains the shorthand version of a runtime (e.g., 'kfp',
            # 'airflow'). This case and its associated functions should eventually be removed
            # in favor of using the RuntimeProcessorType name in the request path.
            self.log.warning(
                f"Deprecation warning: when calling endpoint '{self.request.path}' "
                f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
                f"instead of shorthand name (e.g., 'kfp', 'airflow')")
            runtime_processor_type = processor_manager.get_runtime_type(
                runtime_type)
        elif processor_manager.is_supported_runtime_type(runtime_type):
            # The request path uses the appropriate RuntimeProcessorType name. Use this
            # to get the RuntimeProcessorType instance to pass to get_component
            runtime_processor_type = RuntimeProcessorType.get_instance_by_name(
                runtime_type)
        else:
            raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

        # Try to get component_id as a generic component; assigns None if id is not a generic component
        component: Optional[Component] = ComponentCache.get_generic_component(
            component_id)

        # Try to retrieve a runtime-type-specific component; assigns None if not found
        if not component:
            component = ComponentCache.instance().get_component(
                platform=runtime_processor_type, component_id=component_id)

        if not component:
            raise web.HTTPError(404, f"Component '{component_id}' not found")

        if self.request.path.endswith("/properties"):
            # Return complete set of component properties
            json_response = ComponentCache.to_canvas_properties(component)
        else:
            # Return component definition content
            json_response = json.dumps({
                "content": component.definition,
                "mimeType": self.get_mimetype(component.file_extension),
            })

        self.set_status(200)
        self.set_header("Content-Type", "application/json")
        await self.finish(json_response)
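The handler above relies on a get_mimetype helper that is not shown in this section. A hypothetical sketch, assuming it maps a definition file's extension to a MIME type; the mapping below is illustrative, not the project's actual table:

    def get_mimetype(self, file_extension: str) -> str:
        # '.yaml' covers KFP definitions and '.py' covers Airflow operators,
        # per the parsing tests later in this section
        mimetype_map = {".yaml": "text/x-yaml", ".py": "text/x-python"}
        return mimetype_map.get(file_extension, "text/plain")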
Example #7
def test_parse_kfp_component_file_no_inputs():
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Read contents of given path
    path = _get_resource_path("kfp_test_operator_no_inputs.yaml")
    catalog_entry_data = {"path": path}

    # Construct a catalog instance
    catalog_type = "local-file-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data,
                                 catalog_instance, ["path"])

    # Parse the component entry
    parser = KfpComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    component = parser.parse(catalog_entry)[0]
    properties_json = ComponentCache.to_canvas_properties(component)

    # Properties JSON should only include the two parameters common to every
    # component ('label' and 'component_source'), plus the component description
    # if one exists (which it does for this component), and the output parameter
    # for this component
    num_common_params = 4
    assert len(properties_json["current_parameters"].keys()) == num_common_params
    assert len(properties_json["parameters"]) == num_common_params
    assert len(properties_json["uihints"]["parameter_info"]) == num_common_params

    # Total number of groups includes one for each parameter,
    # plus one for the output group header,
    # plus one for the component_source header
    num_groups = num_common_params + 2
    assert len(properties_json["uihints"]["group_info"][0]["group_info"]) == num_groups

    # Ensure that template still renders the two common parameters correctly
    assert properties_json["current_parameters"]["label"] == ""

    component_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference,
    })
    assert properties_json["current_parameters"][
        "component_source"] == component_source
Example #8
    async def put(self):

        # Validate the body
        cache_refresh = self.get_json_body()
        if "action" not in cache_refresh or cache_refresh["action"] != "refresh":
            raise web.HTTPError(
                400, reason="A body of {'action': 'refresh'} is required!")

        try:
            self.log.debug(
                "Refreshing component cache for all catalog instances...")
            ComponentCache.instance().refresh()
            self.set_status(204)
        except RefreshInProgressError as ripe:
            self.set_status(409, str(ripe))

        await self.finish()
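An illustrative client call for the refresh handler above; the endpoint URL is an assumption based on the handler's behavior, not taken from this section:

import requests

# Hypothetical endpoint path; the body contract and status codes come from
# the handler above
response = requests.put(
    "http://localhost:8888/elyra/pipeline/components/cache",
    json={"action": "refresh"},
)
assert response.status_code in (204, 409)  # 409 while a refresh is already in progress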
Example #9
def test_parse_kfp_component_url():
    # Define the appropriate reader for a URL-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = UrlComponentCatalogConnector(kfp_supported_file_types)

    # Read contents of given path
    url = "https://raw.githubusercontent.com/kubeflow/pipelines/1.4.1/components/notebooks/Run_notebook_using_papermill/component.yaml"  # noqa: E501
    catalog_entry_data = {"url": url}

    # Construct a catalog instance
    catalog_type = "url-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data,
                                 catalog_instance, ["url"])

    # Parse the component entry
    parser = KfpComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    component = parser.parse(catalog_entry)[0]
    properties_json = ComponentCache.to_canvas_properties(component)

    # Ensure component parameters are prefixed (and system parameters are not) and all hold correct values
    assert properties_json["current_parameters"]["label"] == ""

    component_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference,
    })
    assert properties_json["current_parameters"][
        "component_source"] == component_source
    assert properties_json["current_parameters"][
        "elyra_notebook"] == "None"  # Default value for type `inputpath`
    assert properties_json["current_parameters"]["elyra_parameters"] == {
        "StringControl": "{}",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"][
        "elyra_packages_to_install"] == {
            "StringControl": "[]",
            "activeControl": "StringControl",
        }
    assert properties_json["current_parameters"]["elyra_input_data"] == {
        "StringControl": "",
        "activeControl": "StringControl",
    }
Example #10
    async def get(self, runtime_type):
        self.log.debug(
            f"Retrieving pipeline components for runtime type: {runtime_type}")

        runtime_processor_type = get_runtime_processor_type(
            runtime_type, self.log, self.request.path)
        if not runtime_processor_type:
            raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

        # Include generic components for all runtime types
        components: List[Component] = ComponentCache.get_generic_components()

        # Add additional runtime-type-specific components, if present
        components.extend(ComponentCache.instance().get_all_components(
            platform=runtime_processor_type))

        palette_json = ComponentCache.to_canvas_palette(components=components)

        self.set_status(200)
        self.set_header("Content-Type", "application/json")
        await self.finish(palette_json)
Example #11
    async def get(self, runtime_type, component_id):
        self.log.debug(
            f"Retrieving pipeline component properties for component: {component_id}"
        )

        if not component_id:
            raise web.HTTPError(400, "Missing component ID")

        runtime_processor_type = get_runtime_processor_type(
            runtime_type, self.log, self.request.path)
        if not runtime_processor_type:
            raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

        # Try to get component_id as a generic component; assigns None if id is not a generic component
        component: Optional[Component] = ComponentCache.get_generic_component(
            component_id)

        # Try to retrieve a runtime-type-specific component; assigns None if not found
        if not component:
            component = ComponentCache.instance().get_component(
                platform=runtime_processor_type, component_id=component_id)

        if not component:
            raise web.HTTPError(404, f"Component '{component_id}' not found")

        if self.request.path.endswith("/properties"):
            # Return complete set of component properties
            json_response = ComponentCache.to_canvas_properties(component)
        else:
            # Return component definition content
            json_response = json.dumps({
                "content": component.definition,
                "mimeType": self.get_mimetype(component.file_extension),
            })

        self.set_status(200)
        self.set_header("Content-Type", "application/json")
        await self.finish(json_response)
Example #12
    async def put(self, catalog):

        # Validate the body
        cache_refresh = self.get_json_body()
        if "action" not in cache_refresh or cache_refresh["action"] != "refresh":
            raise web.HTTPError(
                400, reason="A body of {'action': 'refresh'} is required.")

        try:
            # Ensure given catalog name is a metadata instance
            catalog_instance = MetadataManager(
                schemaspace=ComponentCatalogs.COMPONENT_CATALOGS_SCHEMASPACE_ID
            ).get(name=catalog)
        except MetadataNotFoundError:
            raise web.HTTPError(404, f"Catalog '{catalog}' cannot be found.")

        self.log.debug(
            f"Refreshing component cache for catalog with name '{catalog}'...")
        ComponentCache.instance().update(catalog=catalog_instance,
                                         action="modify")
        self.set_status(204)

        await self.finish()
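As with Example #8, a hypothetical client call for this per-catalog handler; the URL shape is an assumption, and 404 is returned when no catalog metadata instance matches the name:

import requests

response = requests.put(
    "http://localhost:8888/elyra/pipeline/components/cache/my-catalog",  # hypothetical path
    json={"action": "refresh"},
)
assert response.status_code in (204, 404)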
Example #13
def test_parse_airflow_component_url():
    # Define the appropriate reader for a URL-type component definition
    airflow_supported_file_types = [".py"]
    reader = UrlComponentCatalogConnector(airflow_supported_file_types)

    # Read contents of given path
    url = "https://raw.githubusercontent.com/apache/airflow/1.10.15/airflow/operators/bash_operator.py"  # noqa: E501
    catalog_entry_data = {"url": url}

    # Construct a catalog instance
    catalog_type = "url-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data,
                                 catalog_instance, ["url"])

    # Parse the component entry
    parser = ComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    component = parser.parse(catalog_entry)[0]
    properties_json = ComponentCache.to_canvas_properties(component)

    # Ensure component parameters are prefixed (and system parameters are not) and all hold correct values
    assert properties_json["current_parameters"]["label"] == ""

    # Helper method to retrieve the requested parameter value from the dictionary
    def get_parameter(param_name):
        property_dict = properties_json["current_parameters"][param_name]
        return property_dict[property_dict["activeControl"]]

    component_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference,
    })
    assert properties_json["current_parameters"][
        "component_source"] == component_source
    assert get_parameter("elyra_bash_command") == ""
    assert get_parameter("elyra_xcom_push") is True
    assert get_parameter("elyra_env") == "{}"  # {}
    assert get_parameter("elyra_output_encoding") == "utf-8"
Example #14
    async def _get_component_properties(self, pipeline_runtime: str, components: dict, node_op: str) -> Dict:
        """
        Retrieve the full dict of properties associated with the node_op
        :param pipeline_runtime: the runtime of the pipeline being used, e.g. kfp, airflow
        :param components: dict of components associated with the pipeline runtime being used
        :param node_op: the node operation, e.g. execute-notebook-node
        :return: a dict of properties associated with the node_op
        """

        if node_op == "execute-notebook-node":
            node_op = "notebooks"
        elif node_op == "execute-r-node":
            node_op = "r-script"
        elif node_op == "execute-python-node":
            node_op = "python-script"
        for category in components["categories"]:
            for node_type in category["node_types"]:
                if node_op == node_type["op"]:
                    component: Component = await PipelineProcessorManager.instance().get_component(
                        pipeline_runtime, node_op
                    )
                    component_properties = ComponentCache.to_canvas_properties(component)
                    return component_properties

        return {}
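The op-name normalization at the top of this method could equally be written as a lookup table; a sketch of that equivalent form (note it preserves this method's 'notebooks' id, where Example #2 uses 'notebook'):

# Equivalent to the if/elif chain above
GENERIC_OP_TO_COMPONENT_ID = {
    "execute-notebook-node": "notebooks",
    "execute-r-node": "r-script",
    "execute-python-node": "python-script",
}
node_op = GENERIC_OP_TO_COMPONENT_ID.get(node_op, node_op)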
Example #15
 def get_components(self):
     return ComponentCache.get_generic_components()
Example #16
def test_parse_kfp_component_file():
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Read contents of given path
    path = _get_resource_path("kfp_test_operator.yaml")
    catalog_entry_data = {"path": path}

    # Construct a catalog instance
    catalog_type = "local-file-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data,
                                 catalog_instance, ["path"])

    # Parse the component entry
    parser = KfpComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    component = parser.parse(catalog_entry)[0]
    properties_json = ComponentCache.to_canvas_properties(component)

    # Ensure description is rendered even with an unescaped character
    description = 'This component description contains an unescaped " character'
    assert properties_json["current_parameters"][
        "component_description"] == description

    # Ensure component parameters are prefixed (and system parameters are not) and all hold correct values
    assert properties_json["current_parameters"]["label"] == ""

    component_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference,
    })
    assert properties_json["current_parameters"][
        "component_source"] == component_source
    assert properties_json["current_parameters"][
        "elyra_test_string_no_default"] == {
            "StringControl": "",
            "activeControl": "StringControl",
        }

    assert properties_json["current_parameters"][
        "elyra_test_string_default_value"] == {
            "StringControl": "default",
            "activeControl": "StringControl",
        }
    assert properties_json["current_parameters"][
        "elyra_test_string_default_empty"] == {
            "StringControl": "",
            "activeControl": "StringControl",
        }

    assert properties_json["current_parameters"][
        "elyra_test_bool_default"] == {
            "BooleanControl": False,
            "activeControl": "BooleanControl",
        }
    assert properties_json["current_parameters"]["elyra_test_bool_false"] == {
        "BooleanControl": False,
        "activeControl": "BooleanControl",
    }
    assert properties_json["current_parameters"]["elyra_test_bool_true"] == {
        "BooleanControl": True,
        "activeControl": "BooleanControl",
    }

    assert properties_json["current_parameters"]["elyra_test_int_default"] == {
        "NumberControl": 0,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_test_int_zero"] == {
        "NumberControl": 0,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"][
        "elyra_test_int_non_zero"] == {
            "NumberControl": 1,
            "activeControl": "NumberControl",
        }

    assert properties_json["current_parameters"][
        "elyra_test_float_default"] == {
            "NumberControl": 0.0,
            "activeControl": "NumberControl",
        }
    assert properties_json["current_parameters"]["elyra_test_float_zero"] == {
        "NumberControl": 0.0,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"][
        "elyra_test_float_non_zero"] == {
            "NumberControl": 1.0,
            "activeControl": "NumberControl",
        }

    assert properties_json["current_parameters"][
        "elyra_test_dict_default"] == {
            "StringControl": "{}",
            "activeControl": "StringControl",
        }  # {}
    assert properties_json["current_parameters"][
        "elyra_test_list_default"] == {
            "StringControl": "[]",
            "activeControl": "StringControl",
        }  # []

    # Ensure that the 'required' attribute was set correctly. KFP components default to required
    # unless explicitly marked otherwise in component YAML.
    required_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_required_property")
    assert required_property["data"]["required"] is True

    optional_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_optional_property")
    assert optional_property["data"]["required"] is False

    default_required_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_required_property_default")
    assert default_required_property["data"]["required"] is True

    # Ensure that type information is inferred correctly
    unusual_dict_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_unusual_type_dict")
    assert unusual_dict_property["data"]["controls"]["StringControl"][
        "format"] == "dictionary"

    unusual_list_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_unusual_type_list")
    assert unusual_list_property["data"]["controls"]["StringControl"][
        "format"] == "list"

    unusual_string_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_unusual_type_string")
    assert unusual_string_property["data"]["controls"]["StringControl"][
        "format"] == "string"

    file_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_unusual_type_file")
    assert file_property["data"]["format"] == "inputpath"

    no_type_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_unusual_type_notgiven")
    assert no_type_property["data"]["controls"]["StringControl"][
        "format"] == "string"

    # Ensure descriptions are rendered properly with type hint in parentheses
    assert (unusual_dict_property["description"]["default"] ==
            "The test command description "
            "(type: Dictionary of arrays)")
    assert unusual_list_property["description"][
        "default"] == "The test command description (type: An array)"
    assert unusual_string_property["description"][
        "default"] == "The test command description (type: A string)"
    assert (
        file_property["description"]["default"] ==
        "The test command description"
    )  # No data type info is included in parentheses for inputPath variables
    assert no_type_property["description"][
        "default"] == "The test command description (type: string)"
Example #17
async def test_parse_components_additional_metatypes():
    # Define the appropriate reader for a URL-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = UrlComponentCatalogConnector(kfp_supported_file_types)

    # Read contents of given path
    url = "https://raw.githubusercontent.com/kubeflow/pipelines/1.4.1/components/keras/Train_classifier/from_CSV/component.yaml"  # noqa: E501
    catalog_entry_data = {"url": url}

    # Construct a catalog instance
    catalog_type = "url-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data,
                                 catalog_instance, ["url"])

    # Parse the component entry
    parser = KfpComponentParser()
    component = parser.parse(catalog_entry)[0]
    properties_json = ComponentCache.to_canvas_properties(component)

    # Ensure component parameters are prefixed (and system parameters are not) and all hold correct values
    assert properties_json["current_parameters"]["label"] == ""

    component_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference,
    })
    assert properties_json["current_parameters"][
        "component_source"] == component_source
    assert properties_json["current_parameters"][
        "elyra_training_features"] == "None"  # inputPath
    assert properties_json["current_parameters"][
        "elyra_training_labels"] == "None"  # inputPath
    assert properties_json["current_parameters"][
        "elyra_network_json"] == "None"  # inputPath
    assert properties_json["current_parameters"]["elyra_loss_name"] == {
        "StringControl": "categorical_crossentropy",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_num_classes"] == {
        "NumberControl": 0,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_optimizer"] == {
        "StringControl": "rmsprop",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_optimizer_config"] == {
        "StringControl": "",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_learning_rate"] == {
        "NumberControl": 0.01,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_num_epochs"] == {
        "NumberControl": 100,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_batch_size"] == {
        "NumberControl": 32,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_metrics"] == {
        "StringControl": "['accuracy']",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_random_seed"] == {
        "NumberControl": 0,
        "activeControl": "NumberControl",
    }
Example #18
def _build_component_cache():
    """Initialize a ComponentCache instance and wait for it to complete all tasks"""
    with Spinner(text="Initializing the component cache..."):
        component_cache = ComponentCache.instance(emulate_server_app=True)
        component_cache.load()
        component_cache.wait_for_all_cache_tasks()
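A brief usage sketch: command-line tooling would call this helper before component lookups, so that lookups do not race the asynchronous cache load; the surrounding CLI wiring and the component id are assumptions:

# Hypothetical CLI flow
_build_component_cache()  # blocks until all cache tasks finish
component = ComponentCache.instance().get_component(
    platform=RUNTIME_PROCESSOR, component_id="my-component")  # illustrative lookup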
Example #19
    def _cc_pipeline(
        self, pipeline, pipeline_name, pipeline_version="", experiment_name="", cos_directory=None, export=False
    ):

        runtime_configuration = self._get_metadata_configuration(
            schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config
        )

        cos_endpoint = runtime_configuration.metadata["cos_endpoint"]
        cos_username = runtime_configuration.metadata.get("cos_username")
        cos_password = runtime_configuration.metadata.get("cos_password")
        cos_secret = runtime_configuration.metadata.get("cos_secret")
        cos_bucket = runtime_configuration.metadata.get("cos_bucket")
        if cos_directory is None:
            cos_directory = pipeline_name

        engine = runtime_configuration.metadata["engine"]

        self.log_pipeline_info(
            pipeline_name,
            f"processing pipeline dependencies to: {cos_endpoint} " f"bucket: {cos_bucket} folder: {cos_directory}",
        )
        t0_all = time.time()

        emptydir_volume_size = ""
        container_runtime = bool(os.getenv("CRIO_RUNTIME", "False").lower() == "true")

        # Create dictionary that maps component Id to its ContainerOp instance
        target_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(pipeline.operations)

        # Determine whether access to cloud storage is required
        for operation in sorted_operations:
            if isinstance(operation, GenericOperation):
                self._verify_cos_connectivity(runtime_configuration)
                break

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(pipeline, sorted_operations)

        for operation in sorted_operations:

            if container_runtime:
                # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
                emptydir_volume_size = "20Gi"

            sanitized_operation_name = self._sanitize_operation_name(operation.name)

            # Create pipeline operation
            # If operation is one of the "generic" set of NBs or scripts, construct custom ExecuteFileOp
            if isinstance(operation, GenericOperation):

                # Collect env variables
                pipeline_envs = self._collect_envs(
                    operation, cos_secret=cos_secret, cos_username=cos_username, cos_password=cos_password
                )

                operation_artifact_archive = self._get_dependency_archive_name(operation)

                self.log.debug(f"Creating pipeline component:\n {operation} archive : {operation_artifact_archive}")

                target_ops[operation.id] = ExecuteFileOp(
                    name=sanitized_operation_name,
                    pipeline_name=pipeline_name,
                    experiment_name=experiment_name,
                    notebook=operation.filename,
                    cos_endpoint=cos_endpoint,
                    cos_bucket=cos_bucket,
                    cos_directory=cos_directory,
                    cos_dependencies_archive=operation_artifact_archive,
                    pipeline_version=pipeline_version,
                    pipeline_source=pipeline.source,
                    pipeline_inputs=operation.inputs,
                    pipeline_outputs=operation.outputs,
                    pipeline_envs=pipeline_envs,
                    emptydir_volume_size=emptydir_volume_size,
                    cpu_request=operation.cpu,
                    mem_request=operation.memory,
                    gpu_limit=operation.gpu,
                    workflow_engine=engine,
                    image=operation.runtime_image,
                    file_outputs={
                        "mlpipeline-metrics": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-metrics.json",  # noqa
                        "mlpipeline-ui-metadata": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-ui-metadata.json",  # noqa
                    },
                )

                if operation.doc:
                    target_ops[operation.id].add_pod_annotation("elyra/node-user-doc", operation.doc)

                # TODO Can we move all of this to apply to non-standard components as well? Test when servers are up
                if cos_secret and not export:
                    target_ops[operation.id].apply(use_aws_secret(cos_secret))

                image_namespace = self._get_metadata_configuration(RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID)
                for image_instance in image_namespace:
                    if image_instance.metadata["image_name"] == operation.runtime_image and image_instance.metadata.get(
                        "pull_policy"
                    ):
                        target_ops[operation.id].container.set_image_pull_policy(image_instance.metadata["pull_policy"])

                self.log_pipeline_info(
                    pipeline_name,
                    f"processing operation dependencies for id: {operation.id}",
                    operation_name=operation.name,
                )

                self._upload_dependencies_to_object_store(runtime_configuration, cos_directory, operation)

            # If operation is a "non-standard" component, load its spec and create operation with factory function
            else:
                # Retrieve component from cache
                component = ComponentCache.instance().get_component(self._type, operation.classifier)

                # Convert the user-entered value of certain properties according to their type
                for component_property in component.properties:
                    # Get corresponding property's value from parsed pipeline
                    property_value = operation.component_params.get(component_property.ref)

                    self.log.debug(
                        f"Processing component parameter '{component_property.name}' "
                        f"of type '{component_property.data_type}'"
                    )

                    if component_property.data_type == "inputpath":
                        output_node_id = property_value["value"]
                        output_node_parameter_key = property_value["option"].replace("elyra_output_", "")
                        operation.component_params[component_property.ref] = target_ops[output_node_id].outputs[
                            output_node_parameter_key
                        ]
                    elif component_property.data_type == "inputvalue":
                        active_property = property_value["activeControl"]
                        active_property_value = property_value.get(active_property, None)

                        # If the value is not found, assign it the default value assigned in parser
                        if active_property_value is None:
                            active_property_value = component_property.value

                        if isinstance(active_property_value, dict) and set(active_property_value.keys()) == {
                            "value",
                            "option",
                        }:
                            output_node_id = active_property_value["value"]
                            output_node_parameter_key = active_property_value["option"].replace("elyra_output_", "")
                            operation.component_params[component_property.ref] = target_ops[output_node_id].outputs[
                                output_node_parameter_key
                            ]
                        elif component_property.default_data_type == "dictionary":
                            processed_value = self._process_dictionary_value(active_property_value)
                            operation.component_params[component_property.ref] = processed_value
                        elif component_property.default_data_type == "list":
                            processed_value = self._process_list_value(active_property_value)
                            operation.component_params[component_property.ref] = processed_value
                        else:
                            operation.component_params[component_property.ref] = active_property_value

                # Build component task factory
                try:
                    factory_function = components.load_component_from_text(component.definition)
                except Exception as e:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(f"Error loading component spec for {operation.name}: {str(e)}")
                    raise RuntimeError(f"Error loading component spec for {operation.name}.")

                # Add factory function, which returns a ContainerOp task instance, to pipeline operation dict
                try:
                    comp_spec_inputs = [
                        inputs.name.lower().replace(" ", "_") for inputs in factory_function.component_spec.inputs
                    ]

                    # Remove inputs and outputs from params dict
                    # TODO: need to have way to retrieve only required params
                    parameter_removal_list = ["inputs", "outputs"]
                    for component_param in operation.component_params_as_dict.keys():
                        if component_param not in comp_spec_inputs:
                            parameter_removal_list.append(component_param)

                    for parameter in parameter_removal_list:
                        operation.component_params_as_dict.pop(parameter, None)

                    # Create ContainerOp instance and assign appropriate user-provided name
                    sanitized_component_params = {
                        self._sanitize_param_name(name): value
                        for name, value in operation.component_params_as_dict.items()
                    }
                    container_op = factory_function(**sanitized_component_params)
                    container_op.set_display_name(operation.name)

                    if operation.doc:
                        container_op.add_pod_annotation("elyra/node-user-doc", operation.doc)

                    target_ops[operation.id] = container_op
                except Exception as e:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(f"Error constructing component {operation.name}: {str(e)}")
                    raise RuntimeError(f"Error constructing component {operation.name}.")

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = target_ops[operation.id]
            for parent_operation_id in operation.parent_operation_ids:
                parent_op = target_ops[parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))

        return target_ops
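For reference, the 'inputpath' branch above expects the user-entered property value to carry an upstream-node reference; the shape below is inferred from the code that consumes it:

# Inferred shape of an 'inputpath' property value
property_value = {
    "value": "<upstream node id>",
    "option": "elyra_output_<output parameter name>",
}
# ...which the loop above resolves to:
# target_ops["<upstream node id>"].outputs["<output parameter name>"]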
Example #20
def test_parse_airflow_component_file():
    # Define the appropriate reader for a filesystem-type component definition
    airflow_supported_file_types = [".py"]
    reader = FilesystemComponentCatalogConnector(airflow_supported_file_types)

    # Read contents of given path
    path = _get_resource_path("airflow_test_operator.py")
    catalog_entry_data = {"path": path}

    # Construct a catalog instance
    catalog_type = "local-file-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data,
                                 catalog_instance, ["path"])

    # Parse the component entry
    parser = ComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    components = parser.parse(catalog_entry)
    # TestOperator, DeriveFromTestOperator, and DeriveFromImportedOperator
    assert len(components) == 3

    # Split components list into its constituent operators
    components = sorted(components, key=lambda component: component.id)
    import_test_op, derive_test_op, test_op = components[0], components[1], components[2]

    # Helper method to retrieve the requested parameter value from the dictionary
    def get_parameter_value(param_name):
        property_dict = properties_json["current_parameters"][param_name]
        return property_dict[property_dict["activeControl"]]

    # Helper method to retrieve the requested parameter info from the dictionary
    def get_parameter_format(param_name, control_id="StringControl"):
        param_info = None
        for prop_info in properties_json["uihints"]["parameter_info"]:
            if prop_info.get("parameter_ref") == param_name:
                param_info = prop_info["data"]["controls"][control_id][
                    "format"]
                break

        return param_info

    # Helper method to retrieve the requested parameter description from the dictionary
    def get_parameter_description(param_name):
        param_desc = None
        for prop_info in properties_json["uihints"]["parameter_info"]:
            if prop_info.get("parameter_ref") == param_name:
                param_desc = prop_info["description"]["default"]
                break

        return param_desc

    # Helper method to retrieve whether the requested parameter is required
    def get_parameter_required(param_name):
        param_info = None
        for prop_info in properties_json["uihints"]["parameter_info"]:
            if prop_info.get("parameter_ref") == param_name:
                param_info = prop_info["data"]["required"]
                break

        return param_info

    # Retrieve properties for TestOperator
    # Test Operator does not include type hints for the init function args
    properties_json = ComponentCache.to_canvas_properties(test_op)

    # Ensure system parameters are not prefixed and hold correct values
    assert properties_json["current_parameters"]["label"] == ""

    component_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference,
    })
    assert properties_json["current_parameters"][
        "component_source"] == component_source

    # Ensure component parameters are prefixed with 'elyra_' and values are as expected
    assert get_parameter_value("elyra_str_no_default") == ""
    assert get_parameter_value("elyra_str_default") == "default"
    assert get_parameter_value("elyra_str_empty") == ""
    assert get_parameter_value("elyra_str_not_in_docstring") == ""

    assert get_parameter_value("elyra_bool_no_default") is False
    assert get_parameter_value("elyra_bool_default_false") is False
    assert get_parameter_value("elyra_bool_default_true") is True
    assert get_parameter_value("elyra_bool_not_in_docstring") is False

    assert get_parameter_value("elyra_int_no_default") == 0
    assert get_parameter_value("elyra_int_default_zero") == 0
    assert get_parameter_value("elyra_int_default_non_zero") == 2
    assert get_parameter_value("elyra_int_not_in_docstring") == 3

    assert get_parameter_value("elyra_dict_default_is_none") == "{}"  # {}
    assert get_parameter_value("elyra_list_default_is_none") == "[]"  # []

    # Ensure that type information is inferred correctly for properties that
    # define 'unusual' types, such as 'a dictionary of lists'
    assert get_parameter_format("elyra_unusual_type_dict") == "dictionary"
    assert get_parameter_format("elyra_unusual_type_list") == "list"

    # Ensure that type information falls back to string if no type hint present
    # and no ':type: <type info>' phrase found in docstring
    assert get_parameter_format("elyra_fallback_type") == "string"

    # Ensure component parameters are marked as required in the correct circumstances
    # (parameter is required if there is no default value provided or if a type hint
    # does not include 'Optional[...]')
    assert get_parameter_required("elyra_str_no_default") is True
    assert get_parameter_required("elyra_str_default") is False
    assert get_parameter_required("elyra_str_empty") is False

    # Ensure descriptions are rendered properly with type hint in parentheses
    assert (get_parameter_description("elyra_unusual_type_dict") ==
            "a dictionary parameter with the "
            "phrase 'list' in type description "
            "(type: a dictionary of arrays)")
    assert (get_parameter_description("elyra_unusual_type_list") ==
            "a list parameter with the phrase "
            "'string' in type description "
            "(type: a list of strings)")
    assert get_parameter_description("elyra_fallback_type") == "(type: str)"

    # Ensure that a long description with line wrapping and a backslash escape has rendered
    # (and hence did not raise an error during json.loads in the properties API request)
    parsed_description = """a string parameter with a very long description
        that wraps lines and also has an escaped underscore in it, as shown here: (\_)  # noqa W605"""
    modified_description = parsed_description.replace(
        "\n", " ") + " (type: str)"  # modify desc acc. to parser rules
    assert get_parameter_description(
        "elyra_long_description_property") == modified_description

    # Retrieve properties for DeriveFromTestOperator
    # DeriveFromTestOperator includes type hints for all init arguments
    properties_json = ComponentCache.to_canvas_properties(derive_test_op)

    # Ensure default values are parsed correctly in the case where type hints are present
    assert get_parameter_value("elyra_str_default") == "default"
    assert get_parameter_value("elyra_bool_default") is True
    assert get_parameter_value("elyra_int_default") == 2

    # Ensure component parameters are prefixed with 'elyra_' and types are as expected
    # in the case when a type hint is provided (and regardless of whether or not the
    # parameter type is included in the docstring)
    assert get_parameter_format("elyra_str_no_default") == "string"
    assert get_parameter_format("elyra_str_default") == "string"
    assert get_parameter_format("elyra_str_optional_default") == "string"
    assert get_parameter_format("elyra_str_not_in_docstring") == "string"

    assert get_parameter_format("elyra_bool_no_default",
                                "BooleanControl") == "boolean"
    assert get_parameter_format("elyra_bool_default",
                                "BooleanControl") == "boolean"
    assert get_parameter_format("elyra_bool_not_in_docstring",
                                "BooleanControl") == "boolean"

    assert get_parameter_format("elyra_int_no_default",
                                "NumberControl") == "number"
    assert get_parameter_format("elyra_int_default",
                                "NumberControl") == "number"
    assert get_parameter_format("elyra_int_not_in_docstring",
                                "NumberControl") == "number"

    assert get_parameter_format("elyra_list_optional_default") == "list"

    # Ensure component parameters are marked as required in the correct circumstances
    assert get_parameter_required("elyra_str_no_default") is True
    assert get_parameter_required("elyra_str_default") is False
    assert get_parameter_required("elyra_str_optional_default") is False
    assert get_parameter_required("elyra_str_not_in_docstring") is True

    # Retrieve properties for DeriveFromImportedOperator
    # DeriveFromImportedOperator includes type hints for dictionary and
    # list values to test the more complex parsing required in this case
    properties_json = ComponentCache.to_canvas_properties(import_test_op)

    # Ensure component parameters are prefixed with 'elyra_' and types are as expected
    assert get_parameter_format("elyra_dict_no_default") == "dictionary"
    assert get_parameter_format(
        "elyra_dict_optional_no_default") == "dictionary"
    assert get_parameter_format("elyra_nested_dict_default") == "dictionary"
    assert get_parameter_format("elyra_dict_not_in_docstring") == "dictionary"

    assert get_parameter_format("elyra_list_no_default") == "list"
    assert get_parameter_format("elyra_list_optional_no_default") == "list"
    assert get_parameter_format("elyra_list_default") == "list"
    assert get_parameter_format("elyra_list_optional_default") == "list"
    assert get_parameter_format("elyra_list_not_in_docstring") == "list"

    assert get_parameter_value("elyra_dict_no_default") == "{}"
    assert get_parameter_value("elyra_list_no_default") == "[]"
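The filesystem-based parsing tests above call a _get_resource_path helper that is not defined in this section. A minimal sketch, assuming test resources live in a directory next to the test module; the directory name is an assumption:

import os


def _get_resource_path(filename):
    # Assumed layout: a 'resources' directory alongside the test module
    root = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(root, "resources", filename)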
Example #21
    def _cc_pipeline(self, pipeline, pipeline_name):

        runtime_configuration = self._get_metadata_configuration(
            schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID,
            name=pipeline.runtime_config)
        image_namespace = self._get_metadata_configuration(
            schemaspace=RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID)

        cos_endpoint = runtime_configuration.metadata.get("cos_endpoint")
        cos_username = runtime_configuration.metadata.get("cos_username")
        cos_password = runtime_configuration.metadata.get("cos_password")
        cos_secret = runtime_configuration.metadata.get("cos_secret")
        cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata.get("cos_bucket")

        # Create a list that will hold a dict of settings for each target operation
        target_ops = []

        self.log_pipeline_info(
            pipeline_name,
            f"processing pipeline dependencies to: {cos_endpoint} "
            f"bucket: {cos_bucket} folder: {pipeline_name}",
        )

        t0_all = time.time()

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(
            pipeline.operations)

        # Determine whether access to cloud storage is required and check connectivity
        for operation in sorted_operations:
            if isinstance(operation, GenericOperation):
                self._verify_cos_connectivity(runtime_configuration)
                break

        # All previous operation outputs should be propagated throughout the pipeline.
        # In order to process this recursively, the current operation's inputs should be combined
        # from its parent's inputs (which, themselves are derived from the outputs of their parent)
        # and its parent's outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(
            pipeline, sorted_operations)

        # Scrub all node labels of invalid characters
        scrubbed_operations = self._scrub_invalid_characters_from_list(
            sorted_operations)
        # Generate unique names for all operations
        unique_operations = self._create_unique_node_names(scrubbed_operations)

        for operation in unique_operations:

            if isinstance(operation, GenericOperation):
                operation_artifact_archive = self._get_dependency_archive_name(
                    operation)

                self.log.debug(
                    f"Creating pipeline component:\n {operation} archive : {operation_artifact_archive}"
                )

                # Collect env variables
                pipeline_envs = self._collect_envs(operation,
                                                   cos_secret=cos_secret,
                                                   cos_username=cos_username,
                                                   cos_password=cos_password)

                # Generate unique ELYRA_RUN_NAME value and expose it as an
                # environment variable in the container.
                # Notebook | script nodes are implemented using the kubernetes_pod_operator
                # (https://airflow.apache.org/docs/apache-airflow/1.10.12/_api/airflow/contrib/operators/kubernetes_pod_operator/index.html)
                # Environment variables that are passed to this operator are
                # pre-processed by Airflow at runtime and placeholder values (expressed as '{{ xyz }}'
                #  - see https://airflow.apache.org/docs/apache-airflow/1.10.12/macros-ref#default-variables)
                # replaced.
                if pipeline_envs is None:
                    pipeline_envs = {}
                pipeline_envs[
                    "ELYRA_RUN_NAME"] = f"{pipeline_name}-{{{{ ts_nodash }}}}"

                image_pull_policy = None
                runtime_image_pull_secret = None
                for image_instance in image_namespace:
                    if image_instance.metadata[
                            "image_name"] == operation.runtime_image:
                        if image_instance.metadata.get("pull_policy"):
                            image_pull_policy = image_instance.metadata[
                                "pull_policy"]
                        if image_instance.metadata.get("pull_secret"):
                            runtime_image_pull_secret = image_instance.metadata[
                                "pull_secret"]
                        break

                bootscript = BootscriptBuilder(
                    filename=operation.filename,
                    cos_endpoint=cos_endpoint,
                    cos_bucket=cos_bucket,
                    cos_directory=cos_directory,
                    cos_dependencies_archive=operation_artifact_archive,
                    inputs=operation.inputs,
                    outputs=operation.outputs,
                )

                target_op = {
                    "notebook": operation.name,
                    "id": operation.id,
                    "argument_list": bootscript.container_cmd,
                    "runtime_image": operation.runtime_image,
                    "pipeline_envs": pipeline_envs,
                    "parent_operation_ids": operation.parent_operation_ids,
                    "image_pull_policy": image_pull_policy,
                    "cpu_request": operation.cpu,
                    "mem_request": operation.memory,
                    "gpu_limit": operation.gpu,
                    "operator_source": operation.component_params["filename"],
                    "is_generic_operator": True,
                    "doc": operation.doc,
                }

                if runtime_image_pull_secret is not None:
                    target_op[
                        "runtime_image_pull_secret"] = runtime_image_pull_secret

                target_ops.append(target_op)

                self.log_pipeline_info(
                    pipeline_name,
                    f"processing operation dependencies for id: {operation.id}",
                    operation_name=operation.name,
                )

                self._upload_dependencies_to_object_store(
                    runtime_configuration, pipeline_name, operation)

            else:
                # Retrieve component from cache
                component = ComponentCache.instance().get_component(
                    self._type, operation.classifier)

                # Convert the user-entered value of certain properties according to their type
                for component_property in component.properties:
                    # Skip properties for which no value was given
                    if component_property.ref not in operation.component_params:
                        continue

                    # Get corresponding property's value from parsed pipeline
                    property_value_dict = operation.component_params.get(
                        component_property.ref)

                    # The type and value of this property can vary depending on what the user chooses
                    # in the pipeline editor. So we get the current active parameter (e.g. StringControl)
                    # from the activeControl value
                    active_property_name = property_value_dict["activeControl"]

                    # Once we have the active control name (e.g. StringControl), we can
                    # retrieve the value assigned to it
                    property_value = property_value_dict.get(
                        active_property_name, None)

                    # If the value is not found, assign it the default value assigned in parser
                    if property_value is None:
                        property_value = component_property.value

                    self.log.debug(
                        f"Active property name : {active_property_name}, value : {property_value}"
                    )
                    self.log.debug(
                        f"Processing component parameter '{component_property.name}' "
                        f"of type '{component_property.data_type}'")

                    # A dict of the form {"value": ..., "option": ...} references the
                    # output of a parent node (an `InputPath` selection); convert it to
                    # an Airflow XCom pull from the parent task
                    if (isinstance(property_value, dict) and set(
                            property_value.keys()) == {"value", "option"}):
                        parent_node_name = self._get_node_name(
                            target_ops, property_value["value"])
                        processed_value = "\"{{ ti.xcom_pull(task_ids='" + parent_node_name + "') }}\""
                        operation.component_params[
                            component_property.ref] = processed_value
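                        # At DAG runtime, Airflow resolves this template to the XCom value
                        # pushed by the parent task, e.g.
                        # "{{ ti.xcom_pull(task_ids='download_data') }}" (task id illustrative)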
                    elif component_property.data_type == "boolean":
                        operation.component_params[
                            component_property.ref] = property_value
                    elif component_property.data_type == "string":
                        # Add surrounding quotation marks to string value for correct rendering
                        # in jinja DAG template
                        operation.component_params[
                            component_property.ref] = json.dumps(
                                property_value)
                    elif component_property.data_type == "dictionary":
                        processed_value = self._process_dictionary_value(
                            property_value)
                        operation.component_params[
                            component_property.ref] = processed_value
                    elif component_property.data_type == "list":
                        processed_value = self._process_list_value(
                            property_value)
                        operation.component_params[
                            component_property.ref] = processed_value

                # Remove inputs and outputs from params dict until support for data exchange is provided
                operation.component_params_as_dict.pop("inputs", None)
                operation.component_params_as_dict.pop("outputs", None)

                # Locate the import statement; if one cannot be determined, warn or raise below
                import_stmts = []
                # Check for import statement on Component object, otherwise get from class_import_map
                import_stmt = component.import_statement or self.class_import_map.get(
                    component.name)
                if import_stmt:
                    import_stmts.append(import_stmt)
                else:
                    # If we didn't find a mapping to the import statement, check whether the
                    # component name includes a package prefix. If it does, log a warning but
                    # proceed; otherwise, raise an exception.
                    if "." in component.name:  # We (presumably) have a package prefix
                        self.log.warning(
                            f"Operator '{component.name}' of node '{operation.name}' is not configured "
                            f"in the list of available Airflow operators but appears to include a "
                            f"package prefix and processing will proceed.")
                    else:
                        raise ValueError(
                            f"Operator '{component.name}' of node '{operation.name}' is not configured "
                            f"in the list of available operators.  Please add the fully-qualified "
                            f"package name for '{component.name}' to the "
                            f"AirflowPipelineProcessor.available_airflow_operators configuration."
                        )

                target_op = {
                    "notebook": operation.name,
                    "id": operation.id,
                    "imports": import_stmts,
                    "class_name": component.name,
                    "parent_operation_ids": operation.parent_operation_ids,
                    "component_params": operation.component_params_as_dict,
                    "operator_source": component.component_source,
                    "is_generic_operator": False,
                    "doc": operation.doc,
                }

                target_ops.append(target_op)

        ordered_target_ops = OrderedDict()

        # Repeatedly sweep the remaining ops, emitting each one whose parents have
        # all been emitted already (a simple topological sort; assumes an acyclic
        # graph; a standalone sketch follows this example)
        while target_ops:
            for _ in range(len(target_ops)):
                target_op = target_ops.pop(0)
                if not target_op["parent_operation_ids"]:
                    ordered_target_ops[target_op["id"]] = target_op
                    self.log.debug("Root Node added : %s",
                                   ordered_target_ops[target_op["id"]])
                elif all(dep in ordered_target_ops
                         for dep in target_op["parent_operation_ids"]):
                    ordered_target_ops[target_op["id"]] = target_op
                    self.log.debug("Dependent Node added : %s",
                                   ordered_target_ops[target_op["id"]])
                else:
                    target_ops.append(target_op)

        self.log_pipeline_info(pipeline_name,
                               "pipeline dependencies processed",
                               duration=(time.time() - t0_all))

        return ordered_target_ops
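The ordering loop above is a repeated-sweep topological sort: an operation is emitted only once all of its parents have been emitted, and anything not yet ready is pushed to the back of the queue for a later sweep. A minimal standalone sketch of the same idea; the function and variable names are illustrative and not part of the processor API, and, like the original, it assumes the dependency graph is acyclic:

from collections import OrderedDict
from typing import List


def order_by_dependencies(ops: List[dict]) -> "OrderedDict[str, dict]":
    """Order ops so that each appears after all of its parents.

    Each op is a dict carrying "id" and "parent_operation_ids" keys,
    mirroring the target_op dicts built in the example above.
    """
    ordered: "OrderedDict[str, dict]" = OrderedDict()
    pending = list(ops)
    while pending:
        op = pending.pop(0)
        if all(parent in ordered for parent in op["parent_operation_ids"]):
            ordered[op["id"]] = op  # all dependencies satisfied
        else:
            pending.append(op)  # revisit once its parents have been placed
    return ordered


# order_by_dependencies([{"id": "a", "parent_operation_ids": []},
#                        {"id": "b", "parent_operation_ids": ["a"]}])
# yields an OrderedDict with "a" before "b"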
Example #22
def pipeline():
    ComponentCache.instance().wait_for_all_cache_tasks()
    pipeline_resource = _read_pipeline_resource(
        "resources/sample_pipelines/pipeline_3_node_sample.json")
    return PipelineParser.parse(pipeline_resource)
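In the test suite a function like this is registered as a pytest fixture (the decorator is not shown in this excerpt). A hypothetical test consuming it might look like:

def test_pipeline_parses_all_nodes(pipeline):
    # `pipeline` is the parsed Pipeline returned by the fixture above;
    # the sample resource name suggests it contains three nodes
    assert len(pipeline.operations) == 3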
Example #23
    async def _validate_compatibility(
        self,
        pipeline_definition: PipelineDefinition,
        pipeline_type: str,
        pipeline_runtime: str,
        response: ValidationResponse,
    ) -> None:
        """
        Checks that the pipeline payload is compatible with this version of elyra (ISSUE #938)
        as well as verifying all nodes in the pipeline are supported by the runtime
        :param pipeline_definition: the pipeline definition to be validated
        :param pipeline_type: type of the pipeline runtime being used e.g. KUBEFLOW_PIPELINES, APACHE_AIRFLOW, generic
        :param pipeline_runtime: name of the pipeline runtime for execution  e.g. kfp, airflow, local
        :param response: ValidationResponse containing the issue list to be updated
        """

        primary_pipeline_id = pipeline_definition.primary_pipeline.id
        supported_ops = []

        if pipeline_runtime:
            if not PipelineValidationManager._is_compatible_pipeline(pipeline_runtime, pipeline_type):
                response.add_message(
                    severity=ValidationSeverity.Error,
                    message_type="invalidRuntime",
                    message="Pipeline runtime platform is not compatible " "with selected runtime configuration.",
                    data={
                        "pipelineID": primary_pipeline_id,
                        "pipelineType": pipeline_type,
                        "pipelineRuntime": pipeline_runtime,
                    },
                )
            elif PipelineProcessorManager.instance().is_supported_runtime(pipeline_runtime):
                component_list = await PipelineProcessorManager.instance().get_components(pipeline_runtime)
                for component in component_list:
                    supported_ops.append(component.op)

                # Checks pipeline node types are compatible with the runtime selected
                for sub_pipeline in pipeline_definition.pipelines:
                    for node in sub_pipeline.nodes:
                        if node.op not in ComponentCache.get_generic_component_ops() and pipeline_runtime == "local":
                            response.add_message(
                                severity=ValidationSeverity.Error,
                                message_type="invalidNodeType",
                                message="This pipeline contains at least one runtime-specific "
                                "component, but pipeline runtime is 'local'. Specify a "
                                "runtime config or remove runtime-specific components "
                                "from the pipeline",
                                data={"nodeID": node.id, "nodeOpName": node.op, "pipelineId": sub_pipeline.id},
                            )
                            break
                        if node.type == "execution_node" and node.op not in supported_ops:
                            response.add_message(
                                severity=ValidationSeverity.Error,
                                message_type="invalidNodeType",
                                message="This component was not found in the catalog. Please add it "
                                "to your component catalog or remove this node from the "
                                "pipeline",
                                data={
                                    "nodeID": node.id,
                                    "nodeOpName": node.op,
                                    "nodeName": node.label,
                                    "pipelineId": sub_pipeline.id,
                                },
                            )
            else:
                response.add_message(
                    severity=ValidationSeverity.Error,
                    message_type="invalidRuntime",
                    message="Unsupported pipeline runtime",
                    data={
                        "pipelineRuntime": pipeline_runtime,
                        "pipelineType": pipeline_type,
                        "pipelineId": primary_pipeline_id,
                    },
                )
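The _is_compatible_pipeline check above pairs a runtime shorthand with a pipeline type. A minimal concept sketch of such a check follows; the shorthand-to-type mapping and the generic-pipeline rule are assumptions for illustration, not Elyra's actual implementation:

RUNTIME_TYPE_BY_SHORTHAND = {
    "kfp": "KUBEFLOW_PIPELINES",  # assumed mapping
    "airflow": "APACHE_AIRFLOW",  # assumed mapping
}


def is_compatible(pipeline_runtime: str, pipeline_type: str) -> bool:
    # Generic pipelines are assumed runnable on any runtime; otherwise the
    # runtime's type must match the pipeline's declared type (assumed semantics)
    if pipeline_type.lower() == "generic":
        return True
    return RUNTIME_TYPE_BY_SHORTHAND.get(pipeline_runtime) == pipeline_type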
Example #24
    async def _validate_custom_component_node_properties(
        self, node: Node, response: ValidationResponse, pipeline_definition: PipelineDefinition, pipeline_runtime: str
    ):
        """
        Validates the properties of the custom component node
        :param node: the node to be validated
        :param response: the validation response object to attach any error messages
        :param pipeline_definition: the pipeline definition containing the node
        :param pipeline_runtime: the pipeline runtime selected
        :return:
        """

        component_list = await PipelineProcessorManager.instance().get_components(pipeline_runtime)
        components = ComponentCache.to_canvas_palette(component_list)

        # Full dict of properties for the operation e.g. current params, optionals etc
        component_property_dict = await self._get_component_properties(pipeline_runtime, components, node.op)

        # List of just the current parameters for the component
        current_parameter_defaults_list = list(
            map(lambda x: str(x).replace("elyra_", ""), component_property_dict["current_parameters"].keys())
        )

        # Remove the non-component_parameter jinja-templated values that we do not check against
        current_parameter_defaults_list.remove("component_source")
        current_parameter_defaults_list.remove("label")

        for default_parameter in current_parameter_defaults_list:
            node_param = node.get_component_parameter(default_parameter)
            if self._is_required_property(component_property_dict, default_parameter):
                if not node_param:
                    response.add_message(
                        severity=ValidationSeverity.Error,
                        message_type="invalidNodeProperty",
                        message="Node is missing required property.",
                        data={"nodeID": node.id, "nodeName": node.label, "propertyName": default_parameter},
                    )
                elif self._get_component_type(component_property_dict, default_parameter) == "inputpath":
                    # Any component property with type `InputPath` will be a dictionary of two keys
                    # "value": the node ID of the parent node containing the output
                    # "option": the name of the key (which is an output) of the above referenced node
                    # Guard against a non-dict value before dereferencing it below
                    if (
                        not isinstance(node_param, dict)
                        or len(node_param) != 2
                        or set(node_param.keys()) != {"value", "option"}
                    ):
                        response.add_message(
                            severity=ValidationSeverity.Error,
                            message_type="invalidNodeProperty",
                            message="Node has malformed `InputPath` parameter structure",
                            data={"nodeID": node.id, "nodeName": node.label},
                        )
                    else:
                        node_ids = [link.get("node_id_ref") for link in node.component_links]
                        parent_list = self._get_parent_id_list(pipeline_definition, node_ids, [])
                        node_param_value = node_param.get("value")
                        if node_param_value not in parent_list:
                            response.add_message(
                                severity=ValidationSeverity.Error,
                                message_type="invalidNodeProperty",
                                message="Node contains an invalid inputpath reference. Please "
                                "check your node-to-node connections",
                                data={"nodeID": node.id, "nodeName": node.label},
                            )
                elif isinstance(node_param, dict) and node_param.get("activeControl") == "NestedEnumControl":
                    if not node_param.get("NestedEnumControl"):
                        response.add_message(
                            severity=ValidationSeverity.Error,
                            message_type="invalidNodeProperty",
                            message="Node contains an invalid reference to an node output. Please "
                            "check the node properties are configured properly",
                            data={"nodeID": node.id, "nodeName": node.label},
                        )
                    else:
                        # TODO: Update this hardcoded check for xcom_push. This parameter is specific to a runtime
                        # (Airflow). i.e. abstraction for byo validation?
                        node_param_value = node_param["NestedEnumControl"].get("value")
                        upstream_node = pipeline_definition.get_node(node_param_value)
                        xcom_param = upstream_node.get_component_parameter("xcom_push")
                        if xcom_param:
                            xcom_value = xcom_param.get("BooleanControl")
                            if not xcom_value:
                                response.add_message(
                                    severity=ValidationSeverity.Error,
                                    message_type="invalidNodeProperty",
                                    message="Node contains an invalid input reference. The parent "
                                    "node does not have the xcom_push property enabled",
                                    data={
                                        "nodeID": node.id,
                                        "nodeName": node.label,
                                        "parentNodeID": upstream_node.label,
                                    },
                                )
Example #25
    async def stop_extension(self):
        if ComponentCache.initialized():
            ComponentCache.instance(parent=self).cache_manager.stop()  # terminate CacheUpdateManager
Example #26
def test_processing_filename_runtime_specific_component(
        monkeypatch, processor, sample_metadata, tmpdir):
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    absolute_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "resources",
                     "components", "download_data.yaml"))

    # Read contents of given path -- get_entry_data() returns an
    # EntryData object whose `definition` attribute holds the component definition
    entry_data = reader.get_entry_data({"path": absolute_path}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="Download data",
        description="",
        op="download-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": absolute_path},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include a single filename-based component for testing
    ComponentCache.instance()._component_cache[processor._type.name] = {
        "spoofed_catalog": {
            "components": {
                component_id: component
            }
        }
    }

    # Construct hypothetical operation for component
    operation_name = "Download data test"
    operation_params = {
        "url":
        "https://raw.githubusercontent.com/elyra-ai/elyra/master/tests/assets/helloworld.ipynb",
        "curl_options": "--location",
    }
    operation = Operation(
        id="download-data-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="kfp",
                              metadata=sample_metadata)

    mocked_func = mock.Mock(return_value="default",
                            side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(id="pipeline-id",
                        name="kfp_test",
                        runtime="kfp",
                        runtime_config="test",
                        source="download_data.pipeline")
    pipeline.operations[operation.id] = operation

    # Establish path and function to construct pipeline
    pipeline_path = os.path.join(tmpdir, "kfp_test.yaml")
    constructed_pipeline_function = lambda: processor._cc_pipeline(
        pipeline=pipeline, pipeline_name="test_pipeline")

    # TODO Check against both argo and tekton compilations
    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function,
                                         pipeline_path)

    # Read contents of pipeline YAML
    with open(pipeline_path) as f:
        pipeline_yaml = yaml.safe_load(f.read())

    # Check the pipeline file contents for correctness
    pipeline_template = pipeline_yaml["spec"]["templates"][0]
    assert pipeline_template["metadata"]["annotations"][
        "pipelines.kubeflow.org/task_display_name"] == operation_name
    assert pipeline_template["container"]["command"][3] == operation_params[
        "url"]