Example #1
    def initialize_settings(self):
        self.log.info(f"Config {self.config}")
        # Instantiate singletons with the appropriate parent to enable configurability,
        # and convey root_dir to the processor registry, processor manager, and
        # validation manager.
        PipelineProcessorRegistry.instance(root_dir=self.settings["server_root_dir"], parent=self)
        PipelineProcessorManager.instance(root_dir=self.settings["server_root_dir"], parent=self)
        PipelineValidationManager.instance(root_dir=self.settings["server_root_dir"], parent=self)
        FileMetadataCache.instance(parent=self)
        ComponentCache.instance(parent=self).load()
        SchemaManager.instance(parent=self)
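All of these managers expose a class-level instance() accessor: the first call constructs the object (here with root_dir and parent) and later calls return the cached instance. This matches the behavior of traitlets' SingletonConfigurable, which these classes appear to build on. A minimal sketch of the pattern, with a hypothetical class name:

from traitlets.config import SingletonConfigurable

class ExampleManager(SingletonConfigurable):
    """Hypothetical stand-in for the managers above."""

    def __init__(self, root_dir=None, **kwargs):
        # 'parent' travels through kwargs to Configurable, enabling configurability
        super().__init__(**kwargs)
        self.root_dir = root_dir

# The first call constructs the singleton with the given arguments ...
manager = ExampleManager.instance(root_dir="/tmp/workspace")
# ... and subsequent calls return the same cached object.
assert ExampleManager.instance() is manager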
Example #2
    async def get(self, runtime_type):
        self.log.debug(
            f"Retrieving pipeline components for runtime type: {runtime_type}")

        processor_manager = PipelineProcessorManager.instance()
        if processor_manager.is_supported_runtime(runtime_type):
            # The endpoint path contains the shorthand version of a runtime (e.g., 'kfp',
            # 'airflow'). This case and its associated functions should eventually be removed
            # in favor of using the RuntimeProcessorType name in the request path.
            self.log.warning(
                f"Deprecation warning: when calling endpoint '{self.request.path}' "
                f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
                f"instead of shorthand name (e.g., 'kfp', 'airflow')")
            runtime_processor_type = processor_manager.get_runtime_type(
                runtime_type)
        elif processor_manager.is_supported_runtime_type(runtime_type):
            # The request path uses the appropriate RuntimeProcessorType name. Use this
            # to get the RuntimeProcessorType instance to pass to get_all_components
            runtime_processor_type = RuntimeProcessorType.get_instance_by_name(
                runtime_type)
        else:
            raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

        # Include generic components for all runtime types
        components: List[Component] = ComponentCache.get_generic_components()

        # Add additional runtime-type-specific components, if present
        components.extend(ComponentCache.instance().get_all_components(
            platform=runtime_processor_type))

        palette_json = ComponentCache.to_canvas_palette(components=components)

        self.set_status(200)
        self.set_header("Content-Type", "application/json")
        await self.finish(palette_json)
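For context, the handler above is a Tornado-style coroutine: runtime_type is captured from the URL. A hypothetical registration sketch follows; the handler class name and route pattern are illustrative, not Elyra's actual spec:

from tornado import web

# Named capture groups are passed to get() as keyword arguments.
# PipelineComponentHandler is a hypothetical name for the class defining get() above.
application = web.Application([
    (r"/elyra/pipeline/components/(?P<runtime_type>[^/]+)", PipelineComponentHandler),
])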
Example #3
def get_runtime_processor_type(
        runtime_type: str, log: Logger,
        request_path: str) -> Optional[RuntimeProcessorType]:
    """
    Gets the runtime processor type for the runtime type given in the request path.

    :param runtime_type: can be the shorthand runtime ('kfp', 'airflow') or the
        runtime type name ('KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') (preferred).
    :param log: used to log the appropriate warning for shorthand-name requests
    :param request_path: full request path of the endpoint

    :returns: the RuntimeProcessorType for the given runtime_type, or None
    """
    processor_manager = PipelineProcessorManager.instance()
    if processor_manager.is_supported_runtime(runtime_type):
        # The endpoint path contains the shorthand version of a runtime (e.g., 'kfp',
        # 'airflow'). This case and its associated functions should eventually be removed
        # in favor of using the RuntimeProcessorType name in the request path.
        log.warning(
            f"Deprecation warning: when calling endpoint '{request_path}' "
            f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
            f"instead of shorthand name (e.g., 'kfp', 'airflow')")
        return processor_manager.get_runtime_type(runtime_type)
    elif processor_manager.is_supported_runtime_type(runtime_type):
        # The request path uses the appropriate RuntimeProcessorType name. Use this
        # to get the RuntimeProcessorType instance to pass to get_all_components
        return RuntimeProcessorType.get_instance_by_name(runtime_type)
    return None
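This helper factors out the branch that Examples #2 and #4 each repeat inline. With it available, those handlers can shrink to a sketch like:

    async def get(self, runtime_type):
        runtime_processor_type = get_runtime_processor_type(runtime_type, self.log, self.request.path)
        if not runtime_processor_type:
            raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")
        # ... remainder of the handler unchanged ...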
Example #4
    async def get(self, runtime_type, component_id):
        self.log.debug(
            f"Retrieving pipeline component properties for component: {component_id}"
        )

        if not component_id:
            raise web.HTTPError(400, "Missing component ID")

        processor_manager = PipelineProcessorManager.instance()
        if processor_manager.is_supported_runtime(runtime_type):
            # The endpoint path contains the shorthand version of a runtime (e.g., 'kfp',
            # 'airflow'). This case and its associated functions should eventually be removed
            # in favor of using the RuntimeProcessorType name in the request path.
            self.log.warning(
                f"Deprecation warning: when calling endpoint '{self.request.path}' "
                f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
                f"instead of shorthand name (e.g., 'kfp', 'airflow')")
            runtime_processor_type = processor_manager.get_runtime_type(
                runtime_type)
        elif processor_manager.is_supported_runtime_type(runtime_type):
            # The request path uses the appropriate RuntimeProcessorType name. Use this
            # to get the RuntimeProcessorType instance to pass to get_component
            runtime_processor_type = RuntimeProcessorType.get_instance_by_name(
                runtime_type)
        else:
            raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

        # Try to get component_id as a generic component; assigns None if id is not a generic component
        component: Optional[Component] = ComponentCache.get_generic_component(
            component_id)

        # Try to retrieve a runtime-type-specific component; assigns None if not found
        if not component:
            component = ComponentCache.instance().get_component(
                platform=runtime_processor_type, component_id=component_id)

        if not component:
            raise web.HTTPError(404, f"Component '{component_id}' not found")

        if self.request.path.endswith("/properties"):
            # Return complete set of component properties
            json_response = ComponentCache.to_canvas_properties(component)
        else:
            # Return component definition content
            json_response = json.dumps({
                "content": component.definition,
                "mimeType": self.get_mimetype(component.file_extension),
            })

        self.set_status(200)
        self.set_header("Content-Type", "application/json")
        await self.finish(json_response)
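get_mimetype is referenced above but not shown in these examples. A plausible minimal implementation using the standard library (the fallback type is an assumption):

import mimetypes

def get_mimetype(self, file_extension: str) -> str:
    # Assumed helper: map an extension such as ".py" to a MIME type,
    # falling back to plain text for extensions the platform doesn't know.
    return mimetypes.types_map.get(file_extension, "text/plain")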
Example #5
def _execute_pipeline(pipeline_definition) -> PipelineProcessorResponse:
    try:
        # parse pipeline
        pipeline_object = PipelineParser().parse(pipeline_definition)
        # process pipeline
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            response = asyncio.get_event_loop().run_until_complete(
                PipelineProcessorManager.instance().process(pipeline_object)
            )
            return response
    except ValueError as ve:
        raise click.ClickException(f"Error parsing pipeline: \n {ve}")
    except RuntimeError as re:
        raise click.ClickException(f"Error processing pipeline: \n {re} \n {re.__cause__}")
Example #6
def export(pipeline_path, runtime_config, output, overwrite):
    """
    Export a pipeline to a runtime-specific format
    """

    click.echo()
    print_banner("Elyra pipeline export")

    rtc = _get_runtime_config(runtime_config)
    runtime_schema = rtc.schema_name
    runtime_type = rtc.metadata.get("runtime_type")

    pipeline_definition = _preprocess_pipeline(pipeline_path, runtime=runtime_schema, runtime_config=runtime_config)

    # Verify that the pipeline's runtime type is compatible with the
    # runtime configuration
    pipeline_runtime_type = _get_pipeline_runtime_type(pipeline_definition)
    if pipeline_runtime_type and pipeline_runtime_type != "Generic" and pipeline_runtime_type != runtime_type:
        raise click.BadParameter(
            f"The runtime configuration type '{runtime_type}' does not match "
            f"the pipeline's runtime type '{pipeline_runtime_type}'.",
            param_hint="--runtime-config",
        )

    resources = RuntimeTypeResources.get_instance_by_type(RuntimeProcessorType.get_instance_by_name(runtime_type))
    supported_export_formats = resources.get_export_extensions()
    if len(supported_export_formats) == 0:
        raise click.ClickException(f"Runtime type '{runtime_type}' does not support export.")

    # If, in the future, a runtime supports multiple export output formats,
    # the user can choose one. For now, choose the only option.
    selected_export_format = supported_export_formats[0]
    selected_export_format_suffix = f".{selected_export_format}"

    # generate output file name from the user-provided input
    if output is None:
        # user did not specify an output; use current directory
        # and derive the file name from the pipeline file name
        output_path = Path.cwd()
        filename = f"{Path(pipeline_path).stem}{selected_export_format_suffix}"
    else:
        if output.suffix == selected_export_format_suffix:
            # user provided a file name
            output_path = output.parent
            filename = output.name
        else:
            # user provided a directory
            output_path = output
            filename = f"{Path(pipeline_path).stem}{selected_export_format_suffix}"
    output_file = output_path.resolve() / filename

    # verify that the output path meets the prerequisites
    if not output_file.parent.is_dir():
        try:
            output_file.parent.mkdir(parents=True, exist_ok=True)
        except Exception as ex:
            raise click.BadParameter(f"Cannot create output directory: {ex}", param_hint="--output")

    # handle output overwrite
    if output_file.exists() and not overwrite:
        raise click.ClickException(
            f"Output file '{str(output_file)}' exists and " "option '--overwrite' was not specified."
        )

    if pipeline_runtime_type:
        _build_component_cache()

    # validate the pipeline
    try:
        _validate_pipeline_definition(pipeline_definition)
    except Exception:
        raise click.ClickException("Pipeline validation FAILED. The pipeline was not exported.")

    with Spinner(text="Exporting pipeline ..."):
        try:
            # parse pipeline
            pipeline_object = PipelineParser().parse(pipeline_definition)
            # process pipeline
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                asyncio.get_event_loop().run_until_complete(
                    PipelineProcessorManager.instance().export(
                        pipeline_object, selected_export_format, str(output_file), True
                    )
                )
        except ValueError as ve:
            raise click.ClickException(f"Error parsing pipeline: \n {ve}")
        except RuntimeError as re:
            raise click.ClickException(f"Error exporting pipeline: \n {re} \n {re.__cause__}")

    click.echo(f"Pipeline was exported to '{str(output_file)}'.")
Example #7
    async def _validate_compatibility(
        self,
        pipeline_definition: PipelineDefinition,
        pipeline_type: str,
        pipeline_runtime: str,
        response: ValidationResponse,
    ) -> None:
        """
        Checks that the pipeline payload is compatible with this version of Elyra (issue #938)
        and verifies that all nodes in the pipeline are supported by the runtime.
        :param pipeline_definition: the pipeline definition to be validated
        :param pipeline_type: type of the pipeline runtime being used, e.g. KUBEFLOW_PIPELINES, APACHE_AIRFLOW, generic
        :param pipeline_runtime: name of the pipeline runtime for execution, e.g. kfp, airflow, local
        :param response: ValidationResponse containing the issue list to be updated
        """

        primary_pipeline_id = pipeline_definition.primary_pipeline.id
        supported_ops = []

        if pipeline_runtime:
            if not PipelineValidationManager._is_compatible_pipeline(pipeline_runtime, pipeline_type):
                response.add_message(
                    severity=ValidationSeverity.Error,
                    message_type="invalidRuntime",
                    message="Pipeline runtime platform is not compatible " "with selected runtime configuration.",
                    data={
                        "pipelineID": primary_pipeline_id,
                        "pipelineType": pipeline_type,
                        "pipelineRuntime": pipeline_runtime,
                    },
                )
            elif PipelineProcessorManager.instance().is_supported_runtime(pipeline_runtime):
                component_list = await PipelineProcessorManager.instance().get_components(pipeline_runtime)
                for component in component_list:
                    supported_ops.append(component.op)

                # Checks pipeline node types are compatible with the runtime selected
                for sub_pipeline in pipeline_definition.pipelines:
                    for node in sub_pipeline.nodes:
                        if node.op not in ComponentCache.get_generic_component_ops() and pipeline_runtime == "local":
                            response.add_message(
                                severity=ValidationSeverity.Error,
                                message_type="invalidNodeType",
                                message="This pipeline contains at least one runtime-specific "
                                "component, but pipeline runtime is 'local'. Specify a "
                                "runtime config or remove runtime-specific components "
                                "from the pipeline",
                                data={"nodeID": node.id, "nodeOpName": node.op, "pipelineId": sub_pipeline.id},
                            )
                            break
                        if node.type == "execution_node" and node.op not in supported_ops:
                            response.add_message(
                                severity=ValidationSeverity.Error,
                                message_type="invalidNodeType",
                                message="This component was not found in the catalog. Please add it "
                                "to your component catalog or remove this node from the "
                                "pipeline",
                                data={
                                    "nodeID": node.id,
                                    "nodeOpName": node.op,
                                    "nodeName": node.label,
                                    "pipelineId": sub_pipeline.id,
                                },
                            )
            else:
                response.add_message(
                    severity=ValidationSeverity.Error,
                    message_type="invalidRuntime",
                    message="Unsupported pipeline runtime",
                    data={
                        "pipelineRuntime": pipeline_runtime,
                        "pipelineType": pipeline_type,
                        "pipelineId": primary_pipeline_id,
                    },
                )
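_is_compatible_pipeline is referenced above but not shown. Purely as a sketch of the rule the surrounding checks imply (generic pipelines run on any runtime; otherwise the runtime's registered type must match the declared type), it might look like:

    @staticmethod
    def _is_compatible_pipeline(runtime_name: str, runtime_type: str) -> bool:
        # Sketch only: generic pipelines are assumed compatible with any runtime.
        if runtime_type.lower() == "generic":
            return True
        # Otherwise compare the runtime's registered RuntimeProcessorType name
        # against the pipeline's declared type.
        registered_type = PipelineProcessorManager.instance().get_runtime_type(runtime_name)
        return registered_type.name == runtime_type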
Example #8
    async def validate(self, pipeline: Dict) -> ValidationResponse:
        """
        Validates the pipeline JSON payload
        :param pipeline: the pipeline definition to be validated
        :return: ValidationResponse containing any and all issues discovered during the validation
        """
        response = ValidationResponse()

        pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
        issues = pipeline_definition.validate()
        for issue in issues:
            response.add_message(severity=ValidationSeverity.Error, message_type="invalidJSON", message=issue)

        try:
            primary_pipeline = pipeline_definition.primary_pipeline
        except ValueError:
            response.add_message(
                severity=ValidationSeverity.Error,
                message_type="invalidJSON",
                message="Invalid JSON detected, unable to continue.",
            )

            return response

        # Validation can be driven from runtime_config, since both runtime and pipeline_type
        # can be derived from it; we should not rely on the 'runtime' and 'runtime_type'
        # fields in the pipeline.
        # Note: validation updates the pipeline definition with the correct values of
        # 'runtime' and 'runtime_type' obtained from 'runtime_config'. We may want to move
        # this into PipelineDefinition, but then parsing tests break because they assume
        # no validation has been applied to the pipeline.
        runtime_config = primary_pipeline.runtime_config
        if runtime_config is None:
            runtime_config = "local"

        pipeline_runtime = PipelineValidationManager._determine_runtime(runtime_config)
        if PipelineProcessorManager.instance().is_supported_runtime(pipeline_runtime):
            # Set the runtime since it's derived from runtime_config and valid
            primary_pipeline.set("runtime", pipeline_runtime)
        else:
            response.add_message(
                severity=ValidationSeverity.Error,
                message_type="invalidRuntime",
                message="Unsupported pipeline runtime",
                data={"pipelineRuntime": pipeline_runtime},
            )

        self._validate_pipeline_structure(pipeline_definition=pipeline_definition, response=response)

        pipeline_type = PipelineValidationManager._determine_runtime_type(runtime_config)
        await self._validate_compatibility(
            pipeline_definition=pipeline_definition,
            pipeline_type=pipeline_type,
            pipeline_runtime=pipeline_runtime,
            response=response,
        )

        self._validate_pipeline_graph(pipeline=pipeline, response=response)

        if response.has_fatal:
            return response

        # Set runtime_type since it's derived from runtime_config, in case it's needed
        primary_pipeline.set("runtime_type", pipeline_type)

        await self._validate_node_properties(
            pipeline_definition=pipeline_definition,
            pipeline_type=pipeline_type,
            pipeline_runtime=pipeline_runtime,
            response=response,
        )

        return response
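A hypothetical async driver for the validator, assuming a .pipeline JSON payload on disk; has_fatal is the same flag the method itself consults before finishing:

import asyncio
import json

async def validate_file(path: str) -> None:
    # Hypothetical usage: load a pipeline payload and surface validation issues.
    with open(path) as f:
        pipeline = json.load(f)
    response = await PipelineValidationManager.instance().validate(pipeline=pipeline)
    if response.has_fatal:
        print(f"Validation of '{path}' reported fatal issues.")

asyncio.run(validate_file("my.pipeline"))  # file name is illustrative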