def initialize_settings(self):
    self.log.info(f"Config {self.config}")

    # Instantiate singletons with the appropriate parent to enable configurability,
    # and convey root_dir to PipelineProcessorManager.
    PipelineProcessorRegistry.instance(root_dir=self.settings["server_root_dir"], parent=self)
    PipelineProcessorManager.instance(root_dir=self.settings["server_root_dir"], parent=self)
    PipelineValidationManager.instance(root_dir=self.settings["server_root_dir"], parent=self)
    FileMetadataCache.instance(parent=self)
    ComponentCache.instance(parent=self).load()
    SchemaManager.instance(parent=self)
async def get(self, runtime_type):
    self.log.debug(f"Retrieving pipeline components for runtime type: {runtime_type}")

    processor_manager = PipelineProcessorManager.instance()
    if processor_manager.is_supported_runtime(runtime_type):
        # The endpoint path contains the shorthand version of a runtime (e.g., 'kfp',
        # 'airflow'). This case and its associated functions should eventually be removed
        # in favor of using the RuntimeProcessorType name in the request path.
        self.log.warning(
            f"Deprecation warning: when calling endpoint '{self.request.path}' "
            f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
            f"instead of shorthand name (e.g., 'kfp', 'airflow')"
        )
        runtime_processor_type = processor_manager.get_runtime_type(runtime_type)
    elif processor_manager.is_supported_runtime_type(runtime_type):
        # The request path uses the appropriate RuntimeProcessorType name. Use this
        # to get the RuntimeProcessorType instance to pass to get_all_components.
        runtime_processor_type = RuntimeProcessorType.get_instance_by_name(runtime_type)
    else:
        raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

    # Include generic components for all runtime types
    components: List[Component] = ComponentCache.get_generic_components()

    # Add additional runtime-type-specific components, if present
    components.extend(ComponentCache.instance().get_all_components(platform=runtime_processor_type))

    palette_json = ComponentCache.to_canvas_palette(components=components)

    self.set_status(200)
    self.set_header("Content-Type", "application/json")
    await self.finish(palette_json)
def get_runtime_processor_type(runtime_type: str, log: Logger, request_path: str) -> Optional[RuntimeProcessorType]:
    """
    Gets the runtime processor type for the runtime type given in the request path.

    :param runtime_type: can be the shorthand runtime ('kfp', 'airflow') or the
        runtime type name ('KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') (preferred)
    :param log: used to log the appropriate warning for shorthand-name requests
    :param request_path: full request path of the endpoint
    :returns: the RuntimeProcessorType for the given runtime_type, or None
    """
    processor_manager = PipelineProcessorManager.instance()
    if processor_manager.is_supported_runtime(runtime_type):
        # The endpoint path contains the shorthand version of a runtime (e.g., 'kfp',
        # 'airflow'). This case and its associated functions should eventually be removed
        # in favor of using the RuntimeProcessorType name in the request path.
        log.warning(
            f"Deprecation warning: when calling endpoint '{request_path}' "
            f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
            f"instead of shorthand name (e.g., 'kfp', 'airflow')"
        )
        return processor_manager.get_runtime_type(runtime_type)
    elif processor_manager.is_supported_runtime_type(runtime_type):
        # The request path uses the appropriate RuntimeProcessorType name. Use this
        # to get the RuntimeProcessorType instance to pass to get_all_components.
        return RuntimeProcessorType.get_instance_by_name(runtime_type)
    return None
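# A minimal sketch (an assumption, not code from this module) of how the GET handlers
# in this section could delegate to get_runtime_processor_type() instead of duplicating
# the shorthand/name resolution; 'self' is assumed to be the tornado request handler:
#
#     runtime_processor_type = get_runtime_processor_type(runtime_type, self.log, self.request.path)
#     if not runtime_processor_type:
#         # None signals an unrecognized runtime type; surface it as a client error
#         raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")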
async def get(self, runtime_type, component_id):
    self.log.debug(f"Retrieving pipeline component properties for component: {component_id}")

    if not component_id:
        raise web.HTTPError(400, "Missing component ID")

    processor_manager = PipelineProcessorManager.instance()
    if processor_manager.is_supported_runtime(runtime_type):
        # The endpoint path contains the shorthand version of a runtime (e.g., 'kfp',
        # 'airflow'). This case and its associated functions should eventually be removed
        # in favor of using the RuntimeProcessorType name in the request path.
        self.log.warning(
            f"Deprecation warning: when calling endpoint '{self.request.path}' "
            f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
            f"instead of shorthand name (e.g., 'kfp', 'airflow')"
        )
        runtime_processor_type = processor_manager.get_runtime_type(runtime_type)
    elif processor_manager.is_supported_runtime_type(runtime_type):
        # The request path uses the appropriate RuntimeProcessorType name. Use this
        # to get the RuntimeProcessorType instance to pass to get_component.
        runtime_processor_type = RuntimeProcessorType.get_instance_by_name(runtime_type)
    else:
        raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

    # Try to get component_id as a generic component; assigns None if the id
    # is not a generic component
    component: Optional[Component] = ComponentCache.get_generic_component(component_id)

    # Fall back to a runtime-type-specific component; assigns None if not found
    if not component:
        component = ComponentCache.instance().get_component(
            platform=runtime_processor_type, component_id=component_id
        )

    if not component:
        raise web.HTTPError(404, f"Component '{component_id}' not found")

    if self.request.path.endswith("/properties"):
        # Return the complete set of component properties
        json_response = ComponentCache.to_canvas_properties(component)
    else:
        # Return the component definition content
        json_response = json.dumps(
            {"content": component.definition, "mimeType": self.get_mimetype(component.file_extension)}
        )

    self.set_status(200)
    self.set_header("Content-Type", "application/json")
    await self.finish(json_response)
def _execute_pipeline(pipeline_definition) -> PipelineProcessorResponse:
    try:
        # parse the pipeline
        pipeline_object = PipelineParser().parse(pipeline_definition)

        # process the pipeline
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            response = asyncio.get_event_loop().run_until_complete(
                PipelineProcessorManager.instance().process(pipeline_object)
            )
        return response
    except ValueError as ve:
        raise click.ClickException(f"Error parsing pipeline: \n {ve}")
    except RuntimeError as re:
        raise click.ClickException(f"Error processing pipeline: \n {re} \n {re.__cause__}")
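# Hypothetical usage sketch for _execute_pipeline(): it expects the preprocessed
# pipeline JSON (a dict), not a file path. The argument values below are placeholders;
# _preprocess_pipeline() is the helper used elsewhere in this CLI module.
pipeline_definition = _preprocess_pipeline("mypipeline.pipeline", runtime="local", runtime_config="local")
response = _execute_pipeline(pipeline_definition)
if response:
    print(response.to_json())  # assumption: PipelineProcessorResponse exposes to_json()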
def export(pipeline_path, runtime_config, output, overwrite):
    """
    Export a pipeline to a runtime-specific format
    """
    click.echo()
    print_banner("Elyra pipeline export")

    rtc = _get_runtime_config(runtime_config)
    runtime_schema = rtc.schema_name
    runtime_type = rtc.metadata.get("runtime_type")

    pipeline_definition = _preprocess_pipeline(pipeline_path, runtime=runtime_schema, runtime_config=runtime_config)

    # Verify that the pipeline's runtime type is compatible with the
    # runtime configuration
    pipeline_runtime_type = _get_pipeline_runtime_type(pipeline_definition)
    if pipeline_runtime_type and pipeline_runtime_type != "Generic" and pipeline_runtime_type != runtime_type:
        raise click.BadParameter(
            f"The runtime configuration type '{runtime_type}' does not match "
            f"the pipeline's runtime type '{pipeline_runtime_type}'.",
            param_hint="--runtime-config",
        )

    resources = RuntimeTypeResources.get_instance_by_type(RuntimeProcessorType.get_instance_by_name(runtime_type))
    supported_export_formats = resources.get_export_extensions()
    if len(supported_export_formats) == 0:
        raise click.ClickException(f"Runtime type '{runtime_type}' does not support export.")

    # If, in the future, a runtime supports multiple export output formats,
    # the user can choose one. For now, choose the only option.
    selected_export_format = supported_export_formats[0]
    selected_export_format_suffix = f".{selected_export_format}"

    # Generate the output file name from the user-provided input
    if output is None:
        # The user did not specify an output; use the current directory
        # and derive the file name from the pipeline file name.
        output_path = Path.cwd()
        filename = f"{Path(pipeline_path).stem}{selected_export_format_suffix}"
    else:
        if output.suffix == selected_export_format_suffix:
            # The user provided a file name
            output_path = output.parent
            filename = output.name
        else:
            # The user provided a directory
            output_path = output
            filename = f"{Path(pipeline_path).stem}{selected_export_format_suffix}"
    output_file = output_path.resolve() / filename

    # Verify that the output path meets the prerequisites
    if not output_file.parent.is_dir():
        try:
            output_file.parent.mkdir(parents=True, exist_ok=True)
        except Exception as ex:
            raise click.BadParameter(f"Cannot create output directory: {ex}", param_hint="--output")

    # Handle output overwrite
    if output_file.exists() and not overwrite:
        raise click.ClickException(
            f"Output file '{str(output_file)}' exists and option '--overwrite' was not specified."
        )

    if pipeline_runtime_type:
        _build_component_cache()

    # Validate the pipeline
    try:
        _validate_pipeline_definition(pipeline_definition)
    except Exception:
        raise click.ClickException("Pipeline validation FAILED. The pipeline was not exported.")

    with Spinner(text="Exporting pipeline ..."):
        try:
            # parse the pipeline
            pipeline_object = PipelineParser().parse(pipeline_definition)

            # process the pipeline
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                asyncio.get_event_loop().run_until_complete(
                    PipelineProcessorManager.instance().export(
                        pipeline_object, selected_export_format, str(output_file), True
                    )
                )
        except ValueError as ve:
            raise click.ClickException(f"Error parsing pipeline: \n {ve}")
        except RuntimeError as re:
            raise click.ClickException(f"Error exporting pipeline: \n {re} \n {re.__cause__}")

    click.echo(f"Pipeline was exported to '{str(output_file)}'.")
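# Example shell invocation for the export command above, shown as a comment. The
# runtime configuration name 'kfp_dev' and the paths are placeholders, and the
# 'elyra-pipeline' entry point is an assumption about how this command is registered;
# the '--runtime-config', '--output', and '--overwrite' options come from the
# param_hint values and parameters used in the command body:
#
#   elyra-pipeline export mypipeline.pipeline --runtime-config kfp_dev \
#       --output /tmp/exports --overwrite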
async def _validate_compatibility(
    self,
    pipeline_definition: PipelineDefinition,
    pipeline_type: str,
    pipeline_runtime: str,
    response: ValidationResponse,
) -> None:
    """
    Checks that the pipeline payload is compatible with this version of elyra (ISSUE #938)
    and verifies that all nodes in the pipeline are supported by the runtime.

    :param pipeline_definition: the pipeline definition to be validated
    :param pipeline_type: type of the pipeline runtime being used, e.g. KUBEFLOW_PIPELINES, APACHE_AIRFLOW, generic
    :param pipeline_runtime: name of the pipeline runtime for execution, e.g. kfp, airflow, local
    :param response: ValidationResponse containing the issue list to be updated
    """
    primary_pipeline_id = pipeline_definition.primary_pipeline.id
    supported_ops = []

    if pipeline_runtime:
        if not PipelineValidationManager._is_compatible_pipeline(pipeline_runtime, pipeline_type):
            response.add_message(
                severity=ValidationSeverity.Error,
                message_type="invalidRuntime",
                message="Pipeline runtime platform is not compatible with selected runtime configuration.",
                data={
                    "pipelineID": primary_pipeline_id,
                    "pipelineType": pipeline_type,
                    "pipelineRuntime": pipeline_runtime,
                },
            )
        elif PipelineProcessorManager.instance().is_supported_runtime(pipeline_runtime):
            component_list = await PipelineProcessorManager.instance().get_components(pipeline_runtime)
            for component in component_list:
                supported_ops.append(component.op)

            # Check that pipeline node types are compatible with the selected runtime
            for sub_pipeline in pipeline_definition.pipelines:
                for node in sub_pipeline.nodes:
                    if node.op not in ComponentCache.get_generic_component_ops() and pipeline_runtime == "local":
                        response.add_message(
                            severity=ValidationSeverity.Error,
                            message_type="invalidNodeType",
                            message="This pipeline contains at least one runtime-specific "
                            "component, but pipeline runtime is 'local'. Specify a "
                            "runtime config or remove runtime-specific components "
                            "from the pipeline.",
                            data={"nodeID": node.id, "nodeOpName": node.op, "pipelineId": sub_pipeline.id},
                        )
                        break
                    if node.type == "execution_node" and node.op not in supported_ops:
                        response.add_message(
                            severity=ValidationSeverity.Error,
                            message_type="invalidNodeType",
                            message="This component was not found in the catalog. Please add it "
                            "to your component catalog or remove this node from the "
                            "pipeline.",
                            data={
                                "nodeID": node.id,
                                "nodeOpName": node.op,
                                "nodeName": node.label,
                                "pipelineId": sub_pipeline.id,
                            },
                        )
        else:
            response.add_message(
                severity=ValidationSeverity.Error,
                message_type="invalidRuntime",
                message="Unsupported pipeline runtime",
                data={
                    "pipelineRuntime": pipeline_runtime,
                    "pipelineType": pipeline_type,
                    "pipelineId": primary_pipeline_id,
                },
            )
async def validate(self, pipeline: Dict) -> ValidationResponse:
    """
    Validates the pipeline JSON payload

    :param pipeline: the pipeline definition to be validated
    :return: ValidationResponse containing any and all issues discovered during the validation
    """
    response = ValidationResponse()

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    issues = pipeline_definition.validate()
    for issue in issues:
        response.add_message(severity=ValidationSeverity.Error, message_type="invalidJSON", message=issue)

    try:
        primary_pipeline = pipeline_definition.primary_pipeline
    except ValueError:
        response.add_message(
            severity=ValidationSeverity.Error,
            message_type="invalidJSON",
            message="Invalid JSON detected, unable to continue.",
        )
        return response

    # Validation can be driven from runtime_config, since both runtime and pipeline_type
    # can be derived from it; we should not use the 'runtime' and 'runtime_type' fields
    # in the pipeline.
    # Note: validation updates the pipeline definition with the correct values of
    # 'runtime' and 'runtime_type' obtained from 'runtime_config'. We may want to move
    # this into PipelineDefinition, but the parsing tests then have issues because they
    # assume no validation has been applied to the pipeline.
    runtime_config = primary_pipeline.runtime_config
    if runtime_config is None:
        runtime_config = "local"

    pipeline_runtime = PipelineValidationManager._determine_runtime(runtime_config)
    if PipelineProcessorManager.instance().is_supported_runtime(pipeline_runtime):
        # Set the runtime, since it's derived from runtime_config and valid
        primary_pipeline.set("runtime", pipeline_runtime)
    else:
        response.add_message(
            severity=ValidationSeverity.Error,
            message_type="invalidRuntime",
            message="Unsupported pipeline runtime",
            data={"pipelineRuntime": pipeline_runtime},
        )

    self._validate_pipeline_structure(pipeline_definition=pipeline_definition, response=response)

    pipeline_type = PipelineValidationManager._determine_runtime_type(runtime_config)
    await self._validate_compatibility(
        pipeline_definition=pipeline_definition,
        pipeline_type=pipeline_type,
        pipeline_runtime=pipeline_runtime,
        response=response,
    )

    self._validate_pipeline_graph(pipeline=pipeline, response=response)

    if response.has_fatal:
        return response

    # Set runtime_type, since it's derived from runtime_config, in case it's needed
    primary_pipeline.set("runtime_type", pipeline_type)

    await self._validate_node_properties(
        pipeline_definition=pipeline_definition,
        pipeline_type=pipeline_type,
        pipeline_runtime=pipeline_runtime,
        response=response,
    )

    return response
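# A minimal usage sketch for validate(), assuming a .pipeline file on disk and no
# event loop already running (the file path is a placeholder):
import asyncio
import json

with open("mypipeline.pipeline") as f:
    pipeline_json = json.load(f)

validation_response = asyncio.run(PipelineValidationManager.instance().validate(pipeline=pipeline_json))
if validation_response.has_fatal:  # has_fatal is what validate() uses to short-circuit
    print(validation_response.to_json())  # assumption: ValidationResponse exposes to_json()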