async def get(self, runtime_type):
    self.log.debug(f"Retrieving pipeline components for runtime type: {runtime_type}")

    processor_manager = PipelineProcessorManager.instance()
    if processor_manager.is_supported_runtime(runtime_type):
        # The endpoint path contains the shorthand version of a runtime (e.g., 'kfp',
        # 'airflow'). This case and its associated functions should eventually be removed
        # in favor of using the RuntimeProcessorType name in the request path.
        self.log.warning(
            f"Deprecation warning: when calling endpoint '{self.request.path}' "
            f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
            f"instead of shorthand name (e.g., 'kfp', 'airflow')"
        )
        runtime_processor_type = processor_manager.get_runtime_type(runtime_type)
    elif processor_manager.is_supported_runtime_type(runtime_type):
        # The request path uses the appropriate RuntimeProcessorType name. Use this
        # to get the RuntimeProcessorType instance to pass to get_all_components
        runtime_processor_type = RuntimeProcessorType.get_instance_by_name(runtime_type)
    else:
        raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

    # Include generic components for all runtime types
    components: List[Component] = ComponentCache.get_generic_components()

    # Add additional runtime-type-specific components, if present
    components.extend(ComponentCache.instance().get_all_components(platform=runtime_processor_type))

    palette_json = ComponentCache.to_canvas_palette(components=components)

    self.set_status(200)
    self.set_header("Content-Type", "application/json")
    await self.finish(palette_json)
def get_component(self, component_id: str) -> Optional[Component]:
    """
    Retrieve runtime-specific component details if component_id is not one of the generic set
    """
    if component_id not in ("notebook", "python-script", "r-script"):
        return ComponentCache.instance().get_component(platform=self._type, component_id=component_id)

    return ComponentCache.get_generic_component(component_id)
def initialize_settings(self):
    self.log.info(f"Config {self.config}")
    # Instantiate singletons with appropriate parent to enable configurability, and convey
    # root_dir to PipelineProcessorManager.
    PipelineProcessorRegistry.instance(root_dir=self.settings["server_root_dir"], parent=self)
    PipelineProcessorManager.instance(root_dir=self.settings["server_root_dir"], parent=self)
    PipelineValidationManager.instance(root_dir=self.settings["server_root_dir"], parent=self)
    FileMetadataCache.instance(parent=self)
    ComponentCache.instance(parent=self).load()
    SchemaManager.instance(parent=self)
def get_components(self) -> List[Component]:
    """
    Retrieve components common to all runtimes
    """
    components: List[Component] = ComponentCache.get_generic_components()

    # Retrieve runtime-specific components
    components.extend(ComponentCache.instance().get_all_components(platform=self._type))

    return components
def component_cache(jp_environ):
    """
    Initialize a component cache
    """
    # Create new instance and load the cache
    component_cache = ComponentCache.instance(emulate_server_app=True)
    component_cache.load()

    yield component_cache

    component_cache.cache_manager.stop()
    ComponentCache.clear_instance()
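# NOTE: a hypothetical usage sketch, not part of the source above. Assuming component_cache is
# registered as a pytest fixture (e.g. decorated with @pytest.fixture), a test can request it by
# name to get a loaded cache; the generic component ids referenced here match those checked in
# get_component() elsewhere in this module.
def test_component_cache_includes_generic_components(component_cache):
    generic_ids = [component.id for component in ComponentCache.get_generic_components()]
    assert "notebook" in generic_ids  # generic set also includes 'python-script' and 'r-script'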
async def get(self, runtime_type, component_id):
    self.log.debug(f"Retrieving pipeline component properties for component: {component_id}")

    if not component_id:
        raise web.HTTPError(400, "Missing component ID")

    processor_manager = PipelineProcessorManager.instance()
    if processor_manager.is_supported_runtime(runtime_type):
        # The endpoint path contains the shorthand version of a runtime (e.g., 'kfp',
        # 'airflow'). This case and its associated functions should eventually be removed
        # in favor of using the RuntimeProcessorType name in the request path.
        self.log.warning(
            f"Deprecation warning: when calling endpoint '{self.request.path}' "
            f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
            f"instead of shorthand name (e.g., 'kfp', 'airflow')"
        )
        runtime_processor_type = processor_manager.get_runtime_type(runtime_type)
    elif processor_manager.is_supported_runtime_type(runtime_type):
        # The request path uses the appropriate RuntimeProcessorType name. Use this
        # to get the RuntimeProcessorType instance to pass to get_component
        runtime_processor_type = RuntimeProcessorType.get_instance_by_name(runtime_type)
    else:
        raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

    # Try to get component_id as a generic component; assigns None if id is not a generic component
    component: Optional[Component] = ComponentCache.get_generic_component(component_id)

    # Try to retrieve a runtime-type-specific component; assigns None if not found
    if not component:
        component = ComponentCache.instance().get_component(
            platform=runtime_processor_type, component_id=component_id
        )

    if not component:
        raise web.HTTPError(404, f"Component '{component_id}' not found")

    if self.request.path.endswith("/properties"):
        # Return complete set of component properties
        json_response = ComponentCache.to_canvas_properties(component)
    else:
        # Return component definition content
        json_response = json.dumps(
            {"content": component.definition, "mimeType": self.get_mimetype(component.file_extension)}
        )

    self.set_status(200)
    self.set_header("Content-Type", "application/json")
    await self.finish(json_response)
def test_parse_kfp_component_file_no_inputs():
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Read contents of given path
    path = _get_resource_path("kfp_test_operator_no_inputs.yaml")
    catalog_entry_data = {"path": path}

    # Construct a catalog instance
    catalog_type = "local-file-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data, catalog_instance, ["path"])

    # Parse the component entry
    parser = KfpComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    component = parser.parse(catalog_entry)[0]
    properties_json = ComponentCache.to_canvas_properties(component)

    # Properties JSON should only include the two parameters common to every
    # component ('label' and 'component_source'), the component description if
    # one exists (which it does for this component), and the output parameter for
    # this component
    num_common_params = 4
    assert len(properties_json["current_parameters"].keys()) == num_common_params
    assert len(properties_json["parameters"]) == num_common_params
    assert len(properties_json["uihints"]["parameter_info"]) == num_common_params

    # Total number of groups includes one for each parameter,
    # plus one for the output group header,
    # plus one for the component_source header
    num_groups = num_common_params + 2
    assert len(properties_json["uihints"]["group_info"][0]["group_info"]) == num_groups

    # Ensure that template still renders the two common parameters correctly
    assert properties_json["current_parameters"]["label"] == ""

    component_source = json.dumps(
        {"catalog_type": catalog_type, "component_ref": catalog_entry.entry_reference}
    )
    assert properties_json["current_parameters"]["component_source"] == component_source
async def put(self):
    # Validate the body
    cache_refresh = self.get_json_body()
    if "action" not in cache_refresh or cache_refresh["action"] != "refresh":
        raise web.HTTPError(400, reason="A body of {'action': 'refresh'} is required!")

    try:
        self.log.debug("Refreshing component cache for all catalog instances...")
        ComponentCache.instance().refresh()
        self.set_status(204)
    except RefreshInProgressError as ripe:
        self.set_status(409, str(ripe))

    await self.finish()
def test_parse_kfp_component_url():
    # Define the appropriate reader for a URL-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = UrlComponentCatalogConnector(kfp_supported_file_types)

    # Read contents of given path
    url = "https://raw.githubusercontent.com/kubeflow/pipelines/1.4.1/components/notebooks/Run_notebook_using_papermill/component.yaml"  # noqa: E501
    catalog_entry_data = {"url": url}

    # Construct a catalog instance
    catalog_type = "url-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data, catalog_instance, ["url"])

    # Parse the component entry
    parser = KfpComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    component = parser.parse(catalog_entry)[0]
    properties_json = ComponentCache.to_canvas_properties(component)

    # Ensure component parameters are prefixed (and system parameters are not) and all hold correct values
    assert properties_json["current_parameters"]["label"] == ""

    component_source = json.dumps(
        {"catalog_type": catalog_type, "component_ref": catalog_entry.entry_reference}
    )
    assert properties_json["current_parameters"]["component_source"] == component_source
    assert properties_json["current_parameters"]["elyra_notebook"] == "None"  # Default value for type `inputpath`
    assert properties_json["current_parameters"]["elyra_parameters"] == {
        "StringControl": "{}",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_packages_to_install"] == {
        "StringControl": "[]",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_input_data"] == {
        "StringControl": "",
        "activeControl": "StringControl",
    }
async def get(self, runtime_type):
    self.log.debug(f"Retrieving pipeline components for runtime type: {runtime_type}")

    runtime_processor_type = get_runtime_processor_type(runtime_type, self.log, self.request.path)
    if not runtime_processor_type:
        raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

    # Include generic components for all runtime types
    components: List[Component] = ComponentCache.get_generic_components()

    # Add additional runtime-type-specific components, if present
    components.extend(ComponentCache.instance().get_all_components(platform=runtime_processor_type))

    palette_json = ComponentCache.to_canvas_palette(components=components)

    self.set_status(200)
    self.set_header("Content-Type", "application/json")
    await self.finish(palette_json)
async def get(self, runtime_type, component_id):
    self.log.debug(f"Retrieving pipeline component properties for component: {component_id}")

    if not component_id:
        raise web.HTTPError(400, "Missing component ID")

    runtime_processor_type = get_runtime_processor_type(runtime_type, self.log, self.request.path)
    if not runtime_processor_type:
        raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

    # Try to get component_id as a generic component; assigns None if id is not a generic component
    component: Optional[Component] = ComponentCache.get_generic_component(component_id)

    # Try to retrieve a runtime-type-specific component; assigns None if not found
    if not component:
        component = ComponentCache.instance().get_component(
            platform=runtime_processor_type, component_id=component_id
        )

    if not component:
        raise web.HTTPError(404, f"Component '{component_id}' not found")

    if self.request.path.endswith("/properties"):
        # Return complete set of component properties
        json_response = ComponentCache.to_canvas_properties(component)
    else:
        # Return component definition content
        json_response = json.dumps(
            {"content": component.definition, "mimeType": self.get_mimetype(component.file_extension)}
        )

    self.set_status(200)
    self.set_header("Content-Type", "application/json")
    await self.finish(json_response)
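# NOTE: a hypothetical sketch, not part of the source. The two handlers above call
# get_runtime_processor_type(), whose definition is not shown here. Assuming it simply factors
# out the shorthand-vs-type-name resolution that the earlier handler versions perform inline,
# it might look like the following (PipelineProcessorManager, RuntimeProcessorType and Optional
# are assumed to be imported by the surrounding module):
def get_runtime_processor_type(runtime_type, log, request_path) -> Optional[RuntimeProcessorType]:
    """Resolve a runtime type given either its shorthand name (deprecated) or its RuntimeProcessorType name."""
    processor_manager = PipelineProcessorManager.instance()
    if processor_manager.is_supported_runtime(runtime_type):
        # Shorthand names (e.g. 'kfp', 'airflow') in request paths are deprecated
        log.warning(
            f"Deprecation warning: when calling endpoint '{request_path}' "
            f"use runtime type name (e.g. 'KUBEFLOW_PIPELINES', 'APACHE_AIRFLOW') "
            f"instead of shorthand name (e.g., 'kfp', 'airflow')"
        )
        return processor_manager.get_runtime_type(runtime_type)
    if processor_manager.is_supported_runtime_type(runtime_type):
        return RuntimeProcessorType.get_instance_by_name(runtime_type)
    return None  # callers raise HTTPError(400) when nothing is resolved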
async def put(self, catalog):
    # Validate the body
    cache_refresh = self.get_json_body()
    if "action" not in cache_refresh or cache_refresh["action"] != "refresh":
        raise web.HTTPError(400, reason="A body of {'action': 'refresh'} is required.")

    try:
        # Ensure given catalog name is a metadata instance
        catalog_instance = MetadataManager(
            schemaspace=ComponentCatalogs.COMPONENT_CATALOGS_SCHEMASPACE_ID
        ).get(name=catalog)
    except MetadataNotFoundError:
        raise web.HTTPError(404, f"Catalog '{catalog}' cannot be found.")

    self.log.debug(f"Refreshing component cache for catalog with name '{catalog}'...")
    ComponentCache.instance().update(catalog=catalog_instance, action="modify")
    self.set_status(204)
    await self.finish()
def test_parse_airflow_component_url():
    # Define the appropriate reader for a URL-type component definition
    airflow_supported_file_types = [".py"]
    reader = UrlComponentCatalogConnector(airflow_supported_file_types)

    # Read contents of given path
    url = "https://raw.githubusercontent.com/apache/airflow/1.10.15/airflow/operators/bash_operator.py"  # noqa: E501
    catalog_entry_data = {"url": url}

    # Construct a catalog instance
    catalog_type = "url-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data, catalog_instance, ["url"])

    # Parse the component entry
    parser = ComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    component = parser.parse(catalog_entry)[0]
    properties_json = ComponentCache.to_canvas_properties(component)

    # Ensure component parameters are prefixed (and system parameters are not) and hold correct values
    assert properties_json["current_parameters"]["label"] == ""

    # Helper method to retrieve the requested parameter value from the dictionary
    def get_parameter(param_name):
        property_dict = properties_json["current_parameters"][param_name]
        return property_dict[property_dict["activeControl"]]

    component_source = json.dumps(
        {"catalog_type": catalog_type, "component_ref": catalog_entry.entry_reference}
    )
    assert properties_json["current_parameters"]["component_source"] == component_source
    assert get_parameter("elyra_bash_command") == ""
    assert get_parameter("elyra_xcom_push") is True
    assert get_parameter("elyra_env") == "{}"  # {}
    assert get_parameter("elyra_output_encoding") == "utf-8"
async def _get_component_properties(self, pipeline_runtime: str, components: dict, node_op: str) -> Dict:
    """
    Retrieve the full dict of properties associated with the node_op

    :param pipeline_runtime: name of the pipeline runtime being used, e.g. kfp, airflow
    :param components: dict of components associated with the pipeline runtime being used
    :param node_op: the node operation, e.g. execute-notebook-node
    :return: a dict of the properties associated with the node op
    """
    if node_op == "execute-notebook-node":
        node_op = "notebooks"
    elif node_op == "execute-r-node":
        node_op = "r-script"
    elif node_op == "execute-python-node":
        node_op = "python-script"

    for category in components["categories"]:
        for node_type in category["node_types"]:
            if node_op == node_type["op"]:
                component: Component = await PipelineProcessorManager.instance().get_component(
                    pipeline_runtime, node_op
                )
                component_properties = ComponentCache.to_canvas_properties(component)
                return component_properties

    return {}
def get_components(self):
    return ComponentCache.get_generic_components()
def test_parse_kfp_component_file():
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Read contents of given path
    path = _get_resource_path("kfp_test_operator.yaml")
    catalog_entry_data = {"path": path}

    # Construct a catalog instance
    catalog_type = "local-file-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data, catalog_instance, ["path"])

    # Parse the component entry
    parser = KfpComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    component = parser.parse(catalog_entry)[0]
    properties_json = ComponentCache.to_canvas_properties(component)

    # Ensure description is rendered even with an unescaped character
    description = 'This component description contains an unescaped " character'
    assert properties_json["current_parameters"]["component_description"] == description

    # Ensure component parameters are prefixed (and system parameters are not) and all hold correct values
    assert properties_json["current_parameters"]["label"] == ""

    component_source = json.dumps(
        {"catalog_type": catalog_type, "component_ref": catalog_entry.entry_reference}
    )
    assert properties_json["current_parameters"]["component_source"] == component_source
    assert properties_json["current_parameters"]["elyra_test_string_no_default"] == {
        "StringControl": "",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_test_string_default_value"] == {
        "StringControl": "default",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_test_string_default_empty"] == {
        "StringControl": "",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_test_bool_default"] == {
        "BooleanControl": False,
        "activeControl": "BooleanControl",
    }
    assert properties_json["current_parameters"]["elyra_test_bool_false"] == {
        "BooleanControl": False,
        "activeControl": "BooleanControl",
    }
    assert properties_json["current_parameters"]["elyra_test_bool_true"] == {
        "BooleanControl": True,
        "activeControl": "BooleanControl",
    }
    assert properties_json["current_parameters"]["elyra_test_int_default"] == {
        "NumberControl": 0,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_test_int_zero"] == {
        "NumberControl": 0,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_test_int_non_zero"] == {
        "NumberControl": 1,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_test_float_default"] == {
        "NumberControl": 0.0,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_test_float_zero"] == {
        "NumberControl": 0.0,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_test_float_non_zero"] == {
        "NumberControl": 1.0,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_test_dict_default"] == {
        "StringControl": "{}",
        "activeControl": "StringControl",
    }  # {}
    assert properties_json["current_parameters"]["elyra_test_list_default"] == {
        "StringControl": "[]",
        "activeControl": "StringControl",
    }  # []

    # Ensure that the 'required' attribute was set correctly. KFP components default to
    # required unless explicitly marked otherwise in component YAML.
    required_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_required_property"
    )
    assert required_property["data"]["required"] is True

    optional_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_optional_property"
    )
    assert optional_property["data"]["required"] is False

    default_required_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_required_property_default"
    )
    assert default_required_property["data"]["required"] is True

    # Ensure that type information is inferred correctly
    unusual_dict_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_unusual_type_dict"
    )
    assert unusual_dict_property["data"]["controls"]["StringControl"]["format"] == "dictionary"

    unusual_list_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_unusual_type_list"
    )
    assert unusual_list_property["data"]["controls"]["StringControl"]["format"] == "list"

    unusual_string_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_unusual_type_string"
    )
    assert unusual_string_property["data"]["controls"]["StringControl"]["format"] == "string"

    file_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_unusual_type_file"
    )
    assert file_property["data"]["format"] == "inputpath"

    no_type_property = next(
        prop for prop in properties_json["uihints"]["parameter_info"]
        if prop.get("parameter_ref") == "elyra_test_unusual_type_notgiven"
    )
    assert no_type_property["data"]["controls"]["StringControl"]["format"] == "string"

    # Ensure descriptions are rendered properly with type hint in parentheses
    assert (
        unusual_dict_property["description"]["default"]
        == "The test command description (type: Dictionary of arrays)"
    )
    assert unusual_list_property["description"]["default"] == "The test command description (type: An array)"
    assert unusual_string_property["description"]["default"] == "The test command description (type: A string)"
    assert (
        file_property["description"]["default"] == "The test command description"
    )  # No data type info is included in parentheses for inputPath variables
    assert no_type_property["description"]["default"] == "The test command description (type: string)"
async def test_parse_components_additional_metatypes():
    # Define the appropriate reader for a URL-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = UrlComponentCatalogConnector(kfp_supported_file_types)

    # Read contents of given path
    url = "https://raw.githubusercontent.com/kubeflow/pipelines/1.4.1/components/keras/Train_classifier/from_CSV/component.yaml"  # noqa: E501
    catalog_entry_data = {"url": url}

    # Construct a catalog instance
    catalog_type = "url-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data, catalog_instance, ["url"])

    # Parse the component entry
    parser = KfpComponentParser()
    component = parser.parse(catalog_entry)[0]
    properties_json = ComponentCache.to_canvas_properties(component)

    # Ensure component parameters are prefixed (and system parameters are not) and all hold correct values
    assert properties_json["current_parameters"]["label"] == ""

    component_source = json.dumps(
        {"catalog_type": catalog_type, "component_ref": catalog_entry.entry_reference}
    )
    assert properties_json["current_parameters"]["component_source"] == component_source
    assert properties_json["current_parameters"]["elyra_training_features"] == "None"  # inputPath
    assert properties_json["current_parameters"]["elyra_training_labels"] == "None"  # inputPath
    assert properties_json["current_parameters"]["elyra_network_json"] == "None"  # inputPath
    assert properties_json["current_parameters"]["elyra_loss_name"] == {
        "StringControl": "categorical_crossentropy",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_num_classes"] == {
        "NumberControl": 0,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_optimizer"] == {
        "StringControl": "rmsprop",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_optimizer_config"] == {
        "StringControl": "",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_learning_rate"] == {
        "NumberControl": 0.01,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_num_epochs"] == {
        "NumberControl": 100,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_batch_size"] == {
        "NumberControl": 32,
        "activeControl": "NumberControl",
    }
    assert properties_json["current_parameters"]["elyra_metrics"] == {
        "StringControl": "['accuracy']",
        "activeControl": "StringControl",
    }
    assert properties_json["current_parameters"]["elyra_random_seed"] == {
        "NumberControl": 0,
        "activeControl": "NumberControl",
    }
def _build_component_cache():
    """Initialize a ComponentCache instance and wait for it to complete all tasks"""
    with Spinner(text="Initializing the component cache..."):
        component_cache = ComponentCache.instance(emulate_server_app=True)
        component_cache.load()
        component_cache.wait_for_all_cache_tasks()
def _cc_pipeline(
    self, pipeline, pipeline_name, pipeline_version="", experiment_name="", cos_directory=None, export=False
):
    runtime_configuration = self._get_metadata_configuration(
        schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config
    )

    cos_endpoint = runtime_configuration.metadata["cos_endpoint"]
    cos_username = runtime_configuration.metadata.get("cos_username")
    cos_password = runtime_configuration.metadata.get("cos_password")
    cos_secret = runtime_configuration.metadata.get("cos_secret")
    cos_bucket = runtime_configuration.metadata.get("cos_bucket")
    if cos_directory is None:
        cos_directory = pipeline_name
    engine = runtime_configuration.metadata["engine"]

    self.log_pipeline_info(
        pipeline_name,
        f"processing pipeline dependencies to: {cos_endpoint} "
        f"bucket: {cos_bucket} folder: {cos_directory}",
    )
    t0_all = time.time()

    emptydir_volume_size = ""
    container_runtime = bool(os.getenv("CRIO_RUNTIME", "False").lower() == "true")

    # Create dictionary that maps component Id to its ContainerOp instance
    target_ops = {}

    # Sort operations based on dependency graph (topological order)
    sorted_operations = PipelineProcessor._sort_operations(pipeline.operations)

    # Determine whether access to cloud storage is required
    for operation in sorted_operations:
        if isinstance(operation, GenericOperation):
            self._verify_cos_connectivity(runtime_configuration)
            break

    # All previous operation outputs should be propagated throughout the pipeline.
    # In order to process this recursively, the current operation's inputs should be combined
    # from its parent's inputs (which, themselves are derived from the outputs of their parent)
    # and its parent's outputs.
    PipelineProcessor._propagate_operation_inputs_outputs(pipeline, sorted_operations)

    for operation in sorted_operations:
        if container_runtime:
            # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
            emptydir_volume_size = "20Gi"

        sanitized_operation_name = self._sanitize_operation_name(operation.name)

        # Create pipeline operation
        # If operation is one of the "generic" set of NBs or scripts, construct custom ExecuteFileOp
        if isinstance(operation, GenericOperation):

            # Collect env variables
            pipeline_envs = self._collect_envs(
                operation, cos_secret=cos_secret, cos_username=cos_username, cos_password=cos_password
            )

            operation_artifact_archive = self._get_dependency_archive_name(operation)

            self.log.debug(f"Creating pipeline component:\n {operation} archive : {operation_artifact_archive}")

            target_ops[operation.id] = ExecuteFileOp(
                name=sanitized_operation_name,
                pipeline_name=pipeline_name,
                experiment_name=experiment_name,
                notebook=operation.filename,
                cos_endpoint=cos_endpoint,
                cos_bucket=cos_bucket,
                cos_directory=cos_directory,
                cos_dependencies_archive=operation_artifact_archive,
                pipeline_version=pipeline_version,
                pipeline_source=pipeline.source,
                pipeline_inputs=operation.inputs,
                pipeline_outputs=operation.outputs,
                pipeline_envs=pipeline_envs,
                emptydir_volume_size=emptydir_volume_size,
                cpu_request=operation.cpu,
                mem_request=operation.memory,
                gpu_limit=operation.gpu,
                workflow_engine=engine,
                image=operation.runtime_image,
                file_outputs={
                    "mlpipeline-metrics": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-metrics.json",  # noqa
                    "mlpipeline-ui-metadata": f"{pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']}/mlpipeline-ui-metadata.json",  # noqa
                },
            )

            if operation.doc:
                target_ops[operation.id].add_pod_annotation("elyra/node-user-doc", operation.doc)

            # TODO Can we move all of this to apply to non-standard components as well? Test when servers are up
            if cos_secret and not export:
                target_ops[operation.id].apply(use_aws_secret(cos_secret))

            image_namespace = self._get_metadata_configuration(RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID)
            for image_instance in image_namespace:
                if image_instance.metadata["image_name"] == operation.runtime_image and image_instance.metadata.get(
                    "pull_policy"
                ):
                    target_ops[operation.id].container.set_image_pull_policy(image_instance.metadata["pull_policy"])

            self.log_pipeline_info(
                pipeline_name,
                f"processing operation dependencies for id: {operation.id}",
                operation_name=operation.name,
            )

            self._upload_dependencies_to_object_store(runtime_configuration, cos_directory, operation)

        # If operation is a "non-standard" component, load its spec and create operation with factory function
        else:
            # Retrieve component from cache
            component = ComponentCache.instance().get_component(self._type, operation.classifier)

            # Convert the user-entered value of certain properties according to their type
            for component_property in component.properties:
                # Get corresponding property's value from parsed pipeline
                property_value = operation.component_params.get(component_property.ref)

                self.log.debug(
                    f"Processing component parameter '{component_property.name}' "
                    f"of type '{component_property.data_type}'"
                )

                if component_property.data_type == "inputpath":
                    output_node_id = property_value["value"]
                    output_node_parameter_key = property_value["option"].replace("elyra_output_", "")
                    operation.component_params[component_property.ref] = target_ops[output_node_id].outputs[
                        output_node_parameter_key
                    ]
                elif component_property.data_type == "inputvalue":
                    active_property = property_value["activeControl"]
                    active_property_value = property_value.get(active_property, None)

                    # If the value is not found, assign it the default value assigned in parser
                    if active_property_value is None:
                        active_property_value = component_property.value

                    if isinstance(active_property_value, dict) and set(active_property_value.keys()) == {
                        "value",
                        "option",
                    }:
                        output_node_id = active_property_value["value"]
                        output_node_parameter_key = active_property_value["option"].replace("elyra_output_", "")
                        operation.component_params[component_property.ref] = target_ops[output_node_id].outputs[
                            output_node_parameter_key
                        ]
                    elif component_property.default_data_type == "dictionary":
                        processed_value = self._process_dictionary_value(active_property_value)
                        operation.component_params[component_property.ref] = processed_value
                    elif component_property.default_data_type == "list":
                        processed_value = self._process_list_value(active_property_value)
                        operation.component_params[component_property.ref] = processed_value
                    else:
                        operation.component_params[component_property.ref] = active_property_value

            # Build component task factory
            try:
                factory_function = components.load_component_from_text(component.definition)
            except Exception as e:
                # TODO Fix error messaging and break exceptions down into categories
                self.log.error(f"Error loading component spec for {operation.name}: {str(e)}")
                raise RuntimeError(f"Error loading component spec for {operation.name}.")

            # Add factory function, which returns a ContainerOp task instance, to pipeline operation dict
            try:
                comp_spec_inputs = [
                    inputs.name.lower().replace(" ", "_") for inputs in factory_function.component_spec.inputs
                ]

                # Remove inputs and outputs from params dict
                # TODO: need to have way to retrieve only required params
                parameter_removal_list = ["inputs", "outputs"]
                for component_param in operation.component_params_as_dict.keys():
                    if component_param not in comp_spec_inputs:
                        parameter_removal_list.append(component_param)

                for parameter in parameter_removal_list:
                    operation.component_params_as_dict.pop(parameter, None)

                # Create ContainerOp instance and assign appropriate user-provided name
                sanitized_component_params = {
                    self._sanitize_param_name(name): value
                    for name, value in operation.component_params_as_dict.items()
                }
                container_op = factory_function(**sanitized_component_params)
                container_op.set_display_name(operation.name)

                if operation.doc:
                    container_op.add_pod_annotation("elyra/node-user-doc", operation.doc)

                target_ops[operation.id] = container_op
            except Exception as e:
                # TODO Fix error messaging and break exceptions down into categories
                self.log.error(f"Error constructing component {operation.name}: {str(e)}")
                raise RuntimeError(f"Error constructing component {operation.name}.")

    # Process dependencies after all the operations have been created
    for operation in pipeline.operations.values():
        op = target_ops[operation.id]
        for parent_operation_id in operation.parent_operation_ids:
            parent_op = target_ops[parent_operation_id]  # Parent Operation
            op.after(parent_op)

    self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))

    return target_ops
def test_parse_airflow_component_file():
    # Define the appropriate reader for a filesystem-type component definition
    airflow_supported_file_types = [".py"]
    reader = FilesystemComponentCatalogConnector(airflow_supported_file_types)

    # Read contents of given path
    path = _get_resource_path("airflow_test_operator.py")
    catalog_entry_data = {"path": path}

    # Construct a catalog instance
    catalog_type = "local-file-catalog"
    catalog_instance = ComponentCatalogMetadata(
        schema_name=catalog_type,
        metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name},
    )

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data, catalog_instance, ["path"])

    # Parse the component entry
    parser = ComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    components = parser.parse(catalog_entry)
    assert len(components) == 3  # TestOperator, DeriveFromTestOperator, and DeriveFromImportedOperator

    # Split components list into its constituent operators
    components = sorted(components, key=lambda component: component.id)
    import_test_op, derive_test_op, test_op = components[0], components[1], components[2]

    # Helper method to retrieve the requested parameter value from the dictionary
    def get_parameter_value(param_name):
        property_dict = properties_json["current_parameters"][param_name]
        return property_dict[property_dict["activeControl"]]

    # Helper method to retrieve the requested parameter info from the dictionary
    def get_parameter_format(param_name, control_id="StringControl"):
        param_info = None
        for prop_info in properties_json["uihints"]["parameter_info"]:
            if prop_info.get("parameter_ref") == param_name:
                param_info = prop_info["data"]["controls"][control_id]["format"]
                break
        return param_info

    # Helper method to retrieve the requested parameter description from the dictionary
    def get_parameter_description(param_name):
        param_desc = None
        for prop_info in properties_json["uihints"]["parameter_info"]:
            if prop_info.get("parameter_ref") == param_name:
                param_desc = prop_info["description"]["default"]
                break
        return param_desc

    # Helper method to retrieve whether the requested parameter is required
    def get_parameter_required(param_name):
        param_info = None
        for prop_info in properties_json["uihints"]["parameter_info"]:
            if prop_info.get("parameter_ref") == param_name:
                param_info = prop_info["data"]["required"]
                break
        return param_info

    # Retrieve properties for TestOperator
    # TestOperator does not include type hints for the init function args
    properties_json = ComponentCache.to_canvas_properties(test_op)

    # Ensure system parameters are not prefixed and hold correct values
    assert properties_json["current_parameters"]["label"] == ""

    component_source = json.dumps(
        {"catalog_type": catalog_type, "component_ref": catalog_entry.entry_reference}
    )
    assert properties_json["current_parameters"]["component_source"] == component_source

    # Ensure component parameters are prefixed with 'elyra_' and values are as expected
    assert get_parameter_value("elyra_str_no_default") == ""
    assert get_parameter_value("elyra_str_default") == "default"
    assert get_parameter_value("elyra_str_empty") == ""
    assert get_parameter_value("elyra_str_not_in_docstring") == ""

    assert get_parameter_value("elyra_bool_no_default") is False
    assert get_parameter_value("elyra_bool_default_false") is False
    assert get_parameter_value("elyra_bool_default_true") is True
    assert get_parameter_value("elyra_bool_not_in_docstring") is False

    assert get_parameter_value("elyra_int_no_default") == 0
    assert get_parameter_value("elyra_int_default_zero") == 0
    assert get_parameter_value("elyra_int_default_non_zero") == 2
    assert get_parameter_value("elyra_int_not_in_docstring") == 3

    assert get_parameter_value("elyra_dict_default_is_none") == "{}"  # {}
    assert get_parameter_value("elyra_list_default_is_none") == "[]"  # []

    # Ensure that type information is inferred correctly for properties that
    # define 'unusual' types, such as 'a dictionary of lists'
    assert get_parameter_format("elyra_unusual_type_dict") == "dictionary"
    assert get_parameter_format("elyra_unusual_type_list") == "list"

    # Ensure that type information falls back to string if no type hint present
    # and no ':type: <type info>' phrase found in docstring
    assert get_parameter_format("elyra_fallback_type") == "string"

    # Ensure component parameters are marked as required in the correct circumstances
    # (parameter is required if there is no default value provided or if a type hint
    # does not include 'Optional[...]')
    assert get_parameter_required("elyra_str_no_default") is True
    assert get_parameter_required("elyra_str_default") is False
    assert get_parameter_required("elyra_str_empty") is False

    # Ensure descriptions are rendered properly with type hint in parentheses
    assert (
        get_parameter_description("elyra_unusual_type_dict")
        == "a dictionary parameter with the phrase 'list' in type description (type: a dictionary of arrays)"
    )
    assert (
        get_parameter_description("elyra_unusual_type_list")
        == "a list parameter with the phrase 'string' in type description (type: a list of strings)"
    )
    assert get_parameter_description("elyra_fallback_type") == "(type: str)"

    # Ensure that a long description with line wrapping and a backslash escape has rendered
    # (and hence did not raise an error during json.loads in the properties API request)
    parsed_description = """a string parameter with a very long description that wraps lines and also has an escaped underscore in it, as shown here: (\_) # noqa W605"""
    modified_description = parsed_description.replace("\n", " ") + " (type: str)"  # modify desc acc. to parser rules
    assert get_parameter_description("elyra_long_description_property") == modified_description

    # Retrieve properties for DeriveFromTestOperator
    # DeriveFromTestOperator includes type hints for all init arguments
    properties_json = ComponentCache.to_canvas_properties(derive_test_op)

    # Ensure default values are parsed correct in the case where type hints are present
    assert get_parameter_value("elyra_str_default") == "default"
    assert get_parameter_value("elyra_bool_default") is True
    assert get_parameter_value("elyra_int_default") == 2

    # Ensure component parameters are prefixed with 'elyra_' and types are as expected
    # in the case when a type hint is provided (and regardless of whether or not the
    # parameter type is included in the docstring)
    assert get_parameter_format("elyra_str_no_default") == "string"
    assert get_parameter_format("elyra_str_default") == "string"
    assert get_parameter_format("elyra_str_optional_default") == "string"
    assert get_parameter_format("elyra_str_not_in_docstring") == "string"

    assert get_parameter_format("elyra_bool_no_default", "BooleanControl") == "boolean"
    assert get_parameter_format("elyra_bool_default", "BooleanControl") == "boolean"
    assert get_parameter_format("elyra_bool_not_in_docstring", "BooleanControl") == "boolean"

    assert get_parameter_format("elyra_int_no_default", "NumberControl") == "number"
    assert get_parameter_format("elyra_int_default", "NumberControl") == "number"
    assert get_parameter_format("elyra_int_not_in_docstring", "NumberControl") == "number"

    assert get_parameter_format("elyra_list_optional_default") == "list"

    # Ensure component parameters are marked as required in the correct circumstances
    assert get_parameter_required("elyra_str_no_default") is True
    assert get_parameter_required("elyra_str_default") is False
    assert get_parameter_required("elyra_str_optional_default") is False
    assert get_parameter_required("elyra_str_not_in_docstring") is True

    # Retrieve properties for DeriveFromImportedOperator
    # DeriveFromImportedOperator includes type hints for dictionary and
    # list values to test the more complex parsing required in this case
    properties_json = ComponentCache.to_canvas_properties(import_test_op)

    # Ensure component parameters are prefixed with 'elyra_' and types are as expected
    assert get_parameter_format("elyra_dict_no_default") == "dictionary"
    assert get_parameter_format("elyra_dict_optional_no_default") == "dictionary"
    assert get_parameter_format("elyra_nested_dict_default") == "dictionary"
    assert get_parameter_format("elyra_dict_not_in_docstring") == "dictionary"

    assert get_parameter_format("elyra_list_no_default") == "list"
    assert get_parameter_format("elyra_list_optional_no_default") == "list"
    assert get_parameter_format("elyra_list_default") == "list"
    assert get_parameter_format("elyra_list_optional_default") == "list"
    assert get_parameter_format("elyra_list_not_in_docstring") == "list"

    assert get_parameter_value("elyra_dict_no_default") == "{}"
    assert get_parameter_value("elyra_list_no_default") == "[]"
def _cc_pipeline(self, pipeline, pipeline_name):
    runtime_configuration = self._get_metadata_configuration(
        schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config
    )
    image_namespace = self._get_metadata_configuration(schemaspace=RuntimeImages.RUNTIME_IMAGES_SCHEMASPACE_ID)

    cos_endpoint = runtime_configuration.metadata.get("cos_endpoint")
    cos_username = runtime_configuration.metadata.get("cos_username")
    cos_password = runtime_configuration.metadata.get("cos_password")
    cos_secret = runtime_configuration.metadata.get("cos_secret")
    cos_directory = pipeline_name
    cos_bucket = runtime_configuration.metadata.get("cos_bucket")

    # Accumulate the target operations (one dict per node) to be rendered into the DAG
    target_ops = []

    self.log_pipeline_info(
        pipeline_name,
        f"processing pipeline dependencies to: {cos_endpoint} "
        f"bucket: {cos_bucket} folder: {pipeline_name}",
    )

    t0_all = time.time()

    # Sort operations based on dependency graph (topological order)
    sorted_operations = PipelineProcessor._sort_operations(pipeline.operations)

    # Determine whether access to cloud storage is required and check connectivity
    for operation in sorted_operations:
        if isinstance(operation, GenericOperation):
            self._verify_cos_connectivity(runtime_configuration)
            break

    # All previous operation outputs should be propagated throughout the pipeline.
    # In order to process this recursively, the current operation's inputs should be combined
    # from its parent's inputs (which, themselves are derived from the outputs of their parent)
    # and its parent's outputs.
    PipelineProcessor._propagate_operation_inputs_outputs(pipeline, sorted_operations)

    # Scrub all node labels of invalid characters
    scrubbed_operations = self._scrub_invalid_characters_from_list(sorted_operations)
    # Generate unique names for all operations
    unique_operations = self._create_unique_node_names(scrubbed_operations)

    for operation in unique_operations:
        if isinstance(operation, GenericOperation):
            operation_artifact_archive = self._get_dependency_archive_name(operation)

            self.log.debug(f"Creating pipeline component:\n {operation} archive : {operation_artifact_archive}")

            # Collect env variables
            pipeline_envs = self._collect_envs(
                operation, cos_secret=cos_secret, cos_username=cos_username, cos_password=cos_password
            )

            # Generate unique ELYRA_RUN_NAME value and expose it as an
            # environment variable in the container.
            # Notebook | script nodes are implemented using the kubernetes_pod_operator
            # (https://airflow.apache.org/docs/apache-airflow/1.10.12/_api/airflow/contrib/operators/kubernetes_pod_operator/index.html)
            # Environment variables that are passed to this operator are
            # pre-processed by Airflow at runtime and placeholder values (expressed as '{{ xyz }}'
            # - see https://airflow.apache.org/docs/apache-airflow/1.10.12/macros-ref#default-variables)
            # replaced.
            if pipeline_envs is None:
                pipeline_envs = {}
            pipeline_envs["ELYRA_RUN_NAME"] = f"{pipeline_name}-{{{{ ts_nodash }}}}"

            image_pull_policy = None
            runtime_image_pull_secret = None
            for image_instance in image_namespace:
                if image_instance.metadata["image_name"] == operation.runtime_image:
                    if image_instance.metadata.get("pull_policy"):
                        image_pull_policy = image_instance.metadata["pull_policy"]
                    if image_instance.metadata.get("pull_secret"):
                        runtime_image_pull_secret = image_instance.metadata["pull_secret"]
                    break

            bootscript = BootscriptBuilder(
                filename=operation.filename,
                cos_endpoint=cos_endpoint,
                cos_bucket=cos_bucket,
                cos_directory=cos_directory,
                cos_dependencies_archive=operation_artifact_archive,
                inputs=operation.inputs,
                outputs=operation.outputs,
            )

            target_op = {
                "notebook": operation.name,
                "id": operation.id,
                "argument_list": bootscript.container_cmd,
                "runtime_image": operation.runtime_image,
                "pipeline_envs": pipeline_envs,
                "parent_operation_ids": operation.parent_operation_ids,
                "image_pull_policy": image_pull_policy,
                "cpu_request": operation.cpu,
                "mem_request": operation.memory,
                "gpu_limit": operation.gpu,
                "operator_source": operation.component_params["filename"],
                "is_generic_operator": True,
                "doc": operation.doc,
            }

            if runtime_image_pull_secret is not None:
                target_op["runtime_image_pull_secret"] = runtime_image_pull_secret

            target_ops.append(target_op)

            self.log_pipeline_info(
                pipeline_name,
                f"processing operation dependencies for id: {operation.id}",
                operation_name=operation.name,
            )

            self._upload_dependencies_to_object_store(runtime_configuration, pipeline_name, operation)

        else:
            # Retrieve component from cache
            component = ComponentCache.instance().get_component(self._type, operation.classifier)

            # Convert the user-entered value of certain properties according to their type
            for component_property in component.properties:
                # Skip properties for which no value was given
                if component_property.ref not in operation.component_params.keys():
                    continue

                # Get corresponding property's value from parsed pipeline
                property_value_dict = operation.component_params.get(component_property.ref)

                # The type and value of this property can vary depending on what the user chooses
                # in the pipeline editor. So we get the current active parameter (e.g. StringControl)
                # from the activeControl value
                active_property_name = property_value_dict["activeControl"]

                # Once we have the active control (e.g. StringControl) we can retrieve the value
                # assigned to it
                property_value = property_value_dict.get(active_property_name, None)

                # If the value is not found, assign it the default value assigned in parser
                if property_value is None:
                    property_value = component_property.value

                self.log.debug(f"Active property name : {active_property_name}, value : {property_value}")
                self.log.debug(
                    f"Processing component parameter '{component_property.name}' "
                    f"of type '{component_property.data_type}'"
                )

                if (
                    property_value
                    and str(property_value)[0] == "{"
                    and str(property_value)[-1] == "}"
                    and isinstance(json.loads(json.dumps(property_value)), dict)
                    and set(json.loads(json.dumps(property_value)).keys()) == {"value", "option"}
                ):
                    parent_node_name = self._get_node_name(
                        target_ops, json.loads(json.dumps(property_value))["value"]
                    )
                    processed_value = "\"{{ ti.xcom_pull(task_ids='" + parent_node_name + "') }}\""
                    operation.component_params[component_property.ref] = processed_value
                elif component_property.data_type == "boolean":
                    operation.component_params[component_property.ref] = property_value
                elif component_property.data_type == "string":
                    # Add surrounding quotation marks to string value for correct rendering
                    # in jinja DAG template
                    operation.component_params[component_property.ref] = json.dumps(property_value)
                elif component_property.data_type == "dictionary":
                    processed_value = self._process_dictionary_value(property_value)
                    operation.component_params[component_property.ref] = processed_value
                elif component_property.data_type == "list":
                    processed_value = self._process_list_value(property_value)
                    operation.component_params[component_property.ref] = processed_value

            # Remove inputs and outputs from params dict until support for data exchange is provided
            operation.component_params_as_dict.pop("inputs")
            operation.component_params_as_dict.pop("outputs")

            # Locate the import statement. If not found raise...
            import_stmts = []
            # Check for import statement on Component object, otherwise get from class_import_map
            import_stmt = component.import_statement or self.class_import_map.get(component.name)
            if import_stmt:
                import_stmts.append(import_stmt)
            else:
                # If we didn't find a mapping to the import statement, let's check if the component
                # name includes a package prefix. If it does, log a warning, but proceed; otherwise
                # raise an exception.
                if len(component.name.split(".")) > 1:  # We (presumably) have a package prefix
                    self.log.warning(
                        f"Operator '{component.name}' of node '{operation.name}' is not configured "
                        f"in the list of available Airflow operators but appears to include a "
                        f"package prefix and processing will proceed."
                    )
                else:
                    raise ValueError(
                        f"Operator '{component.name}' of node '{operation.name}' is not configured "
                        f"in the list of available operators. Please add the fully-qualified "
                        f"package name for '{component.name}' to the "
                        f"AirflowPipelineProcessor.available_airflow_operators configuration."
                    )

            target_op = {
                "notebook": operation.name,
                "id": operation.id,
                "imports": import_stmts,
                "class_name": component.name,
                "parent_operation_ids": operation.parent_operation_ids,
                "component_params": operation.component_params_as_dict,
                "operator_source": component.component_source,
                "is_generic_operator": False,
                "doc": operation.doc,
            }
            target_ops.append(target_op)

    ordered_target_ops = OrderedDict()

    while target_ops:
        for i in range(len(target_ops)):
            target_op = target_ops.pop(0)
            if not target_op["parent_operation_ids"]:
                ordered_target_ops[target_op["id"]] = target_op
                self.log.debug("Root Node added : %s", ordered_target_ops[target_op["id"]])
            elif all(deps in ordered_target_ops.keys() for deps in target_op["parent_operation_ids"]):
                ordered_target_ops[target_op["id"]] = target_op
                self.log.debug("Dependent Node added : %s", ordered_target_ops[target_op["id"]])
            else:
                target_ops.append(target_op)

    self.log_pipeline_info(pipeline_name, "pipeline dependencies processed", duration=(time.time() - t0_all))

    return ordered_target_ops
def pipeline():
    ComponentCache.instance().wait_for_all_cache_tasks()
    pipeline_resource = _read_pipeline_resource("resources/sample_pipelines/pipeline_3_node_sample.json")
    return PipelineParser.parse(pipeline_resource)
async def _validate_compatibility(
    self,
    pipeline_definition: PipelineDefinition,
    pipeline_type: str,
    pipeline_runtime: str,
    response: ValidationResponse,
) -> None:
    """
    Checks that the pipeline payload is compatible with this version of elyra (ISSUE #938)
    as well as verifying all nodes in the pipeline are supported by the runtime

    :param pipeline_definition: the pipeline definition to be validated
    :param pipeline_type: type of the pipeline runtime being used e.g. KUBEFLOW_PIPELINES, APACHE_AIRFLOW, generic
    :param pipeline_runtime: name of the pipeline runtime for execution e.g. kfp, airflow, local
    :param response: ValidationResponse containing the issue list to be updated
    """
    primary_pipeline_id = pipeline_definition.primary_pipeline.id
    supported_ops = []

    if pipeline_runtime:
        if not PipelineValidationManager._is_compatible_pipeline(pipeline_runtime, pipeline_type):
            response.add_message(
                severity=ValidationSeverity.Error,
                message_type="invalidRuntime",
                message="Pipeline runtime platform is not compatible with selected runtime configuration.",
                data={
                    "pipelineID": primary_pipeline_id,
                    "pipelineType": pipeline_type,
                    "pipelineRuntime": pipeline_runtime,
                },
            )
        elif PipelineProcessorManager.instance().is_supported_runtime(pipeline_runtime):
            component_list = await PipelineProcessorManager.instance().get_components(pipeline_runtime)
            for component in component_list:
                supported_ops.append(component.op)

            # Checks pipeline node types are compatible with the runtime selected
            for sub_pipeline in pipeline_definition.pipelines:
                for node in sub_pipeline.nodes:
                    if node.op not in ComponentCache.get_generic_component_ops() and pipeline_runtime == "local":
                        response.add_message(
                            severity=ValidationSeverity.Error,
                            message_type="invalidNodeType",
                            message="This pipeline contains at least one runtime-specific "
                            "component, but pipeline runtime is 'local'. Specify a "
                            "runtime config or remove runtime-specific components "
                            "from the pipeline",
                            data={"nodeID": node.id, "nodeOpName": node.op, "pipelineId": sub_pipeline.id},
                        )
                        break
                    if node.type == "execution_node" and node.op not in supported_ops:
                        response.add_message(
                            severity=ValidationSeverity.Error,
                            message_type="invalidNodeType",
                            message="This component was not found in the catalog. Please add it "
                            "to your component catalog or remove this node from the "
                            "pipeline",
                            data={
                                "nodeID": node.id,
                                "nodeOpName": node.op,
                                "nodeName": node.label,
                                "pipelineId": sub_pipeline.id,
                            },
                        )
        else:
            response.add_message(
                severity=ValidationSeverity.Error,
                message_type="invalidRuntime",
                message="Unsupported pipeline runtime",
                data={
                    "pipelineRuntime": pipeline_runtime,
                    "pipelineType": pipeline_type,
                    "pipelineId": primary_pipeline_id,
                },
            )
async def _validate_custom_component_node_properties(
    self, node: Node, response: ValidationResponse, pipeline_definition: PipelineDefinition, pipeline_runtime: str
):
    """
    Validates the properties of the custom component node

    :param node: the node to be validated
    :param response: the validation response object to attach any error messages
    :param pipeline_definition: the pipeline definition containing the node
    :param pipeline_runtime: the pipeline runtime selected
    :return:
    """
    component_list = await PipelineProcessorManager.instance().get_components(pipeline_runtime)
    components = ComponentCache.to_canvas_palette(component_list)

    # Full dict of properties for the operation e.g. current params, optionals etc
    component_property_dict = await self._get_component_properties(pipeline_runtime, components, node.op)

    # List of just the current parameters for the component
    current_parameter_defaults_list = list(
        map(lambda x: str(x).replace("elyra_", ""), component_property_dict["current_parameters"].keys())
    )

    # Remove the non component_parameter jinja templated values we do not check against
    current_parameter_defaults_list.remove("component_source")
    current_parameter_defaults_list.remove("label")

    for default_parameter in current_parameter_defaults_list:
        node_param = node.get_component_parameter(default_parameter)
        if self._is_required_property(component_property_dict, default_parameter):
            if not node_param:
                response.add_message(
                    severity=ValidationSeverity.Error,
                    message_type="invalidNodeProperty",
                    message="Node is missing required property.",
                    data={"nodeID": node.id, "nodeName": node.label, "propertyName": default_parameter},
                )
            elif self._get_component_type(component_property_dict, default_parameter) == "inputpath":
                # Any component property with type `InputPath` will be a dictionary of two keys
                # "value": the node ID of the parent node containing the output
                # "option": the name of the key (which is an output) of the above referenced node
                if (
                    not isinstance(node_param, dict)
                    or len(node_param) != 2
                    or set(node_param.keys()) != {"value", "option"}
                ):
                    response.add_message(
                        severity=ValidationSeverity.Error,
                        message_type="invalidNodeProperty",
                        message="Node has malformed `InputPath` parameter structure",
                        data={"nodeID": node.id, "nodeName": node.label},
                    )
                node_ids = list(x.get("node_id_ref", None) for x in node.component_links)
                parent_list = self._get_parent_id_list(pipeline_definition, node_ids, [])
                node_param_value = node_param.get("value")
                if node_param_value not in parent_list:
                    response.add_message(
                        severity=ValidationSeverity.Error,
                        message_type="invalidNodeProperty",
                        message="Node contains an invalid inputpath reference. Please "
                        "check your node-to-node connections",
                        data={"nodeID": node.id, "nodeName": node.label},
                    )
            elif isinstance(node_param, dict) and node_param.get("activeControl") == "NestedEnumControl":
                if not node_param.get("NestedEnumControl"):
                    response.add_message(
                        severity=ValidationSeverity.Error,
                        message_type="invalidNodeProperty",
                        message="Node contains an invalid reference to a node output. Please "
                        "check the node properties are configured properly",
                        data={"nodeID": node.id, "nodeName": node.label},
                    )
                else:
                    # TODO: Update this hardcoded check for xcom_push. This parameter is specific to a runtime
                    # (Airflow), i.e. abstraction for byo validation?
                    node_param_value = node_param["NestedEnumControl"].get("value")
                    upstream_node = pipeline_definition.get_node(node_param_value)
                    xcom_param = upstream_node.get_component_parameter("xcom_push")
                    if xcom_param:
                        xcom_value = xcom_param.get("BooleanControl")
                        if not xcom_value:
                            response.add_message(
                                severity=ValidationSeverity.Error,
                                message_type="invalidNodeProperty",
                                message="Node contains an invalid input reference. The parent "
                                "node does not have the xcom_push property enabled",
                                data={
                                    "nodeID": node.id,
                                    "nodeName": node.label,
                                    "parentNodeID": upstream_node.label,
                                },
                            )
async def stop_extension(self):
    if ComponentCache.initialized():
        ComponentCache.instance(parent=self).cache_manager.stop()  # terminate CacheUpdateManager
def test_processing_filename_runtime_specific_component(monkeypatch, processor, sample_metadata, tmpdir):
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    absolute_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "resources", "components", "download_data.yaml")
    )

    # Read contents of given path -- read_component_definition() returns a
    # dictionary of component definition content indexed by path
    entry_data = reader.get_entry_data({"path": absolute_path}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="Download data",
        description="",
        op="download-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": absolute_path},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include single filename-based component for testing
    ComponentCache.instance()._component_cache[processor._type.name] = {
        "spoofed_catalog": {"components": {component_id: component}}
    }

    # Construct hypothetical operation for component
    operation_name = "Download data test"
    operation_params = {
        "url": "https://raw.githubusercontent.com/elyra-ai/elyra/master/tests/assets/helloworld.ipynb",
        "curl_options": "--location",
    }
    operation = Operation(
        id="download-data-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata)

    mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(
        id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="download_data.pipeline"
    )
    pipeline.operations[operation.id] = operation

    # Establish path and function to construct pipeline
    pipeline_path = os.path.join(tmpdir, "kfp_test.yaml")
    constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline")

    # TODO Check against both argo and tekton compilations
    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path)

    # Read contents of pipeline YAML
    with open(pipeline_path) as f:
        pipeline_yaml = yaml.safe_load(f.read())

    # Check the pipeline file contents for correctness
    pipeline_template = pipeline_yaml["spec"]["templates"][0]
    assert pipeline_template["metadata"]["annotations"]["pipelines.kubeflow.org/task_display_name"] == operation_name
    assert pipeline_template["container"]["command"][3] == operation_params["url"]