def test_valid_pipeline():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)

    assert pipeline_definition.is_valid()
def _get_parent_id_list(
    self, pipeline_definition: PipelineDefinition, node_id_list: list, parent_list: list
) -> List:
    """
    Helper function to return a complete list of parent node_ids

    :param pipeline_definition: the complete pipeline definition
    :param node_id_list: list of parent node ids
    :param parent_list: the list to which additional found parent node ids are added
    :return: the complete list of parent node ids
    """
    for node_id in node_id_list:
        node = pipeline_definition.get_node(node_id)
        if not node:
            continue
        if node.type in ["execution_node", "super_node"]:
            parent_list.append(node_id)
            node_ids = [x.get("node_id_ref", None) for x in node.component_links]
            # Look ahead to determine whether a referenced node is a binding node.
            # Iterate over a copy so that removals do not skip elements.
            for nid in list(node_ids):
                if pipeline_definition.get_node(nid).type == "binding":
                    node_ids.remove(nid)
                    # replace the binding node with the links of the super_node
                    # that owns the subflow it belongs to
                    for super_node in pipeline_definition.get_supernodes():
                        if super_node.subflow_pipeline_id == nid:
                            links = [x.get("node_id_ref", None) for x in super_node.component_links]
                            node_ids.extend(links)
            self._get_parent_id_list(pipeline_definition, node_ids, parent_list)
        else:
            # binding node: nothing to record
            pass
    return parent_list
def _check_missing_primary_pipeline_field(field: str, error_msg: str):
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")

    pipeline_json["pipelines"][0].pop(field)

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)
    assert pipeline_definition.is_valid() is False
    assert error_msg in pipeline_definition.validate()
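# A hypothetical caller for the helper above: each test supplies a top-level
# field of the primary pipeline plus the validation message it expects. The
# message text here is illustrative, not taken from the validator itself.
def test_validation_flags_missing_nodes_field():
    _check_missing_primary_pipeline_field("nodes", "At least one node must exist in the primary pipeline.")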
def describe(json_option, pipeline_path):
    """
    Display pipeline summary
    """
    click.echo()
    print_banner("Elyra Pipeline details")

    indent_length = 4
    blank_field = "Not Specified"
    blank_list = ["None Listed"]
    pipeline_keys = [
        "name",
        "description",
        "type",
        "version",
        "nodes",
        "file_dependencies",
        "component_dependencies",
    ]
    iter_keys = {"file_dependencies", "component_dependencies"}

    pipeline_definition = _preprocess_pipeline(pipeline_path, runtime="local", runtime_config="local")
    primary_pipeline = PipelineDefinition(pipeline_definition=pipeline_definition).primary_pipeline

    describe_dict = OrderedDict()
    describe_dict["name"] = primary_pipeline.name
    describe_dict["description"] = primary_pipeline.get_property("description")
    describe_dict["type"] = primary_pipeline.type
    describe_dict["version"] = primary_pipeline.version
    describe_dict["nodes"] = len(primary_pipeline.nodes)
    describe_dict["file_dependencies"] = set()
    describe_dict["component_dependencies"] = set()

    for node in primary_pipeline.nodes:
        # collect information about file dependencies
        for dependency in node.get_component_parameter("dependencies", []):
            describe_dict["file_dependencies"].add(f"{dependency}")
        # collect information about component dependencies
        if node.component_source is not None:
            describe_dict["component_dependencies"].add(node.component_source)

    if not json_option:
        for key in pipeline_keys:
            readable_key = " ".join(key.title().split("_"))
            if key in iter_keys:
                click.echo(f"{readable_key}:")
                if describe_dict.get(key, set()) == set():
                    click.echo(f"{' ' * indent_length}{blank_list[0]}")
                else:
                    for item in describe_dict.get(key, blank_list):
                        click.echo(f"{' ' * indent_length}- {item}")
            else:
                click.echo(f"{readable_key}: {describe_dict.get(key, blank_field)}")
    else:
        for key in iter_keys:
            describe_dict[key] = list(describe_dict[key])
        for key in pipeline_keys:
            value = describe_dict.get(key)
            if value is None or (key in iter_keys and len(value) == 0):
                describe_dict.pop(key)
        click.echo(json.dumps(describe_dict, indent=indent_length))
def test_updates_to_primary_pipeline_updates_pipeline_definition():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)
    pipeline_definition.primary_pipeline.set("version", 3)

    assert pipeline_definition.primary_pipeline.version == 3
    assert pipeline_definition.to_dict()["pipelines"][0]["app_data"]["version"] == 3
def test_updates_to_nodes_updates_pipeline_definition():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)
    for node in pipeline_definition.primary_pipeline.nodes:
        node.set_component_parameter("filename", "foo")

    for node in pipeline_definition.to_dict()["pipelines"][0]["nodes"]:
        assert node["app_data"]["component_parameters"]["filename"] == "foo"
def test_validation_flags_missing_version_field():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")

    pipeline_json["pipelines"][0]["app_data"].pop("version")

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)
    assert pipeline_definition.is_valid() is False
    assert "Primary pipeline is missing the 'version' field." in pipeline_definition.validate()
def _check_pipeline_field_type(field: str, wrong_type_value: Any, error_msg: str):
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")

    # replace the field's value with one of the wrong type
    pipeline_json.pop(field)
    pipeline_json[field] = wrong_type_value

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)
    assert pipeline_definition.is_valid() is False
    assert error_msg in pipeline_definition.validate()
def test_convert_kv_properties(monkeypatch):
    kv_test_property_name = "kv_test_property"
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid_with_pipeline_default.json")

    # Mock get_kv_properties() to ensure the "kv_test_property" variable is included in the list
    mock_kv_property_list = [pipeline_constants.ENV_VARIABLES, kv_test_property_name]
    monkeypatch.setattr(PipelineDefinition, "get_kv_properties", mock.Mock(return_value=mock_kv_property_list))

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)

    node = pipeline_definition.primary_pipeline.nodes.pop()
    pipeline_defaults = pipeline_definition.primary_pipeline.get_property(pipeline_constants.PIPELINE_DEFAULTS)

    for kv_property in mock_kv_property_list:
        assert isinstance(node.get_component_parameter(kv_property), KeyValueList)
        assert isinstance(pipeline_defaults[kv_property], KeyValueList)

    # Ensure a non-list property is not converted to a KeyValueList
    assert not isinstance(
        pipeline_definition.primary_pipeline.get_property(pipeline_constants.RUNTIME_IMAGE), KeyValueList
    )

    # Ensure a plain list property is not converted to a KeyValueList
    assert not isinstance(node.get_component_parameter("outputs"), KeyValueList)
async def test_missing_node_property_for_kubeflow_pipeline(
    validation_manager, monkeypatch, load_pipeline, catalog_instance
):
    pipeline, response = load_pipeline("kf_invalid_node_property_in_component.pipeline")
    node_id = "fe08b42d-bd8c-4e97-8010-0503a3185427"
    node_property = "notebook"
    pvm = validation_manager

    monkeypatch.setattr(pvm, "_validate_filepath", lambda node_id, file_dir, property_name, filename, response: True)

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    await pvm._validate_node_properties(
        pipeline_definition=pipeline_definition,
        response=response,
        pipeline_type="KUBEFLOW_PIPELINES",
        pipeline_runtime="kfp",
    )

    issues = response.to_json().get("issues")
    assert len(issues) == 1
    assert issues[0]["severity"] == 1
    assert issues[0]["type"] == "invalidNodeProperty"
    assert issues[0]["data"]["propertyName"] == node_property
    assert issues[0]["data"]["nodeID"] == node_id
async def test_invalid_node_property_structure(validation_manager, monkeypatch, load_pipeline):
    pipeline, response = load_pipeline("generic_invalid_node_property_structure.pipeline")
    node_id = "88ab83dc-d5f0-443a-8837-788ed16851b7"
    node_property = "runtime_image"
    pvm = validation_manager

    monkeypatch.setattr(pvm, "_validate_filepath", lambda node_id, node_label, property_name, filename, response: True)
    monkeypatch.setattr(pvm, "_validate_label", lambda node_id, node_label, response: True)

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    await pvm._validate_node_properties(
        pipeline_definition=pipeline_definition, response=response, pipeline_type="GENERIC", pipeline_runtime="kfp"
    )

    issues = response.to_json().get("issues")
    assert len(issues) == 1
    assert issues[0]["severity"] == 1
    assert issues[0]["type"] == "invalidNodeProperty"
    assert issues[0]["data"]["propertyName"] == node_property
    assert issues[0]["data"]["nodeID"] == node_id
def _check_pipeline_correct_pipeline_alternative_name():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid_alternative_name.json")

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)
    primary_pipeline = pipeline_definition.primary_pipeline

    assert primary_pipeline.name == "{{alternative_name}}"
def test_basic_pipeline_structure(validation_manager, load_pipeline):
    pipeline, response = load_pipeline("generic_basic_pipeline_only_notebook.pipeline")

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    validation_manager._validate_pipeline_structure(pipeline_definition=pipeline_definition, response=response)
    assert not response.has_fatal
    assert not response.to_json().get("issues")
def _preprocess_pipeline(
    pipeline_path: str, runtime: Optional[str] = None, runtime_config: Optional[str] = None
) -> dict:
    pipeline_path = os.path.expanduser(pipeline_path)
    pipeline_abs_path = os.path.join(os.getcwd(), pipeline_path)
    pipeline_dir = os.path.dirname(pipeline_abs_path)
    pipeline_name = os.path.splitext(os.path.basename(pipeline_abs_path))[0]

    if not os.path.exists(pipeline_abs_path):
        raise click.ClickException(f"Pipeline file not found: '{pipeline_abs_path}'\n")

    try:
        pipeline_definition = PipelineDefinition(pipeline_abs_path)
    except ValueError as ve:
        raise click.ClickException(f"Pipeline file is invalid: \n {ve}")

    try:
        primary_pipeline = pipeline_definition.primary_pipeline
    except Exception as e:
        raise click.ClickException(e)

    try:
        # resolve 'filename' node parameters relative to the pipeline file
        for pipeline in pipeline_definition.pipelines:
            for node in pipeline.nodes:
                filename = node.get_component_parameter("filename")
                if filename:
                    abs_path = os.path.join(pipeline_dir, filename)
                    node.set_component_parameter("filename", abs_path)
    except Exception as e:
        raise click.ClickException(f"Error pre-processing pipeline: \n {e}")

    # update pipeline transient fields
    primary_pipeline.set("name", pipeline_name)
    primary_pipeline.set("source", os.path.basename(pipeline_abs_path))

    # Only update the following if values were provided
    if runtime:
        primary_pipeline.set("runtime", runtime)
    if runtime_config:
        primary_pipeline.set("runtime_config", runtime_config)

    return pipeline_definition.to_dict()
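# Hypothetical usage sketch for _preprocess_pipeline, mirroring how describe()
# uses it above. The file path, "kfp" and "my_kfp_config" are illustrative
# values, not taken from this codebase.
pipeline_dict = _preprocess_pipeline("pipelines/example.pipeline", runtime="kfp", runtime_config="my_kfp_config")
primary_pipeline = PipelineDefinition(pipeline_definition=pipeline_dict).primary_pipeline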
def _super_node_to_operations(
    self, pipeline_definition: PipelineDefinition, node: Node, pipeline_object: Pipeline, super_node: Node
) -> None:
    """Converts nodes within a super_node to operations."""
    # get the pipeline corresponding to the super_node
    pipeline_id = node.subflow_pipeline_id
    pipeline = pipeline_definition.get_pipeline_definition(pipeline_id)
    # recurse to process the nodes of the super_node
    return self._nodes_to_operations(pipeline_definition, pipeline_object, pipeline.nodes, super_node)
def test_remove_env_vars_with_matching_secrets():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid_with_pipeline_default.json")
    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)
    node = pipeline_definition.primary_pipeline.nodes.pop()

    # Set the kubernetes_secret property to have all the same keys as those in the env_vars property
    kubernetes_secrets = KeyValueList(["var1=name1:key1", "var2=name2:key2", "var3=name3:key3"])
    node.set_component_parameter(KUBERNETES_SECRETS, kubernetes_secrets)

    node.remove_env_vars_with_matching_secrets()
    assert node.get_component_parameter(ENV_VARIABLES) == []
def test_invalid_pipeline_version_that_needs_migration(validation_manager, load_pipeline):
    pipeline, response = load_pipeline("generic_basic_pipeline_only_notebook.pipeline")
    pipeline["pipelines"][0]["app_data"]["version"] = 3

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    validation_manager._validate_pipeline_structure(pipeline_definition=pipeline_definition, response=response)
    issues = response.to_json().get("issues")
    assert len(issues) == 1
    assert issues[0]["severity"] == 1
    assert issues[0]["type"] == "invalidPipeline"
    assert "needs to be migrated" in issues[0]["message"]
async def test_pipeline_kfp_inputpath_parameter(validation_manager, load_pipeline, catalog_instance, component_cache):
    pipeline, response = load_pipeline("kf_inputpath_parameter.pipeline")
    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    await validation_manager._validate_node_properties(
        pipeline_definition=pipeline_definition,
        response=response,
        pipeline_type="KUBEFLOW_PIPELINES",
        pipeline_runtime="kfp",
    )

    issues = response.to_json().get("issues")
    assert len(issues) == 0
async def test_invalid_pipeline_runtime_with_kubeflow_execution(validation_manager, load_pipeline):
    pipeline, response = load_pipeline("generic_basic_pipeline_with_scripts.pipeline")
    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    await validation_manager._validate_compatibility(
        pipeline_definition=pipeline_definition,
        response=response,
        pipeline_type="APACHE_AIRFLOW",
        pipeline_runtime="kfp",
    )

    issues = response.to_json().get("issues")
    assert len(issues) == 1
    assert issues[0]["severity"] == 1
    assert issues[0]["type"] == "invalidRuntime"
async def test_invalid_node_op_with_airflow(validation_manager, load_pipeline):
    pipeline, response = load_pipeline("aa_invalid_node_op.pipeline")
    node_id = "749d4641-cee8-4a50-a0ed-30c07439908f"

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    await validation_manager._validate_compatibility(
        pipeline_definition=pipeline_definition,
        response=response,
        pipeline_type="APACHE_AIRFLOW",
        pipeline_runtime="airflow",
    )

    issues = response.to_json().get("issues")
    assert len(issues) == 1
    assert issues[0]["severity"] == 1
    assert issues[0]["type"] == "invalidNodeType"
    assert issues[0]["data"]["nodeID"] == node_id
def test_invalid_upper_pipeline_version(validation_manager, load_pipeline):
    pipeline, response = load_pipeline("generic_basic_pipeline_only_notebook.pipeline")
    pipeline_version = PIPELINE_CURRENT_VERSION + 1
    pipeline["pipelines"][0]["app_data"]["version"] = pipeline_version

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    validation_manager._validate_pipeline_structure(pipeline_definition=pipeline_definition, response=response)
    issues = response.to_json().get("issues")
    assert len(issues) == 1
    assert issues[0]["severity"] == 1
    assert issues[0]["type"] == "invalidPipeline"
    assert (
        issues[0]["message"] == "Pipeline was last edited in a newer version of Elyra. "
        "Update Elyra to use this pipeline."
    )
async def get(self, runtime_type):
    self.log.debug(f"Retrieving pipeline components for runtime type: {runtime_type}")

    runtime_processor_type = get_runtime_processor_type(runtime_type, self.log, self.request.path)
    if not runtime_processor_type:
        raise web.HTTPError(400, f"Invalid runtime type '{runtime_type}'")

    # Get pipeline properties json
    pipeline_properties_json = PipelineDefinition.get_canvas_properties_from_template(
        package_name="templates/pipeline", template_name="pipeline_properties_template.jinja2"
    )

    self.set_status(200)
    self.set_header("Content-Type", "application/json")
    await self.finish(pipeline_properties_json)
async def test_valid_node_property_pipeline_filepath(monkeypatch, validation_manager, load_pipeline):
    pipeline, response = load_pipeline("generic_basic_filepath_check.pipeline")

    monkeypatch.setattr(validation_manager, "_validate_label", lambda node_id, node_label, response: True)

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    await validation_manager._validate_node_properties(
        pipeline_definition=pipeline_definition, response=response, pipeline_type="GENERIC", pipeline_runtime="kfp"
    )

    assert not response.has_fatal
    assert not response.to_json().get("issues")
async def test_invalid_lower_pipeline_version(validation_manager, load_pipeline):
    pipeline, response = load_pipeline("generic_basic_pipeline_only_notebook.pipeline")
    pipeline_version = PIPELINE_CURRENT_VERSION - 1
    pipeline["pipelines"][0]["app_data"]["version"] = pipeline_version

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    validation_manager._validate_pipeline_structure(pipeline_definition=pipeline_definition, response=response)
    issues = response.to_json().get("issues")
    assert len(issues) == 1
    assert issues[0]["severity"] == 1
    assert issues[0]["type"] == "invalidPipeline"
    assert (
        issues[0]["message"] == f"Pipeline version {pipeline_version} is out of date "
        "and needs to be migrated using the Elyra pipeline editor."
    )
def _nodes_to_operations(
    self,
    pipeline_definition: PipelineDefinition,
    pipeline_object: Pipeline,
    nodes: List[Node],
    super_node: Optional[Node] = None,
) -> None:
    """
    Converts each execution_node of the pipeline to its corresponding operation.

    If a super_node is encountered, recursion is used to process its embedded nodes.
    If the super_node has binding nodes, those "nodes" are ignored since we handle
    their "functionality" by parsing the port_id_ref field to determine the node_id
    of the embedded node.

    If any node types other than execution_node, super_node or binding are
    encountered, a ValueError is raised indicating the unknown node type.

    Since the pipeline_object's operations list is updated, this method does not
    return a value.
    """
    for node in nodes:
        # Super_nodes trigger recursion
        if node.type == "super_node":
            self._super_node_to_operations(pipeline_definition, node, pipeline_object, node)
            continue  # skip to next node
        elif node.type == "binding":
            # We can ignore binding nodes since we're able to determine links without them
            continue
        elif node.type == "model_node":
            raise NotImplementedError(f"Node type '{node.type}' is currently not supported!")
        elif node.type != "execution_node":
            raise ValueError(f"Node type '{node.type}' is invalid!")

        # parse each node as a pipeline operation
        operation = self._create_pipeline_operation(node, super_node)

        # associate the user comment as docs to the operation
        comment = pipeline_definition.get_node_comments(node.id)
        if comment:
            operation.doc = comment

        self.log.debug(f"Adding operation for '{operation.name}' to pipeline: {pipeline_object.name}")
        pipeline_object.operations[operation.id] = operation
async def test_invalid_runtime_node_kubeflow(validation_manager, load_pipeline, catalog_instance):
    pipeline, response = load_pipeline("kf_invalid_node_op.pipeline")
    node_id = "eace43f8-c4b1-4a25-b331-d57d4fc29426"

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    await validation_manager._validate_compatibility(
        pipeline_definition=pipeline_definition,
        response=response,
        pipeline_type="KUBEFLOW_PIPELINES",
        pipeline_runtime="kfp",
    )

    issues = response.to_json().get("issues")
    assert len(issues) == 1
    assert issues[0]["severity"] == 1
    assert issues[0]["type"] == "invalidNodeType"
    assert issues[0]["data"]["nodeID"] == node_id
def test_propagate_pipeline_default_properties(monkeypatch):
    kv_list_correct = ["var1=var1", "var2=var2", "var3=var_three"]

    kv_test_property_name = "kv_test_property"
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid_with_pipeline_default.json")

    # Mock get_kv_properties() to ensure the "kv_test_property" variable is included in the list
    mock_kv_property_list = [pipeline_constants.ENV_VARIABLES, kv_test_property_name]
    monkeypatch.setattr(PipelineDefinition, "get_kv_properties", mock.Mock(return_value=mock_kv_property_list))

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)
    node = pipeline_definition.primary_pipeline.nodes.pop()

    assert node.get_component_parameter(pipeline_constants.ENV_VARIABLES) == kv_list_correct
    assert node.get_component_parameter(kv_test_property_name) == kv_list_correct
async def test_pipeline_invalid_kfp_inputpath_missing_connection(
    validation_manager, load_pipeline, catalog_instance, component_cache
):
    invalid_node_id = "5b78ea0a-e5fc-4022-94d4-7b9dc170d794"

    pipeline, response = load_pipeline("kf_invalid_inputpath_missing_connection.pipeline")
    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    await validation_manager._validate_node_properties(
        pipeline_definition=pipeline_definition,
        response=response,
        pipeline_type="KUBEFLOW_PIPELINES",
        pipeline_runtime="kfp",
    )

    issues = response.to_json().get("issues")
    assert len(issues) == 1
    assert response.has_fatal
    assert issues[0]["severity"] == 1
    assert issues[0]["type"] == "invalidNodeProperty"
    assert issues[0]["data"]["nodeID"] == invalid_node_id
async def test_invalid_runtime_node_kubeflow_with_supernode(validation_manager, load_pipeline, catalog_instance):
    pipeline, response = load_pipeline("kf_invalid_node_op_with_supernode.pipeline")
    node_id = "98aa7270-639b-42a4-9a07-b31cd0fa3205"
    pipeline_id = "00304a2b-dec4-4a73-ab4a-6830f97d7855"

    pipeline_definition = PipelineDefinition(pipeline_definition=pipeline)
    await validation_manager._validate_compatibility(
        pipeline_definition=pipeline_definition,
        response=response,
        pipeline_type="KUBEFLOW_PIPELINES",
        pipeline_runtime="kfp",
    )

    issues = response.to_json().get("issues")
    assert len(issues) == 1
    assert issues[0]["severity"] == 1
    assert issues[0]["type"] == "invalidNodeType"
    assert issues[0]["data"]["pipelineId"] == pipeline_id
    assert issues[0]["data"]["nodeID"] == node_id
def parse(self, pipeline_json: Dict) -> Pipeline:
    """
    The pipeline definitions allow for defining multiple pipelines in one json file.
    When super_nodes are used, their node actually references another pipeline in the
    set of pipeline definitions - which is "flattened" into the overall pipeline
    object's list of operations.
    """
    try:
        pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)
        primary_pipeline = pipeline_definition.primary_pipeline
    except Exception as e:
        raise ValueError(f"Invalid Pipeline: {e}")

    # runtime info is only present on the primary pipeline...
    runtime = primary_pipeline.runtime
    if not runtime:
        raise ValueError("Invalid pipeline: Missing runtime.")
    runtime_config = primary_pipeline.runtime_config
    if not runtime_config:
        raise ValueError("Invalid pipeline: Missing runtime configuration.")

    source = primary_pipeline.source
    description = primary_pipeline.get_property("description")

    pipeline_object = Pipeline(
        id=primary_pipeline.id,
        name=primary_pipeline.name,
        runtime=runtime,
        runtime_config=runtime_config,
        source=source,
        description=description,
        pipeline_parameters=primary_pipeline.pipeline_parameters,
    )
    self._nodes_to_operations(pipeline_definition, pipeline_object, primary_pipeline.nodes)
    return pipeline_object
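# Hypothetical usage sketch for parse(): load a pipeline JSON file and flatten
# it into a Pipeline object. The enclosing parser class is not shown in this
# excerpt, so "PipelineParser" is an assumed name, and the file path is
# illustrative. The operations dict is keyed by operation id, as populated in
# _nodes_to_operations() above.
import json

with open("pipelines/example.pipeline") as f:
    pipeline_json = json.load(f)

pipeline_object = PipelineParser().parse(pipeline_json)  # assumed class name
for op_id, operation in pipeline_object.operations.items():
    print(op_id, operation.name)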