def test_fail_pipelines_are_equal(good_pipeline):
    test_operations_dict = {"123123123": good_operation, "234234234": good_operation}

    compare_pipeline = Pipeline(
        id="Random-UUID-123123123123123", name="test-pipeline", runtime="kfp", runtime_config="default_kfp"
    )

    for key, operation in test_operations_dict.items():
        compare_pipeline.operations[key] = operation

    with pytest.raises(AssertionError):
        assert compare_pipeline == good_pipeline
def construct_pipeline(
    name: str,
    nodes: List[NodeBase],
    location,
    runtime_type: Optional[str] = 'local',
    runtime_config: Optional[str] = 'local',
) -> Pipeline:
    """Returns an instance of a local Pipeline consisting of each node and populates the
    specified location with the necessary files to run the pipeline from that location.
    """
    pipeline = Pipeline(str(uuid.uuid4()), name, runtime_type, runtime_config)
    for node in nodes:
        pipeline.operations[node.id] = node.get_operation()
        # copy the node file into the "working directory"
        if isinstance(node, NotebookNode):
            src_file = os.path.join(os.path.dirname(__file__), 'resources/node_util/node.ipynb')
        elif isinstance(node, PythonNode):
            src_file = os.path.join(os.path.dirname(__file__), 'resources/node_util/node.py')
        else:
            assert False, f"Invalid node type detected: {node.__class__.__name__}"
        shutil.copy(src_file, os.path.join(location, node.filename))

    # copy the node_util directory into the "working directory"
    shutil.copytree(
        os.path.join(os.path.dirname(__file__), 'resources/node_util'),
        os.path.join(location, 'node_util'))
    return pipeline
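# Hypothetical usage sketch (not part of the original helpers): exercises the staging
# behaviour of construct_pipeline() above. The PythonNode("node.py") constructor
# arguments are an assumption for illustration; only the attributes construct_pipeline
# itself relies on (id, filename, get_operation) are asserted against.
def test_construct_pipeline_stages_node_files(tmp_path):
    node = PythonNode("node.py")  # hypothetical constructor arguments
    pipeline = construct_pipeline("sample-pipeline", nodes=[node], location=str(tmp_path))

    # one operation is registered per node, keyed by the node id
    assert pipeline.name == "sample-pipeline"
    assert node.id in pipeline.operations
    # the node file and the node_util package were copied into the "working directory"
    assert os.path.isfile(os.path.join(str(tmp_path), node.filename))
    assert os.path.isdir(os.path.join(str(tmp_path), "node_util"))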
def test_pipelines_are_equal(good_pipeline):
    compare_pipeline = Pipeline(
        id="Random-UUID-123123123123123", name="test-pipeline", runtime="kfp", runtime_config="default_kfp"
    )
    assert compare_pipeline == good_pipeline
def parse(self, pipeline_json: Dict) -> Pipeline:
    """
    The pipeline definitions allow for defining multiple pipelines in one json file.
    When super_nodes are used, each such node references another pipeline in the set
    of pipeline definitions, which is "flattened" into the overall pipeline object's
    list of operations.
    """
    try:
        pipeline_definition = PipelineDefinition(pipeline_definition=pipeline_json)
        primary_pipeline = pipeline_definition.primary_pipeline
    except Exception as e:
        raise ValueError(f"Invalid Pipeline: {e}")

    # runtime info is only present on the primary pipeline
    runtime = primary_pipeline.runtime
    if not runtime:
        raise ValueError("Invalid pipeline: Missing runtime.")
    runtime_config = primary_pipeline.runtime_config
    if not runtime_config:
        raise ValueError("Invalid pipeline: Missing runtime configuration.")

    source = primary_pipeline.source
    description = primary_pipeline.get_property("description")

    pipeline_object = Pipeline(
        id=primary_pipeline.id,
        name=primary_pipeline.name,
        runtime=runtime,
        runtime_config=runtime_config,
        source=source,
        description=description,
        pipeline_parameters=primary_pipeline.pipeline_parameters,
    )
    self._nodes_to_operations(pipeline_definition, pipeline_object, primary_pipeline.nodes)
    return pipeline_object
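# Minimal usage sketch for parse() above, assuming the enclosing class is Elyra's
# PipelineParser and that it can be instantiated with default arguments; both the
# no-argument construction and the file path handling are assumptions for illustration.
def load_pipeline_from_file(path: str) -> Pipeline:
    import json

    with open(path) as f:
        pipeline_json = json.load(f)
    # parse() validates the definition and returns a Pipeline whose operations
    # include the flattened contents of any super_nodes
    return PipelineParser().parse(pipeline_json)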
def test_processing_filename_runtime_specific_component(monkeypatch, processor, sample_metadata, tmpdir):
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    absolute_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "resources", "components", "download_data.yaml")
    )

    # Read contents of given path -- read_component_definition() returns a
    # dictionary of component definition content indexed by path
    entry_data = reader.get_entry_data({"path": absolute_path}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="Download data",
        description="",
        op="download-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": absolute_path},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include single filename-based component for testing
    ComponentCache.instance()._component_cache[processor._type.name] = {
        "spoofed_catalog": {"components": {component_id: component}}
    }

    # Construct hypothetical operation for component
    operation_name = "Download data test"
    operation_params = {
        "url": "https://raw.githubusercontent.com/elyra-ai/elyra/master/tests/assets/helloworld.ipynb",
        "curl_options": "--location",
    }
    operation = Operation(
        id="download-data-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata)
    mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(
        id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="download_data.pipeline"
    )
    pipeline.operations[operation.id] = operation

    # Establish path and function to construct pipeline
    pipeline_path = os.path.join(tmpdir, "kfp_test.yaml")
    constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline")

    # TODO Check against both argo and tekton compilations
    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path)

    # Read contents of pipeline YAML
    with open(pipeline_path) as f:
        pipeline_yaml = yaml.safe_load(f.read())

    # Check the pipeline file contents for correctness
    pipeline_template = pipeline_yaml["spec"]["templates"][0]
    assert pipeline_template["metadata"]["annotations"]["pipelines.kubeflow.org/task_display_name"] == operation_name
    assert pipeline_template["container"]["command"][3] == operation_params["url"]
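# Sketch addressing the TODO in the test above: compile the same pipeline function with
# the kfp-tekton compiler in addition to Argo. The import alias and the shape of the
# generated PipelineRun YAML are assumptions; the assertions on the rendered template
# would need to be adapted for the Tekton output, and the names below refer to the
# local variables defined inside the test.
#
#     from kfp_tekton import compiler as kfp_tekton_compiler
#
#     tekton_pipeline_path = os.path.join(tmpdir, "kfp_test_tekton.yaml")
#     kfp_tekton_compiler.TektonCompiler().compile(constructed_pipeline_function, tekton_pipeline_path)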
@pytest.fixture
def good_pipeline():
    # pytest fixture consumed by the pipeline equality tests above
    test_pipeline = Pipeline(
        id="Random-UUID-123123123123123", name="test-pipeline", runtime="kfp", runtime_config="default_kfp"
    )
    return test_pipeline
def test_fail_create_pipeline_missing_runtime_config():
    with pytest.raises(TypeError):
        Pipeline(id="Random-UUID-123123123123123", name="test-pipeline", runtime="kfp")
def test_fail_create_pipeline_missing_name():
    with pytest.raises(TypeError):
        Pipeline(id="Random-UUID-123123123123123", runtime="kfp", runtime_config="default_kfp")
def test_fail_create_pipeline_missing_id():
    with pytest.raises(TypeError):
        Pipeline(name="test-pipeline", runtime="kfp", runtime_config="default_kfp")
def test_cc_pipeline_component_no_input(monkeypatch, processor, component_cache, sample_metadata, tmpdir):
    """
    Verifies that cc_pipeline can handle KFP component definitions that don't
    include any inputs.
    """
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    cpath = (Path(__file__).parent / ".." / "resources" / "components" / "kfp_test_operator_no_inputs.yaml").resolve()
    assert cpath.is_file()
    cpath = str(cpath)

    # Read contents of given path -- read_component_definition() returns a
    # dictionary of component definition content indexed by path
    entry_data = reader.get_entry_data({"path": cpath}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="No input data",
        description="",
        op="no-input-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": cpath},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include single filename-based component for testing
    component_cache._component_cache[processor._type.name] = {
        "spoofed_catalog": {"components": {component_id: component}}
    }

    # Construct hypothetical operation for component
    operation_name = "no-input-test"
    operation_params = {}
    operation = Operation(
        id="no-input-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata)
    mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(
        id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="no_input.pipeline"
    )
    pipeline.operations[operation.id] = operation

    constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline")
    pipeline_path = str(Path(tmpdir) / "no_inputs_test.yaml")

    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path)
def process(self, pipeline: Pipeline) -> "AirflowPipelineProcessorResponse":
    """
    Submit the pipeline for execution on Apache Airflow.
    """
    t0_all = time.time()
    timestamp = datetime.now().strftime("%m%d%H%M%S")

    # Create an instance id that will be used to store
    # the pipeline's dependencies, if applicable
    pipeline_instance_id = f"{pipeline.name}-{timestamp}"

    runtime_configuration = self._get_metadata_configuration(
        schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID, name=pipeline.runtime_config
    )
    api_endpoint = runtime_configuration.metadata.get("api_endpoint")
    cos_endpoint = runtime_configuration.metadata.get("cos_endpoint")
    cos_bucket = runtime_configuration.metadata.get("cos_bucket")

    git_type = SupportedGitTypes.get_instance_by_name(
        runtime_configuration.metadata.get("git_type", SupportedGitTypes.GITHUB.name)
    )
    if git_type == SupportedGitTypes.GITLAB and SupportedGitTypes.is_enabled(SupportedGitTypes.GITLAB) is False:
        raise ValueError(
            "Python package `python-gitlab` is not installed. "
            "Please install using `elyra[gitlab]` to use GitLab as DAG repository."
        )

    github_api_endpoint = runtime_configuration.metadata.get("github_api_endpoint")
    github_repo_token = runtime_configuration.metadata.get("github_repo_token")
    github_repo = runtime_configuration.metadata.get("github_repo")
    github_branch = runtime_configuration.metadata.get("github_branch")

    self.log_pipeline_info(pipeline.name, "Submitting pipeline")

    with tempfile.TemporaryDirectory() as temp_dir:
        pipeline_export_path = os.path.join(temp_dir, f"{pipeline.name}.py")

        self.log.debug(f"Creating temp directory '{temp_dir}'")

        pipeline_filepath = self.create_pipeline_file(
            pipeline=pipeline,
            pipeline_export_format="py",
            pipeline_export_path=pipeline_export_path,
            pipeline_name=pipeline.name,
            pipeline_instance_id=pipeline_instance_id,
        )

        self.log.debug(f"Uploading pipeline file '{pipeline_filepath}'")

        try:
            if git_type == SupportedGitTypes.GITHUB:
                git_client = GithubClient(
                    server_url=github_api_endpoint, token=github_repo_token, repo=github_repo, branch=github_branch
                )
            else:
                git_client = GitLabClient(
                    server_url=github_api_endpoint,
                    token=github_repo_token,
                    project=github_repo,
                    branch=github_branch,
                )
        except BaseException as be:
            raise RuntimeError(f"Unable to create a connection to {github_api_endpoint}: {str(be)}") from be

        git_client.upload_dag(pipeline_filepath, pipeline_instance_id)

        self.log.info("Waiting for Airflow Scheduler to process and start the pipeline")

        download_url = git_client.get_git_url(
            api_url=github_api_endpoint, repository_name=github_repo, repository_branch=github_branch
        )

        self.log_pipeline_info(
            pipeline.name, f"pipeline pushed to git: {download_url}", duration=(time.time() - t0_all)
        )

        if pipeline.contains_generic_operations():
            object_storage_url = f"{cos_endpoint}"
            os_path = join_paths(pipeline.pipeline_parameters.get(COS_OBJECT_PREFIX), pipeline_instance_id)
            object_storage_path = f"/{cos_bucket}/{os_path}"
        else:
            object_storage_url = None
            object_storage_path = None

        return AirflowPipelineProcessorResponse(
            git_url=f"{download_url}",
            run_url=f"{api_endpoint}",
            object_storage_url=object_storage_url,
            object_storage_path=object_storage_path,
        )
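# Sketch of the runtime-configuration metadata consumed by process() above. The keys
# mirror the metadata.get() lookups in the method; every value below is a placeholder
# for illustration, not a real endpoint, repository, or token.
EXAMPLE_AIRFLOW_RUNTIME_METADATA = {
    "api_endpoint": "https://airflow.example.com",        # Airflow endpoint reported in the response's run_url
    "cos_endpoint": "https://objectstorage.example.com",  # object storage endpoint used for generic operations
    "cos_bucket": "airflow-artifacts",                    # bucket used to build object_storage_path
    "git_type": "GITHUB",                                 # or "GITLAB" (requires the elyra[gitlab] extra)
    "github_api_endpoint": "https://api.github.com",      # also used when connecting to GitLab
    "github_repo_token": "<personal-access-token>",
    "github_repo": "example-org/airflow-dags",
    "github_branch": "main",
}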