def test_pipeline_process(monkeypatch, processor, parsed_pipeline, sample_metadata):

    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="airflow",
                              metadata=sample_metadata
                              )
    mocked_path = "/some-placeholder"

    monkeypatch.setattr(processor, "_get_metadata_configuration", lambda namespace, name: mocked_runtime)
    monkeypatch.setattr(processor, "create_pipeline_file",
                        lambda pipeline, pipeline_export_format, pipeline_export_path, pipeline_name: mocked_path)

    monkeypatch.setattr(github.Github, "get_repo", lambda x, y: True)
    monkeypatch.setattr(git.GithubClient, "upload_dag", lambda x, y, z: True)

    response = processor.process(pipeline=parsed_pipeline)

    assert response.run_url == sample_metadata['api_endpoint']
    assert response.object_storage_url == sample_metadata['cos_endpoint']
    # Check only for this substring of the storage path, since a timestamp is injected into the name
    assert "/" + sample_metadata['cos_bucket'] + "/" + "untitled" in response.object_storage_path
Example #2
def test_manager_rollback_update(tests_manager):
    metadata_name = "rollback_update"

    metadata = Metadata(**valid_metadata2_json)

    # Create the instance
    instance = tests_manager.create(metadata_name, metadata)
    original_display_name = instance.display_name
    instance.display_name = "Updated_" + original_display_name

    os.environ["METADATA_TEST_HOOK_OP"] = "update"  # Tell test class which op to raise
    # Update post-save hook will throw ModuleNotFoundError
    with pytest.raises(ModuleNotFoundError):
        tests_manager.update(metadata_name, instance)

    # Ensure the display_name is still the original value.
    instance2 = tests_manager.get(metadata_name)
    assert instance2.display_name == original_display_name

    os.environ.pop("METADATA_TEST_HOOK_OP")  # Restore normal operation
    # Ensure we can still update
    instance = tests_manager.update(metadata_name, instance)
    assert instance.display_name == "Updated_" + original_display_name
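The rollback behavior above depends on a test schema whose save and delete hooks raise when the METADATA_TEST_HOOK_OP environment variable names the current operation. The class below is only a sketch of that idea; the hook names, signatures, and import path are assumptions rather than Elyra's actual test fixture:

import os

from elyra.metadata.metadata import Metadata  # assumed import path for the Metadata base class


class HookedMetadata(Metadata):
    def post_save(self, **kwargs):
        # Simulate a failing update post-save hook.
        if os.environ.get("METADATA_TEST_HOOK_OP") == "update":
            raise ModuleNotFoundError("simulated failure in the update post-save hook")

    def post_delete(self, **kwargs):
        # Simulate a failing delete hook.
        if os.environ.get("METADATA_TEST_HOOK_OP") == "delete":
            raise FileNotFoundError("simulated failure in the delete hook")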
Example #3
def test_same_name_operator_in_pipeline(monkeypatch, processor, catalog_instance, parsed_pipeline, sample_metadata):
    task_id = "e3922a29-f4c0-43d9-8d8b-4509aab80032"
    upstream_task_id = "0eb57369-99d1-4cd0-a205-8d8d96af3ad4"

    mocked_runtime = Metadata(
        name="test-metadata", display_name="test", schema_name="airflow", metadata=sample_metadata["metadata"]
    )

    monkeypatch.setattr(processor, "_get_metadata_configuration", lambda name=None, schemaspace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda x, y, z: True)

    pipeline_def_operation = parsed_pipeline.operations[task_id]
    pipeline_def_operation_parameters = pipeline_def_operation.component_params_as_dict
    pipeline_def_operation_bash_param = pipeline_def_operation_parameters["bash_command"]

    assert pipeline_def_operation_bash_param["activeControl"] == "NestedEnumControl"
    assert set(pipeline_def_operation_bash_param["NestedEnumControl"].keys()) == {"value", "option"}
    assert pipeline_def_operation_bash_param["NestedEnumControl"]["value"] == upstream_task_id

    ordered_operations = processor._cc_pipeline(parsed_pipeline, pipeline_name="some-name")
    operation_parameters = ordered_operations[task_id]["component_params"]
    operation_parameter_bash_command = operation_parameters["bash_command"]

    assert operation_parameter_bash_command == "\"{{ ti.xcom_pull(task_ids='BashOperator_1') }}\""
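The final assertion shows the processor replacing the upstream node's UUID with a Jinja xcom_pull expression built from the upstream operation's task name. A rough sketch of that substitution, using a hypothetical helper and a plain id-to-operation mapping in place of the processor's internals:

def render_xcom_reference(upstream_task_id, operations):
    # Resolve the upstream operation's name (e.g. "BashOperator_1") and emit a
    # quoted Jinja expression that Airflow evaluates via XCom at run time.
    upstream_name = operations[upstream_task_id].name
    return f"\"{{{{ ti.xcom_pull(task_ids='{upstream_name}') }}}}\""

With the upstream operation named BashOperator_1, this yields the exact string asserted at the end of the test.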
Example #4
def test_manager_rollback_delete(tests_manager):
    metadata_name = "rollback_delete"

    metadata = Metadata(**valid_metadata2_json)

    # Create the instance
    instance = tests_manager.create(metadata_name, metadata)

    os.environ["METADATA_TEST_HOOK_OP"] = "delete"  # Tell test class which op to raise
    # Delete post-save hook will throw FileNotFoundError
    with pytest.raises(FileNotFoundError):
        tests_manager.remove(metadata_name)

    # Ensure the instance still exists
    instance2 = tests_manager.get(metadata_name)
    assert instance2.display_name == instance.display_name

    os.environ.pop("METADATA_TEST_HOOK_OP")  # Restore normal operation
    # Ensure we can still delete
    tests_manager.remove(metadata_name)

    # Ensure the instance was deleted
    with pytest.raises(MetadataNotFoundError):
        tests_manager.get(metadata_name)
Example #5
def test_manager_add_remove_valid(tests_manager, schemaspace_location):
    metadata_name = "valid_add_remove"

    # Remove schemaspace_location and ensure it gets created
    _remove_schemaspace(tests_manager.metadata_store, schemaspace_location)

    metadata = Metadata(**valid_metadata_json)

    instance = tests_manager.create(metadata_name, metadata)
    assert instance is not None

    # Attempt to create again w/o replace, then replace it.
    with pytest.raises(MetadataExistsError):
        tests_manager.create(metadata_name, metadata)

    instance = tests_manager.update(metadata_name, metadata)
    assert instance is not None

    # And finally, remove it.
    tests_manager.remove(metadata_name)

    # Verify removal using metadata_store
    with pytest.raises(MetadataNotFoundError):
        tests_manager.metadata_store.fetch_instances(metadata_name)
Example #6
def test_store_store_instance(store_manager, schemaspace_location):
    # Remove schemaspace to test raw creation and confirm perms
    _remove_schemaspace(store_manager, schemaspace_location)

    metadata_name = "persist"
    metadata = Metadata(**valid_metadata_json)
    metadata_dict = metadata.prepare_write()

    instance = store_manager.store_instance(metadata_name, metadata_dict)
    assert instance is not None

    if isinstance(store_manager, FileMetadataStore):
        dir_mode = oct(os.stat(schemaspace_location).st_mode
                       & 0o777777)  # Be sure to include other attributes
        assert dir_mode == "0o40700"  # and ensure this is a directory with only rwx by owner enabled

        # Ensure file was created
        metadata_file = os.path.join(schemaspace_location, "persist.json")
        assert os.path.exists(metadata_file)
        file_mode = oct(os.stat(metadata_file).st_mode
                        & 0o777777)  # Be sure to include other attributes
        assert file_mode == "0o100600"  # and ensure this is a regular file with only rw by owner enabled

        with open(metadata_file, "r", encoding="utf-8") as f:
            valid_add = json.loads(f.read())
            assert "resource" not in valid_add
            assert "name" not in valid_add
            assert "display_name" in valid_add
            assert valid_add["display_name"] == "valid metadata instance"
            assert "schema_name" in valid_add
            assert valid_add["schema_name"] == "metadata-test"

    # Attempt to create again w/o replace, then replace it.
    with pytest.raises(MetadataExistsError):
        store_manager.store_instance(metadata_name, metadata.prepare_write())

    metadata.metadata["number_range_test"] = 10
    instance = store_manager.store_instance(metadata_name,
                                            metadata.prepare_write(),
                                            for_update=True)
    assert instance is not None
    assert instance.get("metadata")["number_range_test"] == 10
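The mode assertions above rely on os.stat().st_mode carrying both the file-type bits and the permission bits; masking with 0o777777 keeps those and discards higher-order attributes. A standalone illustration of the same check on a POSIX filesystem:

import os
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    schemaspace_dir = os.path.join(tmp, "schemaspace")
    os.mkdir(schemaspace_dir)
    os.chmod(schemaspace_dir, 0o700)  # owner-only rwx, independent of the umask
    # 0o40000 is the directory type bit, 0o700 the owner permission bits
    assert oct(os.stat(schemaspace_dir).st_mode & 0o777777) == "0o40700"

    metadata_file = os.path.join(schemaspace_dir, "persist.json")
    with open(metadata_file, "w", encoding="utf-8") as f:
        f.write("{}")
    os.chmod(metadata_file, 0o600)  # owner-only rw for a regular file
    # 0o100000 is the regular-file type bit, hence "0o100600"
    assert oct(os.stat(metadata_file).st_mode & 0o777777) == "0o100600"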
Example #7
def test_create_file(monkeypatch, processor, parsed_pipeline,
                     parsed_ordered_dict, sample_metadata):
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE)

    export_pipeline_name = "some-name"
    export_file_type = "py"

    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="airflow",
                              metadata=sample_metadata)

    monkeypatch.setattr(processor,
                        "_get_metadata_configuration",
                        lambda name=None, namespace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store",
                        lambda x, y, z: True)
    monkeypatch.setattr(processor, "_cc_pipeline",
                        lambda x, y: parsed_ordered_dict)

    with tempfile.TemporaryDirectory() as temp_dir:
        export_pipeline_output_path = os.path.join(
            temp_dir, f'{export_pipeline_name}.py')

        response = processor.create_pipeline_file(
            parsed_pipeline,
            pipeline_export_format=export_file_type,
            pipeline_export_path=export_pipeline_output_path,
            pipeline_name=export_pipeline_name)

        assert export_pipeline_output_path == response
        assert os.path.isfile(export_pipeline_output_path)

        file_as_lines = open(response).read().splitlines()

        # Check DAG project name
        for i in range(len(file_as_lines)):
            if "args = {" == file_as_lines[i]:
                assert "project_id" == read_key_pair(file_as_lines[i + 1],
                                                     sep=':')['key']
                assert export_pipeline_name == read_key_pair(file_as_lines[i +
                                                                           1],
                                                             sep=':')['value']

        # For every node in the original pipeline json
        for node in pipeline_json['pipelines'][0]['nodes']:
            for i in range(len(file_as_lines)):
                # Matches an op with a node ID
                if "notebook_op_" + node['id'].replace(
                        "-", "_") + " = NotebookOp(" in file_as_lines[i]:
                    sub_list_line_counter = 0
                    # Gets sub-list slice starting where the Notebook Op starts
                    for line in file_as_lines[i + 1:]:
                        if 'namespace=' in line:
                            assert sample_metadata[
                                'user_namespace'] == read_key_pair(
                                    line)['value']
                        elif 'cos_endpoint=' in line:
                            assert sample_metadata[
                                'cos_endpoint'] == read_key_pair(line)['value']
                        elif 'cos_bucket=' in line:
                            assert sample_metadata[
                                'cos_bucket'] == read_key_pair(line)['value']
                        elif 'name=' in line:
                            assert node['app_data']['ui_data'][
                                'label'] == read_key_pair(line)['value']
                        elif 'notebook=' in line:
                            assert node['app_data'][
                                'filename'] == read_key_pair(line)['value']
                        elif 'image=' in line:
                            assert node['app_data'][
                                'runtime_image'] == read_key_pair(
                                    line)['value']
                        elif 'env_vars=' in line:
                            for env in node['app_data']['env_vars']:
                                var, value = env.split("=")
                                # Gets sub-list slice starting where the env vars start
                                for env_line in file_as_lines[
                                        i + sub_list_line_counter + 2:]:
                                    if "AWS_ACCESS_KEY_ID" in env_line:
                                        assert sample_metadata[
                                            'cos_username'] == read_key_pair(
                                                env_line, sep=':')['value']
                                    elif "AWS_SECRET_ACCESS_KEY" in env_line:
                                        assert sample_metadata[
                                            'cos_password'] == read_key_pair(
                                                env_line, sep=':')['value']
                                    elif var in env_line:
                                        assert var == read_key_pair(
                                            env_line, sep=':')['key']
                                        assert value == read_key_pair(
                                            env_line, sep=':')['value']
                                    elif env_line.strip(
                                    ) == '},':  # end of env vars
                                        break
                        elif 'pipeline_inputs=' in line and node[
                                'app_data'].get('inputs'):
                            for input in node['app_data']['inputs']:
                                assert input in string_to_list(
                                    read_key_pair(line)['value'])
                        elif 'pipeline_outputs=' in line and node[
                                'app_data'].get('outputs'):
                            for output in node['app_data']['outputs']:
                                assert output in string_to_list(
                                    read_key_pair(line)['value'])
                        elif line == ')':  # End of this Notebook Op
                            break
                        sub_list_line_counter += 1
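The assertions in this and the following examples lean on two small test helpers, read_key_pair and string_to_list, which parse rendered DAG lines such as cos_bucket='test-bucket', or 'project_id': 'some-name',. Elyra's test suite ships its own versions; the sketch below is just one plausible implementation, included to make the assertions easier to follow:

def read_key_pair(line, sep="="):
    # Split a rendered line such as "cos_bucket='test-bucket'," (or, with
    # sep=':', "'project_id': 'some-name',") into its key and value parts.
    key, _, value = line.strip().rstrip(",").partition(sep)
    return {
        "key": key.strip().strip("'\""),
        "value": value.strip().strip("'\""),
    }


def string_to_list(value):
    # Turn a rendered list literal such as "['file1.txt', 'file2.txt']" into
    # a plain Python list of unquoted item names.
    return [item.strip().strip("'\"") for item in value.strip("[]").split(",") if item.strip()]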
Example #8
def test_create_file_custom_components(
    monkeypatch, processor, catalog_instance, component_cache, parsed_pipeline, parsed_ordered_dict, sample_metadata
):
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE_CUSTOM_COMPONENTS)

    export_pipeline_name = "some-name"
    export_file_type = "py"

    mocked_runtime = Metadata(
        name="test-metadata", display_name="test", schema_name="airflow", metadata=sample_metadata["metadata"]
    )

    monkeypatch.setattr(processor, "_get_metadata_configuration", lambda name=None, schemaspace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda x, y, z: True)
    monkeypatch.setattr(processor, "_cc_pipeline", lambda x, y: parsed_ordered_dict)

    with tempfile.TemporaryDirectory() as temp_dir:
        export_pipeline_output_path = os.path.join(temp_dir, f"{export_pipeline_name}.py")

        response = processor.create_pipeline_file(
            parsed_pipeline,
            pipeline_export_format=export_file_type,
            pipeline_export_path=export_pipeline_output_path,
            pipeline_name=export_pipeline_name,
        )

        assert export_pipeline_output_path == response
        assert os.path.isfile(export_pipeline_output_path)

        file_as_lines = open(response).read().splitlines()

        pipeline_description = pipeline_json["pipelines"][0]["app_data"]["properties"]["description"]
        escaped_description = pipeline_description.replace('"""', '\\"\\"\\"')

        for i in range(len(file_as_lines)):
            if "args = {" == file_as_lines[i]:
                # Check DAG project name
                assert "project_id" == read_key_pair(file_as_lines[i + 1], sep=":")["key"]
                assert export_pipeline_name == read_key_pair(file_as_lines[i + 1], sep=":")["value"]
            elif 'description="""' in file_as_lines[i]:
                # Check that DAG contains the correct description
                line_no = i + 1
                description_as_lines = []
                while '"""' not in file_as_lines[line_no]:
                    description_as_lines.append(file_as_lines[line_no])
                    line_no += 1
                expected_description_lines = escaped_description.split("\n")
                assert description_as_lines == expected_description_lines

                # Nothing more to be done in file
                break

        # For every node in the original pipeline json
        for node in pipeline_json["pipelines"][0]["nodes"]:
            component_parameters = node["app_data"]["component_parameters"]
            for i in range(len(file_as_lines)):
                # Matches custom component operators
                if f"op_{node['id'].replace('-', '_')} = " in file_as_lines[i]:
                    for parameter in component_parameters:
                        # Find 'parameter=' clause in file_as_lines list
                        r = re.compile(rf"\s*{parameter}=.*")
                        parameter_clause = i + 1
                        assert len(list(filter(r.match, file_as_lines[parameter_clause:]))) > 0
Example #9
def test_create_file(monkeypatch, processor, parsed_pipeline, parsed_ordered_dict, sample_metadata):
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE_COMPLEX)

    export_pipeline_name = "some-name"
    export_file_type = "py"

    mocked_runtime = Metadata(
        name="test-metadata", display_name="test", schema_name="airflow", metadata=sample_metadata["metadata"]
    )

    monkeypatch.setattr(processor, "_get_metadata_configuration", lambda name=None, schemaspace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda x, y, z: True)
    monkeypatch.setattr(processor, "_cc_pipeline", lambda x, y: parsed_ordered_dict)

    with tempfile.TemporaryDirectory() as temp_dir:
        export_pipeline_output_path = os.path.join(temp_dir, f"{export_pipeline_name}.py")

        response = processor.create_pipeline_file(
            parsed_pipeline,
            pipeline_export_format=export_file_type,
            pipeline_export_path=export_pipeline_output_path,
            pipeline_name=export_pipeline_name,
        )

        assert export_pipeline_output_path == response
        assert os.path.isfile(export_pipeline_output_path)

        file_as_lines = open(response).read().splitlines()

        assert "from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator" in file_as_lines

        # Check DAG project name
        for i in range(len(file_as_lines)):
            if "args = {" == file_as_lines[i]:
                assert "project_id" == read_key_pair(file_as_lines[i + 1], sep=":")["key"]
                assert export_pipeline_name == read_key_pair(file_as_lines[i + 1], sep=":")["value"]

        # For every node in the original pipeline json
        for node in pipeline_json["pipelines"][0]["nodes"]:
            component_parameters = node["app_data"]["component_parameters"]
            for i in range(len(file_as_lines)):
                # Matches a generic op with a node ID
                if f"op_{node['id'].replace('-', '_')} = KubernetesPodOperator(" in file_as_lines[i]:
                    sub_list_line_counter = 0
                    # Gets sub-list slice starting where the KubernetesPodOperator starts
                    init_line = i + 1
                    for line in file_as_lines[init_line:]:
                        if "namespace=" in line:
                            assert sample_metadata["metadata"]["user_namespace"] == read_key_pair(line)["value"]
                        elif "cos_endpoint=" in line:
                            assert sample_metadata["metadata"]["cos_endpoint"] == read_key_pair(line)["value"]
                        elif "cos_bucket=" in line:
                            assert sample_metadata["metadata"]["cos_bucket"] == read_key_pair(line)["value"]
                        elif "name=" in line:
                            assert node["app_data"]["ui_data"]["label"] == read_key_pair(line)["value"]
                        elif "notebook=" in line:
                            assert component_parameters["filename"] == read_key_pair(line)["value"]
                        elif "image=" in line:
                            assert component_parameters["runtime_image"] == read_key_pair(line)["value"]
                        elif "env_vars=" in line:
                            for env in component_parameters["env_vars"]:
                                var, value = env.split("=")
                                # Gets sub-list slice starting where the env vars start
                                start_env = i + sub_list_line_counter + 2
                                for env_line in file_as_lines[start_env:]:
                                    if "AWS_ACCESS_KEY_ID" in env_line:
                                        assert (
                                            sample_metadata["metadata"]["cos_username"]
                                            == read_key_pair(env_line, sep=":")["value"]
                                        )
                                    elif "AWS_SECRET_ACCESS_KEY" in env_line:
                                        assert (
                                            sample_metadata["metadata"]["cos_password"]
                                            == read_key_pair(env_line, sep=":")["value"]
                                        )
                                    elif var in env_line:
                                        assert var == read_key_pair(env_line, sep=":")["key"]
                                        assert value == read_key_pair(env_line, sep=":")["value"]
                                    elif env_line.strip() == "},":  # end of env vars
                                        break
                        elif "pipeline_inputs=" in line and component_parameters.get("inputs"):
                            for input in component_parameters["inputs"]:
                                assert input in string_to_list(read_key_pair(line)["value"])
                        elif "pipeline_outputs=" in line and component_parameters.get("outputs"):
                            for output in component_parameters["outputs"]:
                                assert output in string_to_list(read_key_pair(line)["value"])
                        elif line == ")":  # End of this operator
                            break
                        sub_list_line_counter += 1
Example #10
async def test_modify_component_catalogs(component_cache,
                                         metadata_manager_with_teardown,
                                         create_inprocess):
    # Get initial set of components
    initial_components = component_cache.get_all_components(RUNTIME_PROCESSOR)

    # Create new registry instance with a single URL-based component
    urls = [
        "https://raw.githubusercontent.com/elyra-ai/elyra/master/elyra/tests/pipeline/resources/components/"
        "airflow_test_operator.py"
    ]

    instance_metadata = {
        "description": "A test registry",
        "runtime_type": RUNTIME_PROCESSOR.name,
        "categories": ["New Components"],
        "paths": urls,
    }
    registry_instance = Metadata(schema_name="url-catalog",
                                 name=TEST_CATALOG_NAME,
                                 display_name="New Test Registry",
                                 metadata=instance_metadata)

    if create_inprocess:
        metadata_manager_with_teardown.create(TEST_CATALOG_NAME,
                                              registry_instance)
    else:
        res: CompletedProcess = run([
            "elyra-metadata",
            "install",
            "component-catalogs",
            f"--schema_name={registry_instance.schema_name}",
            f"--json={registry_instance.to_json()}",
            f"--name={TEST_CATALOG_NAME}",
        ])
        assert res.returncode == 0

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Get new set of components from all active registries, including added test registry
    components_after_create = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_create) == len(initial_components) + 3

    added_component_names = [
        component.name for component in components_after_create
    ]
    assert "TestOperator" in added_component_names
    assert "TestOperatorNoInputs" not in added_component_names

    # Modify the test registry to add an additional path to the catalog instance
    urls.append(
        "https://raw.githubusercontent.com/elyra-ai/elyra/master/elyra/tests/pipeline/resources/components"
        "/airflow_test_operator_no_inputs.py")
    metadata_manager_with_teardown.update(TEST_CATALOG_NAME, registry_instance)

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Get set of components from all active registries, including modified test registry
    components_after_update = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_update) == len(initial_components) + 4

    modified_component_names = [
        component.name for component in components_after_update
    ]
    assert "TestOperator" in modified_component_names
    assert "TestOperatorNoInputs" in modified_component_names

    # Delete the test registry
    metadata_manager_with_teardown.remove(TEST_CATALOG_NAME)

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Check that components remaining after delete are the same as before the new catalog was added
    components_after_remove = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_remove) == len(initial_components)
Example #11
def test_processing_filename_runtime_specific_component(
        monkeypatch, processor, sample_metadata, tmpdir):
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    absolute_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "resources",
                     "components", "download_data.yaml"))

    # Read contents of given path -- get_entry_data() returns an object whose
    # definition attribute holds the component definition content
    entry_data = reader.get_entry_data({"path": absolute_path}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="Download data",
        description="",
        op="download-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": absolute_path},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include single filename-based component for testing
    ComponentCache.instance()._component_cache[processor._type.name] = {
        "spoofed_catalog": {
            "components": {
                component_id: component
            }
        }
    }

    # Construct hypothetical operation for component
    operation_name = "Download data test"
    operation_params = {
        "url":
        "https://raw.githubusercontent.com/elyra-ai/elyra/master/tests/assets/helloworld.ipynb",
        "curl_options": "--location",
    }
    operation = Operation(
        id="download-data-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="kfp",
                              metadata=sample_metadata)

    mocked_func = mock.Mock(return_value="default",
                            side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(id="pipeline-id",
                        name="kfp_test",
                        runtime="kfp",
                        runtime_config="test",
                        source="download_data.pipeline")
    pipeline.operations[operation.id] = operation

    # Establish path and function to construct pipeline
    pipeline_path = os.path.join(tmpdir, "kfp_test.yaml")
    constructed_pipeline_function = lambda: processor._cc_pipeline(
        pipeline=pipeline, pipeline_name="test_pipeline")

    # TODO Check against both argo and tekton compilations
    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function,
                                         pipeline_path)

    # Read contents of pipeline YAML
    with open(pipeline_path) as f:
        pipeline_yaml = yaml.safe_load(f.read())

    # Check the pipeline file contents for correctness
    pipeline_template = pipeline_yaml["spec"]["templates"][0]
    assert pipeline_template["metadata"]["annotations"][
        "pipelines.kubeflow.org/task_display_name"] == operation_name
    assert pipeline_template["container"]["command"][3] == operation_params[
        "url"]
Example #12
async def test_modify_component_catalogs(jp_environ, component_cache,
                                         metadata_manager_with_teardown,
                                         create_inprocess):
    # Get initial set of components
    initial_components = component_cache.get_all_components(RUNTIME_PROCESSOR)

    # Create new registry instance with a single URL-based component
    paths = [_get_resource_path("kfp_test_operator.yaml")]

    instance_metadata = {
        "description": "A test registry",
        "runtime_type": RUNTIME_PROCESSOR.name,
        "categories": ["New Components"],
        "paths": paths,
    }
    registry_instance = Metadata(
        schema_name="local-file-catalog",
        name=TEST_CATALOG_NAME,
        display_name="New Test Registry",
        metadata=instance_metadata,
    )

    if create_inprocess:
        metadata_manager_with_teardown.create(TEST_CATALOG_NAME,
                                              registry_instance)
    else:
        res: CompletedProcess = run([
            "elyra-metadata",
            "install",
            "component-catalogs",
            f"--schema_name={registry_instance.schema_name}",
            f"--json={registry_instance.to_json()}",
            f"--name={TEST_CATALOG_NAME}",
        ])
        assert res.returncode == 0

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Get new set of components from all active registries, including added test registry
    components_after_create = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_create) == len(initial_components) + 1

    added_component_names = [
        component.name for component in components_after_create
    ]
    assert "Test Operator" in added_component_names
    assert "Test Operator No Inputs" not in added_component_names

    # Modify the test registry to add a path to the catalog instance
    paths.append(_get_resource_path("kfp_test_operator_no_inputs.yaml"))
    metadata_manager_with_teardown.update(TEST_CATALOG_NAME, registry_instance)

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Get set of components from all active registries, including modified test registry
    components_after_update = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_update) == len(initial_components) + 2

    modified_component_names = [
        component.name for component in components_after_update
    ]
    assert "Test Operator" in modified_component_names
    assert "Test Operator No Inputs" in modified_component_names

    # Delete the test registry
    metadata_manager_with_teardown.remove(TEST_CATALOG_NAME)

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Check that components remaining after delete are the same as before the new catalog was added
    components_after_remove = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_remove) == len(initial_components)
Example #13
    def start(self):
        super().start()  # process options

        # Get known options, then gather display_name and build metadata dict.
        name = self.name_option.value
        schema_name = self.schema_name_option.value
        display_name = None

        metadata = {}
        # Walk the options looking for SchemaProperty instances. Any MetadataSchemaProperty instances go
        # into the metadata dict.  Note that we process JSONBasedOptions (--json or --file) prior to
        # MetadataSchemaProperty types since the former will set the base metadata stanza and individual
        # values can be used to override the former's content (like BYO authentication OVPs, for example).
        for option in self.options:
            if isinstance(option, MetadataSchemaProperty):
                # Skip adding any non-required properties that have no value (unless it's a null type).
                if not option.required and not option.value and option.type != "null":
                    continue
                metadata[option.name] = option.value
            elif isinstance(option, SchemaProperty):
                if option.name == "display_name":  # Be sure we have a display_name
                    display_name = option.value
                    continue
            elif isinstance(option, JSONBasedOption):
                metadata.update(option.metadata)

        if display_name is None and self.replace_flag.value is False:  # Only require on create
            self.log_and_exit(
                f"Could not determine display_name from schema '{schema_name}'"
            )

        ex_msg = None
        new_instance = None
        try:
            if self.replace_flag.value:  # if replacing, fetch the instance so it can be updated
                updated_instance = self.metadata_manager.get(name)
                updated_instance.schema_name = schema_name
                if display_name:
                    updated_instance.display_name = display_name
                updated_instance.metadata.update(metadata)
                new_instance = self.metadata_manager.update(
                    name, updated_instance)
            else:  # create a new instance
                instance = Metadata(schema_name=schema_name,
                                    name=name,
                                    display_name=display_name,
                                    metadata=metadata)
                new_instance = self.metadata_manager.create(name, instance)
        except Exception as ex:
            ex_msg = str(ex)

        if new_instance:
            print(
                f"Metadata instance '{new_instance.name}' for schema '{schema_name}' has been written "
                f"to: {new_instance.resource}")
        else:
            if ex_msg:
                self.log_and_exit(
                    f"The following exception occurred saving metadata instance "
                    f"for schema '{schema_name}': {ex_msg}",
                    display_help=False,
                )
            else:
                self.log_and_exit(
                    f"A failure occurred saving metadata instance '{name}' for "
                    f"schema '{schema_name}'.",
                    display_help=False,
                )
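For context, this handler backs the elyra-metadata install subcommands invoked elsewhere on this page. A hypothetical round trip that reuses the same flags those examples pass (the schemaspace name and payload here are placeholders, not values taken from this code):

import json
from subprocess import CompletedProcess, run

# Placeholder payload; a real call must satisfy whatever the target schema requires.
payload = {
    "display_name": "New Test Registry",
    "metadata": {"description": "A test registry", "paths": ["/tmp/catalog"]},
}
res: CompletedProcess = run(
    [
        "elyra-metadata",
        "install",
        "component-catalogs",
        "--schema_name=local-directory-catalog",
        f"--json={json.dumps(payload)}",
        "--name=my_test_catalog",
    ]
)
assert res.returncode == 0

When the replace flag is set, the method above fetches the existing instance and routes through metadata_manager.update instead of create, which is what allows repeated installs of the same name to succeed.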
Example #14
def test_cc_pipeline_component_no_input(monkeypatch, processor,
                                        component_cache, sample_metadata,
                                        tmpdir):
    """
    Verifies that cc_pipeline can handle KFP component definitions that don't
    include any inputs
    """
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    cpath = (Path(__file__).parent / ".." / "resources" / "components" /
             "kfp_test_operator_no_inputs.yaml").resolve()
    assert cpath.is_file()
    cpath = str(cpath)

    # Read contents of given path -- get_entry_data() returns an object whose
    # definition attribute holds the component definition content
    entry_data = reader.get_entry_data({"path": cpath}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="No input data",
        description="",
        op="no-input-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": cpath},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include single filename-based component for testing
    component_cache._component_cache[processor._type.name] = {
        "spoofed_catalog": {
            "components": {
                component_id: component
            }
        }
    }

    # Construct hypothetical operation for component
    operation_name = "no-input-test"
    operation_params = {}
    operation = Operation(
        id="no-input-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="kfp",
                              metadata=sample_metadata)

    mocked_func = mock.Mock(return_value="default",
                            side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(id="pipeline-id",
                        name="kfp_test",
                        runtime="kfp",
                        runtime_config="test",
                        source="no_input.pipeline")
    pipeline.operations[operation.id] = operation

    constructed_pipeline_function = lambda: processor._cc_pipeline(
        pipeline=pipeline, pipeline_name="test_pipeline")
    pipeline_path = str(Path(tmpdir) / "no_inputs_test.yaml")

    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function,
                                         pipeline_path)
Example #15
async def test_directory_based_component_catalog(
    component_cache, metadata_manager_with_teardown, create_inprocess, tmpdir
):
    # Verify that the component cache is empty so that other tests
    # cannot affect this test's result
    initial_components = component_cache.get_all_components(RUNTIME_PROCESSOR)
    assert len(initial_components) == 0, initial_components[0].name

    # Create and populate a temporary catalog directory
    catalog_dir = Path(tmpdir) / "catalog"
    catalog_dir.mkdir()
    # Copy a few YAML files from ../resources/components to
    # the catalog directory
    directory_entries = {"download_data.yaml": None, "kfp_test_operator_no_inputs.yaml": None}
    for file in directory_entries:
        with open(_get_resource_path(file), "r") as fh_in:
            # read file
            data = fh_in.read()
            # extract and store component name
            directory_entries[file] = yaml.safe_load(data)["name"]
            # write (unchanged) file to destination
            with open(catalog_dir / file, "w") as fh_out:
                fh_out.write(data)
        # make sure the file exists in the destination
        assert (catalog_dir / file).is_file()

    # Create new directory-based registry
    instance_metadata = {
        "description": "A test registry",
        "runtime_type": RUNTIME_PROCESSOR.name,
        "categories": ["New Components"],
        "paths": [str(catalog_dir)],
    }
    registry_instance = Metadata(
        schema_name="local-directory-catalog",
        name=TEST_CATALOG_NAME,
        display_name="New Test Registry",
        metadata=instance_metadata,
    )

    if create_inprocess:
        metadata_manager_with_teardown.create(TEST_CATALOG_NAME, registry_instance)
    else:
        res: CompletedProcess = run(
            [
                "elyra-metadata",
                "install",
                "component-catalogs",
                f"--schema_name={registry_instance.schema_name}",
                f"--json={registry_instance.to_json()}",
                f"--name={TEST_CATALOG_NAME}",
            ]
        )
        assert res.returncode == 0

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Verify that the number of components in the cache equals the number of
    # components in the directory catalog
    components_after_create = component_cache.get_all_components(RUNTIME_PROCESSOR)
    assert len(components_after_create) == len(directory_entries), components_after_create

    # Verify the component names
    added_component_names = [component.name for component in components_after_create]
    for component in directory_entries:
        assert directory_entries[component] in added_component_names

    # Delete the test registry and wait for updates to complete
    metadata_manager_with_teardown.remove(TEST_CATALOG_NAME)
    component_cache.wait_for_all_cache_tasks()
Example #16
    def start(self):
        super().start()  # process options

        src_directory = self.directory_option.value

        try:
            json_files = [
                f for f in os.listdir(src_directory) if f.endswith(".json")
            ]
        except OSError as e:
            print(
                f"Unable to reach the '{src_directory}' directory: {e.strerror}: '{e.filename}'"
            )
            self.exit(1)

        if len(json_files) == 0:
            print(
                f"No instances for import found in the '{src_directory}' directory"
            )
            return

        metadata_file = None
        non_imported_files = []

        for file in json_files:
            filepath = os.path.join(src_directory, file)
            try:
                with open(filepath) as f:
                    metadata_file = json.loads(f.read())
            except OSError as e:
                non_imported_files.append([file, e.strerror])
                continue

            name = os.path.splitext(file)[0]
            try:
                schema_name = metadata_file["schema_name"]
                display_name = metadata_file["display_name"]
                metadata = metadata_file["metadata"]
            except KeyError as e:
                non_imported_files.append([
                    file,
                    f"Could not find '{e.args[0]}' key in the import file '{filepath}'"
                ])
                continue

            try:
                if self.overwrite_flag.value:  # if overwrite flag is true
                    try:  # try updating the existing instance
                        updated_instance = self.metadata_manager.get(name)
                        updated_instance.schema_name = schema_name
                        if display_name:
                            updated_instance.display_name = display_name
                        if name:
                            updated_instance.name = name
                        updated_instance.metadata.update(metadata)
                        self.metadata_manager.update(name, updated_instance)
                    except MetadataNotFoundError:  # no existing instance - create new
                        instance = Metadata(schema_name=schema_name,
                                            name=name,
                                            display_name=display_name,
                                            metadata=metadata)
                        self.metadata_manager.create(name, instance)
                else:
                    instance = Metadata(schema_name=schema_name,
                                        name=name,
                                        display_name=display_name,
                                        metadata=metadata)
                    self.metadata_manager.create(name, instance)
            except Exception as e:
                if isinstance(e, MetadataExistsError):
                    non_imported_files.append(
                        [file, f"{str(e)} Use '--overwrite' to update."])
                else:
                    non_imported_files.append([file, str(e)])

        instance_count_not_imported = len(non_imported_files)
        instance_count_imported = len(json_files) - instance_count_not_imported

        print(f"Imported {instance_count_imported} " +
              ("instance" if instance_count_imported == 1 else "instances"))

        if instance_count_not_imported > 0:
            print(f"{instance_count_not_imported} " +
                  ("instance" if instance_count_not_imported ==
                   1 else "instances") + " could not be imported")

            non_imported_files.sort(key=lambda x: x[0])
            print("\nThe following files could not be imported: ")

            # pad to width of longest file and reason
            max_file_name_len = len("File")
            max_reason_len = len("Reason")
            for file in non_imported_files:
                max_file_name_len = max(len(file[0]), max_file_name_len)
                max_reason_len = max(len(file[1]), max_reason_len)

            print(
                f"{'File'.ljust(max_file_name_len)}   {'Reason'.ljust(max_reason_len)}"
            )
            print(
                f"{'----'.ljust(max_file_name_len)}   {'------'.ljust(max_reason_len)}"
            )
            for file in non_imported_files:
                print(
                    f"{file[0].ljust(max_file_name_len)}   {file[1].ljust(max_reason_len)}"
                )
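The column padding at the end of this handler is easier to see in isolation. A minimal standalone sketch with hypothetical file names and failure reasons:

# Hypothetical failures, mirroring the [file, reason] pairs collected above.
non_imported_files = [
    ["bad_runtime.json", "Could not find 'schema_name' key in the import file"],
    ["broken.json", "Expecting value: line 1 column 1 (char 0)"],
]
name_width = max(len("File"), *(len(entry[0]) for entry in non_imported_files))
reason_width = max(len("Reason"), *(len(entry[1]) for entry in non_imported_files))
print(f"{'File'.ljust(name_width)}   {'Reason'.ljust(reason_width)}")
print(f"{'----'.ljust(name_width)}   {'------'.ljust(reason_width)}")
for name, reason in sorted(non_imported_files):
    print(f"{name.ljust(name_width)}   {reason.ljust(reason_width)}")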