Example #1
def sample_image_metadata():
    image_one = {"image_name": "tensorflow/tensorflow:2.0.0-py3", "pull_policy": "IfNotPresent", "tags": []}
    image_two = {"image_name": "elyra/examples:1.0.0-py3", "pull_policy": "Always", "tags": []}

    mocked_runtime_images = [
        Metadata(name="test-image-metadata", display_name="test-image", schema_name="airflow", metadata=image_one),
        Metadata(name="test-image-metadata", display_name="test-image", schema_name="airflow", metadata=image_two),
    ]

    return mocked_runtime_images
Example #2
def test_manager_hierarchy_create(tests_hierarchy_manager,
                                  schemaspace_location):

    # Note: this is really more of an update test (replace=True), since you cannot "create" an
    # instance that already exists - and, in this case, it already exists in the factory area

    metadata = Metadata(**byo_metadata_json)
    metadata.display_name = "user"
    with pytest.raises(MetadataExistsError):
        tests_hierarchy_manager.create("byo_2", metadata)

    instance = tests_hierarchy_manager.update("byo_2", metadata)
    assert instance is not None
    assert instance.resource.startswith(str(schemaspace_location))

    metadata_list = tests_hierarchy_manager.get_all()
    assert len(metadata_list) == 3
    # Ensure the proper instances exist
    for metadata in metadata_list:
        if metadata.name == "byo_1":
            assert metadata.display_name == "factory"
        if metadata.name == "byo_2":
            assert metadata.display_name == "user"
        if metadata.name == "byo_3":
            assert metadata.display_name == "factory"

    byo_2 = tests_hierarchy_manager.get("byo_2")
    assert byo_2.resource.startswith(str(schemaspace_location))

    metadata = Metadata(**byo_metadata_json)
    metadata.display_name = "user"
    instance = tests_hierarchy_manager.update("byo_3", metadata)
    assert instance is not None
    assert instance.resource.startswith(str(schemaspace_location))

    metadata_list = tests_hierarchy_manager.get_all()
    assert len(metadata_list) == 3
    # Ensure the proper instances exist
    for metadata in metadata_list:
        if metadata.name == "byo_1":
            assert metadata.display_name == "factory"
        if metadata.name == "byo_2":
            assert metadata.display_name == "user"
        if metadata.name == "byo_3":
            assert metadata.display_name == "user"

    byo_2 = tests_hierarchy_manager.get("byo_2")
    assert byo_2.resource.startswith(str(schemaspace_location))
Example #3
def test_pipeline_process(monkeypatch, processor, parsed_pipeline,
                          sample_metadata):

    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="airflow",
                              metadata=sample_metadata)
    mocked_path = "/some-placeholder"

    monkeypatch.setattr(processor, "_get_metadata_configuration",
                        lambda namespace, name: mocked_runtime)
    monkeypatch.setattr(
        processor, "create_pipeline_file",
        lambda pipeline, pipeline_export_format, pipeline_export_path,
        pipeline_name: mocked_path)

    monkeypatch.setattr(github.Github, "get_repo", lambda x, y: True)
    monkeypatch.setattr(git.GithubClient, "upload_dag", lambda x, y, z: True)

    response = processor.process(pipeline=parsed_pipeline)

    assert response.run_url == sample_metadata['api_endpoint']
    assert response.object_storage_url == sample_metadata['cos_endpoint']
    # Verify only that this substring is in the storage path, since a timestamp is injected into the name
    assert "/" + sample_metadata[
        'cos_bucket'] + "/" + "untitled" in response.object_storage_path
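The sample_metadata fixture consumed above is not part of this listing; the sketch below shows roughly what it might supply, based only on the keys these tests read (api_endpoint, cos_endpoint, cos_bucket, cos_username, cos_password). All values here are assumptions, and note that some later examples nest these fields under a "metadata" key instead.

import pytest


@pytest.fixture
def sample_metadata():
    # Hypothetical runtime configuration -- only the key names are taken from
    # this listing; the values are illustrative.
    return {
        "api_endpoint": "http://airflow-webserver:8080",
        "cos_endpoint": "http://minio:9000",
        "cos_bucket": "test-bucket",
        "cos_username": "minioadmin",
        "cos_password": "minioadmin",
    }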
Example #4
async def cli_catalog_instance(jp_fetch):
    # Create new registry instance with a single file-based component
    # This is not a fixture because it needs to
    paths = [_get_resource_path("kfp_test_operator.yaml")]

    instance_metadata = {
        "description": "A test registry",
        "runtime_type": RuntimeProcessorType.KUBEFLOW_PIPELINES.name,
        "categories": ["New Components"],
        "paths": paths,
    }
    instance = Metadata(
        schema_name="local-file-catalog",
        name=TEST_CATALOG_NAME,
        display_name="New Test Catalog",
        metadata=instance_metadata,
    )

    body = json.dumps(instance.to_dict())
    r = await jp_fetch("elyra",
                       "metadata",
                       ComponentCatalogs.COMPONENT_CATALOGS_SCHEMASPACE_ID,
                       body=body,
                       method="POST")
    assert r.code == 201
    r = await jp_fetch("elyra", "metadata",
                       ComponentCatalogs.COMPONENT_CATALOGS_SCHEMASPACE_ID)
    assert r.code == 200
    metadata = json.loads(r.body.decode())
    assert len(metadata) >= 1
Example #5
def test_manager_rollback_delete(tests_manager):
    metadata_name = "rollback_delete"

    metadata = Metadata(**valid_metadata2_json)

    # Create the instance
    instance = tests_manager.create(metadata_name, metadata)

    os.environ[
        "METADATA_TEST_HOOK_OP"] = "delete"  # Tell test class which op to raise
    # Delete post-save hook will throw FileNotFoundError
    with pytest.raises(FileNotFoundError):
        tests_manager.remove(metadata_name)

    # Ensure the instance still exists
    instance2 = tests_manager.get(metadata_name)
    assert instance2.display_name == instance.display_name

    os.environ.pop("METADATA_TEST_HOOK_OP")  # Restore normal operation
    # Ensure we can still delete
    tests_manager.remove(metadata_name)

    # Ensure the instance was deleted
    with pytest.raises(MetadataNotFoundError):
        tests_manager.get(metadata_name)
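This and the later rollback tests (Examples #19 and #21) drive failures through the METADATA_TEST_HOOK_OP environment variable. The sketch below is a rough guess at the kind of test class that behavior implies; the hook method names and the subclassing approach are assumptions, not the project's actual test support code.

import os

from elyra.metadata.metadata import Metadata  # assumed import path


class HookedTestMetadata(Metadata):
    """Hypothetical test class whose post-save/post-delete hooks raise the error
    named by METADATA_TEST_HOOK_OP."""

    def post_save(self, **kwargs):
        op = os.environ.get("METADATA_TEST_HOOK_OP")
        if op == "create":
            raise NotImplementedError  # exercised by test_manager_rollback_create
        if op == "update":
            raise ModuleNotFoundError  # exercised by test_manager_rollback_update

    def post_delete(self, **kwargs):
        if os.environ.get("METADATA_TEST_HOOK_OP") == "delete":
            raise FileNotFoundError  # exercised by test_manager_rollback_delete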
Example #6
def test_manager_bad_update(tests_hierarchy_manager, schemaspace_location):

    # Create some metadata, then attempt to update it with a known schema violation
    # and ensure the previous copy still exists...

    # Create a user instance...
    metadata = Metadata(**byo_metadata_json)
    metadata.display_name = "user1"
    instance = tests_hierarchy_manager.create("bad_update", metadata)
    assert instance is not None
    assert instance.resource.startswith(str(schemaspace_location))

    # Now, attempt to update the user instance, but include a schema violation.
    # Verify the update failed, but also ensure the previous instance is still there.

    instance2 = tests_hierarchy_manager.get("bad_update")
    instance2.display_name = "user2"
    instance2.metadata["number_range_test"] = 42  # number is out of range
    with pytest.raises(ValidationError):
        tests_hierarchy_manager.update("bad_update", instance2)

    _ensure_single_instance(tests_hierarchy_manager, schemaspace_location, "bad_update.json")

    instance2 = tests_hierarchy_manager.get("bad_update")
    assert instance2.display_name == instance.display_name
    assert "number_range_test" not in instance2.metadata

    # Now try update without providing a name, ValueError expected
    instance2 = tests_hierarchy_manager.get("bad_update")
    instance2.display_name = "user update with no name"
    with pytest.raises(ValueError):
        tests_hierarchy_manager.update(None, instance2)

    _ensure_single_instance(tests_hierarchy_manager, schemaspace_location, "bad_update.json")
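The _ensure_single_instance helper used here and in Example #15 is not included in this listing. The stand-in below matches how it is called and is an assumption; the real helper may also consult the metadata store rather than the filesystem alone.

import glob
import os


def _ensure_single_instance(manager, location, filename, expected_count=1):
    # Hypothetical reconstruction: assert that `filename` occurs exactly
    # `expected_count` times in the user-level schemaspace directory.
    # The `manager` argument is unused here but presumably used by the real helper.
    matches = glob.glob(os.path.join(str(location), filename))
    assert len(matches) == expected_count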
Example #7
def test_manager_add_display_name(tests_manager, schemaspace_location):
    metadata_display_name = '1 teste "rápido"'
    metadata_name = "a_1_teste_rpido"

    metadata = Metadata(**valid_display_name_json)
    instance = tests_manager.create(None, metadata)

    assert instance is not None
    assert instance.name == metadata_name
    assert instance.display_name == metadata_display_name

    # Ensure file was created using store_manager
    instance_list = tests_manager.metadata_store.fetch_instances(metadata_name)
    assert len(instance_list) == 1
    instance = Metadata.from_dict(METADATA_TEST_SCHEMASPACE, instance_list[0])
    metadata_location = _compose_instance_location(tests_manager.metadata_store, schemaspace_location, metadata_name)
    assert instance.resource == metadata_location
    assert instance.display_name == metadata_display_name

    # And finally, remove it.
    tests_manager.remove(metadata_name)

    # Verify removal using metadata_store
    with pytest.raises(MetadataNotFoundError):
        tests_manager.metadata_store.fetch_instances(metadata_name)
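The test depends on the manager deriving the instance name "a_1_teste_rpido" from the display name '1 teste "rápido"'. The function below is one normalization that produces that result; it is an illustration only, not the MetadataManager's actual implementation.

import re


def to_instance_name(display_name: str) -> str:
    # Keep ASCII letters, digits and spaces; quotes and accented characters are dropped.
    cleaned = re.sub(r"[^a-zA-Z0-9 ]", "", display_name).lower()
    name = "_".join(cleaned.split())
    # Instance names may not begin with a digit, so prefix one that does.
    return f"a_{name}" if name and name[0].isdigit() else name


assert to_instance_name('1 teste "rápido"') == "a_1_teste_rpido"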
Example #8
def test_same_name_operator_in_pipeline(monkeypatch, processor, catalog_instance, parsed_pipeline, sample_metadata):
    task_id = "e3922a29-f4c0-43d9-8d8b-4509aab80032"
    upstream_task_id = "0eb57369-99d1-4cd0-a205-8d8d96af3ad4"

    mocked_runtime = Metadata(
        name="test-metadata", display_name="test", schema_name="airflow", metadata=sample_metadata["metadata"]
    )

    monkeypatch.setattr(processor, "_get_metadata_configuration", lambda name=None, schemaspace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda w, x, y, prefix: True)

    pipeline_def_operation = parsed_pipeline.operations[task_id]
    pipeline_def_operation_parameters = pipeline_def_operation.component_params_as_dict
    pipeline_def_operation_bash_param = pipeline_def_operation_parameters["bash_command"]

    assert pipeline_def_operation_bash_param["activeControl"] == "NestedEnumControl"
    assert set(pipeline_def_operation_bash_param["NestedEnumControl"].keys()) == {"value", "option"}
    assert pipeline_def_operation_bash_param["NestedEnumControl"]["value"] == upstream_task_id

    ordered_operations = processor._cc_pipeline(
        parsed_pipeline, pipeline_name="some-name", pipeline_instance_id="some-instance-name"
    )
    operation_parameters = ordered_operations[task_id]["component_params"]
    operation_parameter_bash_command = operation_parameters["bash_command"]

    assert operation_parameter_bash_command == "\"{{ ti.xcom_pull(task_ids='BashOperator_1') }}\""
Example #9
def test_manager_add_invalid(data_dir):
    # Use a local metadata mgr because we want to reference a bad namespace to ensure
    # directory metadata/invalid is not created.
    metadata_manager = MetadataManager(namespace='invalid')

    # Attempt with non Metadata instance
    with pytest.raises(TypeError):
        metadata_manager.add(invalid_metadata_json)

    # and invalid parameters
    with pytest.raises(ValueError):
        metadata_manager.add(None, invalid_metadata_json)

    with pytest.raises(ValueError):
        metadata_manager.add("foo", None)

    metadata = Metadata(**invalid_metadata_json)

    capture = io.StringIO()
    handler = StreamHandler(capture)
    metadata_manager.log.addHandler(handler)

    # Ensure save produces result of None and logging indicates validation error and file removal
    metadata_name = 'save_invalid'
    resource = metadata_manager.add(metadata_name, metadata)
    assert resource is None
    captured = capture.getvalue()
    assert "Schema validation failed" in captured
    assert "Removing metadata resource" in captured
    # Ensure the file was not created.  Since this was the first instance of 'invalid',
    # also ensure that the directory 'metadata/invalid' was not created.
    invalid_metadata_dir = os.path.join(data_dir, 'metadata', 'invalid')
    assert not os.path.exists(invalid_metadata_dir)
    metadata_file = os.path.join(invalid_metadata_dir, 'save_invalid.json')
    assert not os.path.exists(metadata_file)
Example #10
def test_manager_add_remove_valid(runtimes_manager, metadata_runtimes_dir):
    metadata_name = 'valid_add_remove'

    metadata = Metadata(**valid_metadata_json)

    resource = runtimes_manager.add(metadata_name, metadata)
    assert resource is not None

    # Ensure file was created
    metadata_file = os.path.join(metadata_runtimes_dir,
                                 'valid_add_remove.json')
    assert os.path.exists(metadata_file)

    with open(metadata_file, 'r', encoding='utf-8') as f:
        valid_add = json.loads(f.read())
        assert "resource" not in valid_add
        assert "name" not in valid_add
        assert "display_name" in valid_add
        assert valid_add['display_name'] == "valid runtime"
        assert "schema_name" in valid_add
        assert valid_add['schema_name'] == "test"

    # Attempt to create again w/o replace, then replace it.
    resource = runtimes_manager.add(metadata_name, metadata, replace=False)
    assert resource is None

    resource = runtimes_manager.add(metadata_name, metadata)
    assert resource is not None

    # And finally, remove it.
    resource = runtimes_manager.remove(metadata_name)

    assert not os.path.exists(metadata_file)
    assert resource == metadata_file
Example #11
def test_store_schemaspace(store_manager, schemaspace_location):
    # Delete the metadata dir contents and attempt listing metadata
    _remove_schemaspace(store_manager, schemaspace_location)
    assert store_manager.schemaspace_exists() is False

    # create some metadata
    store_manager.store_instance("ensure_schemaspace_exists", Metadata(**valid_metadata_json).prepare_write())
    assert store_manager.schemaspace_exists()
Example #12
def catalog_instance(component_cache, request):
    """Creates an instance of a component catalog and removes after test."""
    instance_metadata = request.param

    instance_name = "component_cache"
    md_mgr = MetadataManager(schemaspace=ComponentCatalogs.COMPONENT_CATALOGS_SCHEMASPACE_ID)
    catalog = md_mgr.create(instance_name, Metadata(**instance_metadata))
    component_cache.wait_for_all_cache_tasks()
    yield catalog
    md_mgr.remove(instance_name)
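Because this fixture reads its instance metadata from request.param, the consuming tests presumably parametrize it indirectly. A minimal usage sketch with hypothetical metadata values:

import pytest

example_catalog_metadata = {
    # Illustrative values only -- the real tests supply their own catalog metadata.
    "description": "A test registry",
    "runtime_type": "APACHE_AIRFLOW",
    "categories": ["New Components"],
    "paths": ["/path/to/component_definition.yaml"],
}


@pytest.mark.parametrize("catalog_instance", [example_catalog_metadata], indirect=True)
def test_components_available(catalog_instance):
    assert catalog_instance is not None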
Example #13
def test_manager_add_empty_display_name(tests_manager):
    # Found that empty display_name values were passing validation, so minLength=1 was added
    metadata_name = "empty_display_name"
    metadata = Metadata(**valid_metadata_json)
    metadata.display_name = ""
    with pytest.raises(ValidationError):
        tests_manager.create(metadata_name, metadata)

    # Ensure file was not created using storage manager
    with pytest.raises(MetadataNotFoundError):
        tests_manager.metadata_store.fetch_instances(metadata_name)
Example #14
async def test_directory_based_component_catalog(
        component_cache, metadata_manager_with_teardown, create_inprocess):
    # Get initial set of components
    initial_components = component_cache.get_all_components(RUNTIME_PROCESSOR)

    # Create new directory-based registry instance with components in ../../test/resources/components
    registry_path = _get_resource_path("")
    instance_metadata = {
        "description": "A test registry",
        "runtime_type": RUNTIME_PROCESSOR.name,
        "categories": ["New Components"],
        "paths": [registry_path],
    }
    registry_instance = Metadata(
        schema_name="local-directory-catalog",
        name=TEST_CATALOG_NAME,
        display_name="New Test Registry",
        metadata=instance_metadata,
    )

    if create_inprocess:
        metadata_manager_with_teardown.create(TEST_CATALOG_NAME,
                                              registry_instance)
    else:
        res: CompletedProcess = run([
            "elyra-metadata",
            "install",
            "component-catalogs",
            f"--schema_name={registry_instance.schema_name}",
            f"--json={registry_instance.to_json()}",
            f"--name={TEST_CATALOG_NAME}",
        ])
        assert res.returncode == 0

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Get new set of components from all active registries, including added test registry
    components_after_create = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_create) == len(initial_components) + 4

    # Check that all relevant components from the new registry have been added
    added_component_names = [
        component.name for component in components_after_create
    ]
    assert "Filter text" in added_component_names
    assert "Test Operator" in added_component_names
    assert "Test Operator No Inputs" in added_component_names

    # Delete the test registry and wait for updates to complete
    metadata_manager_with_teardown.remove(TEST_CATALOG_NAME)
    component_cache.wait_for_all_cache_tasks()
Example #15
def test_manager_hierarchy_remove(tests_hierarchy_manager, factory_location,
                                  shared_location, schemaspace_location):

    # Create additional instances in shared and user areas
    byo_2 = byo_metadata_json
    byo_2["display_name"] = "shared"
    create_json_file(shared_location, "byo_2.json", byo_2)

    metadata = Metadata(**byo_metadata_json)
    metadata.display_name = "user"
    instance = tests_hierarchy_manager.update("byo_2", metadata)
    assert instance is not None
    assert instance.resource.startswith(str(schemaspace_location))

    # Confirm the one in the user area is found...
    metadata_list = tests_hierarchy_manager.get_all()
    assert len(metadata_list) == 3
    # Ensure the proper instances exist
    for metadata in metadata_list:
        if metadata.name == "byo_1":
            assert metadata.display_name == "factory"
        if metadata.name == "byo_2":
            assert metadata.display_name == "user"
        if metadata.name == "byo_3":
            assert metadata.display_name == "factory"

    byo_2 = tests_hierarchy_manager.get("byo_2")
    assert byo_2.resource.startswith(str(schemaspace_location))

    # Now remove instance.  Should be allowed since it resides in user area
    tests_hierarchy_manager.remove("byo_2")
    _ensure_single_instance(tests_hierarchy_manager,
                            schemaspace_location,
                            "byo_2.json",
                            expected_count=0)

    # Attempt to remove the instance from the shared area; it's protected
    with pytest.raises(PermissionError) as pe:
        tests_hierarchy_manager.remove("byo_2")
    assert "Removal of instance 'byo_2'" in str(pe.value)

    # Ensure the one that exists is the one in the shared area
    byo_2 = tests_hierarchy_manager.get("byo_2")
    assert byo_2.resource.startswith(str(shared_location))

    # Attempt to remove the instance from the factory area; it's protected as well
    with pytest.raises(PermissionError) as pe:
        tests_hierarchy_manager.remove("byo_1")
    assert "Removal of instance 'byo_1'" in str(pe.value)

    byo_1 = tests_hierarchy_manager.get("byo_1")
    assert byo_1.resource.startswith(str(factory_location))
Example #16
def parsed_ordered_dict(monkeypatch, processor, parsed_pipeline, sample_metadata, sample_image_metadata):

    mocked_runtime = Metadata(
        name="test-metadata", display_name="test", schema_name="airflow", metadata=sample_metadata["metadata"]
    )

    mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_image_metadata])

    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda x, y, z: True)
    monkeypatch.setattr(processor, "_get_dependency_archive_name", lambda x: True)
    monkeypatch.setattr(processor, "_verify_cos_connectivity", lambda x: True)

    return processor._cc_pipeline(parsed_pipeline, pipeline_name="some-name")
Example #17
def airflow_runtime_instance():
    """Creates an airflow RTC and removes it after test."""
    instance_name = "valid_airflow_test_config"
    instance_config_file = Path(__file__).parent / "resources" / "runtime_configs" / f"{instance_name}.json"
    with open(instance_config_file, "r") as fd:
        instance_config = json.load(fd)

    md_mgr = MetadataManager(schemaspace=Runtimes.RUNTIMES_SCHEMASPACE_ID)
    # Clean up a possibly orphaned instance...
    try:
        md_mgr.remove(instance_name)
    except Exception:
        pass
    runtime_instance = md_mgr.create(instance_name, Metadata(**instance_config))
    yield runtime_instance.name
    md_mgr.remove(runtime_instance.name)
Example #18
def test_store_store_instance(store_manager, schemaspace_location):
    # Remove schemaspace to test raw creation and confirm perms
    _remove_schemaspace(store_manager, schemaspace_location)

    metadata_name = "persist"
    metadata = Metadata(**valid_metadata_json)
    metadata_dict = metadata.prepare_write()

    instance = store_manager.store_instance(metadata_name, metadata_dict)
    assert instance is not None

    if isinstance(store_manager, FileMetadataStore):
        dir_mode = oct(os.stat(schemaspace_location).st_mode
                       & 0o777777)  # Be sure to include other attributes
        assert dir_mode == "0o40700"  # and ensure this is a directory with only rwx by owner enabled

        # Ensure file was created
        metadata_file = os.path.join(schemaspace_location, "persist.json")
        assert os.path.exists(metadata_file)
        file_mode = oct(os.stat(metadata_file).st_mode
                        & 0o777777)  # Be sure to include other attributes
        assert file_mode == "0o100600"  # and ensure this is a regular file with only rw by owner enabled

        with open(metadata_file, "r", encoding="utf-8") as f:
            valid_add = json.loads(f.read())
            assert "resource" not in valid_add
            assert "name" not in valid_add
            assert "display_name" in valid_add
            assert valid_add["display_name"] == "valid metadata instance"
            assert "schema_name" in valid_add
            assert valid_add["schema_name"] == "metadata-test"

    # Attempt to create again w/o replace, then replace it.
    with pytest.raises(MetadataExistsError):
        store_manager.store_instance(metadata_name, metadata.prepare_write())

    metadata.metadata["number_range_test"] = 10
    instance = store_manager.store_instance(metadata_name,
                                            metadata.prepare_write(),
                                            for_update=True)
    assert instance is not None
    assert instance.get("metadata")["number_range_test"] == 10
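The octal literals asserted above combine the file-type bits with the permission bits; the standard-library stat module makes the decomposition explicit:

import stat

# 0o40700: directory (S_IFDIR) with rwx for the owner only.
assert stat.S_IFDIR | stat.S_IRWXU == 0o40700
# 0o100600: regular file (S_IFREG) with rw for the owner only.
assert stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR == 0o100600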
Example #19
def test_manager_rollback_create(tests_manager):
    metadata_name = "rollback_create"

    metadata = Metadata(**valid_metadata2_json)

    os.environ["METADATA_TEST_HOOK_OP"] = "create"  # Tell test class which op to raise
    # Create post-save hook will throw NotImplementedError
    with pytest.raises(NotImplementedError):
        tests_manager.create(metadata_name, metadata)

    # Ensure nothing got created
    with pytest.raises(MetadataNotFoundError):
        tests_manager.get(metadata_name)

    os.environ.pop("METADATA_TEST_HOOK_OP")  # Restore normal operation
    instance = tests_manager.create(metadata_name, metadata)
    instance2 = tests_manager.get(metadata_name)
    assert instance.name == instance2.name
    assert instance.schema_name == instance2.schema_name
    assert instance.post_property == instance2.post_property
Example #20
def test_manager_add_short_name(tests_manager, schemaspace_location):
    # Found that single character names were failing validation
    metadata_name = "a"
    metadata = Metadata(**valid_metadata_json)
    instance = tests_manager.create(metadata_name, metadata)

    assert instance is not None
    assert instance.name == metadata_name

    # Ensure file was created using store_manager
    instance_list = tests_manager.metadata_store.fetch_instances(metadata_name)
    assert len(instance_list) == 1
    instance = Metadata.from_dict(METADATA_TEST_SCHEMASPACE_ID, instance_list[0])
    metadata_location = _compose_instance_location(tests_manager.metadata_store, schemaspace_location, metadata_name)
    assert instance.resource == metadata_location

    # And finally, remove it.
    tests_manager.remove(metadata_name)

    # Verify removal using metadata_store
    with pytest.raises(MetadataNotFoundError):
        tests_manager.metadata_store.fetch_instances(metadata_name)
Example #21
def test_manager_rollback_update(tests_manager):
    metadata_name = "rollback_update"

    metadata = Metadata(**valid_metadata2_json)

    # Create the instance
    instance = tests_manager.create(metadata_name, metadata)
    original_display_name = instance.display_name
    instance.display_name = "Updated_" + original_display_name

    os.environ["METADATA_TEST_HOOK_OP"] = "update"  # Tell test class which op to raise
    # Update post-save hook will throw ModuleNotFoundError
    with pytest.raises(ModuleNotFoundError):
        tests_manager.update(metadata_name, instance)

    # Ensure the display_name is still the original value.
    instance2 = tests_manager.get(metadata_name)
    assert instance2.display_name == original_display_name

    os.environ.pop("METADATA_TEST_HOOK_OP")  # Restore normal operation
    # Ensure we can still update
    instance = tests_manager.update(metadata_name, instance)
    assert instance.display_name == "Updated_" + original_display_name
Example #22
def test_manager_add_remove_valid(tests_manager, schemaspace_location):
    metadata_name = "valid_add_remove"

    # Remove schemaspace_location and ensure it gets created
    _remove_schemaspace(tests_manager.metadata_store, schemaspace_location)

    metadata = Metadata(**valid_metadata_json)

    instance = tests_manager.create(metadata_name, metadata)
    assert instance is not None

    # Attempt to create again w/o replace, then replace it.
    with pytest.raises(MetadataExistsError):
        tests_manager.create(metadata_name, metadata)

    instance = tests_manager.update(metadata_name, metadata)
    assert instance is not None

    # And finally, remove it.
    tests_manager.remove(metadata_name)

    # Verify removal using metadata_store
    with pytest.raises(MetadataNotFoundError):
        tests_manager.metadata_store.fetch_instances(metadata_name)
Example #23
def test_create_file(monkeypatch, processor, parsed_pipeline,
                     parsed_ordered_dict, sample_metadata):
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE)

    export_pipeline_name = "some-name"
    export_file_type = "py"

    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="airflow",
                              metadata=sample_metadata)

    monkeypatch.setattr(processor,
                        "_get_metadata_configuration",
                        lambda name=None, namespace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store",
                        lambda x, y, z: True)
    monkeypatch.setattr(processor, "_cc_pipeline",
                        lambda x, y: parsed_ordered_dict)

    with tempfile.TemporaryDirectory() as temp_dir:
        export_pipeline_output_path = os.path.join(
            temp_dir, f'{export_pipeline_name}.py')

        response = processor.create_pipeline_file(
            parsed_pipeline,
            pipeline_export_format=export_file_type,
            pipeline_export_path=export_pipeline_output_path,
            pipeline_name=export_pipeline_name)

        assert export_pipeline_output_path == response
        assert os.path.isfile(export_pipeline_output_path)

        file_as_lines = open(response).read().splitlines()

        # Check DAG project name
        for i in range(len(file_as_lines)):
            if "args = {" == file_as_lines[i]:
                assert "project_id" == read_key_pair(file_as_lines[i + 1],
                                                     sep=':')['key']
                assert export_pipeline_name == read_key_pair(file_as_lines[i +
                                                                           1],
                                                             sep=':')['value']

        # For every node in the original pipeline json
        for node in pipeline_json['pipelines'][0]['nodes']:
            for i in range(len(file_as_lines)):
                # Matches an op with a node ID
                if "notebook_op_" + node['id'].replace(
                        "-", "_") + " = NotebookOp(" in file_as_lines[i]:
                    sub_list_line_counter = 0
                    # Gets sub-list slice starting where the Notebook Op starts
                    for line in file_as_lines[i + 1:]:
                        if 'namespace=' in line:
                            assert sample_metadata[
                                'user_namespace'] == read_key_pair(
                                    line)['value']
                        elif 'cos_endpoint=' in line:
                            assert sample_metadata[
                                'cos_endpoint'] == read_key_pair(line)['value']
                        elif 'cos_bucket=' in line:
                            assert sample_metadata[
                                'cos_bucket'] == read_key_pair(line)['value']
                        elif 'name=' in line:
                            assert node['app_data']['ui_data'][
                                'label'] == read_key_pair(line)['value']
                        elif 'notebook=' in line:
                            assert node['app_data'][
                                'filename'] == read_key_pair(line)['value']
                        elif 'image=' in line:
                            assert node['app_data'][
                                'runtime_image'] == read_key_pair(
                                    line)['value']
                        elif 'env_vars=' in line:
                            for env in node['app_data']['env_vars']:
                                var, value = env.split("=")
                                # Gets sub-list slice starting where the env vars starts
                                for env_line in file_as_lines[
                                        i + sub_list_line_counter + 2:]:
                                    if "AWS_ACCESS_KEY_ID" in env_line:
                                        assert sample_metadata[
                                            'cos_username'] == read_key_pair(
                                                env_line, sep=':')['value']
                                    elif "AWS_SECRET_ACCESS_KEY" in env_line:
                                        assert sample_metadata[
                                            'cos_password'] == read_key_pair(
                                                env_line, sep=':')['value']
                                    elif var in env_line:
                                        assert var == read_key_pair(
                                            env_line, sep=':')['key']
                                        assert value == read_key_pair(
                                            env_line, sep=':')['value']
                                    elif env_line.strip(
                                    ) == '},':  # end of env vars
                                        break
                        elif 'pipeline_inputs=' in line and node[
                                'app_data'].get('inputs'):
                            for input in node['app_data']['inputs']:
                                assert input in string_to_list(
                                    read_key_pair(line)['value'])
                        elif 'pipeline_outputs=' in line and node[
                                'app_data'].get('outputs'):
                            for output in node['app_data']['outputs']:
                                assert output in string_to_list(
                                    read_key_pair(line)['value'])
                        elif line == ')':  # End of this Notebook Op
                            break
                        sub_list_line_counter += 1
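The read_key_pair and string_to_list helpers used above (and in Examples #27 and #28) are not included in this listing. The sketches below are consistent with how they are called but are assumptions about the test utilities, not their actual code.

def read_key_pair(line: str, sep: str = "="):
    # Split a rendered "key=value," (or "key: value,") line into its parts,
    # dropping surrounding whitespace, a trailing comma and quotes.
    key, _, value = line.strip().rstrip(",").partition(sep)
    return {"key": key.strip().strip("'\""), "value": value.strip().strip("'\"")}


def string_to_list(value: str):
    # Turn a rendered list literal such as "['a', 'b']" into a Python list of strings.
    return [item.strip().strip("'\"") for item in value.strip("[]").split(",") if item.strip()]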
Example #24
    def start(self):
        super().start()  # process options

        # Get known options, then gather display_name and build metadata dict.
        name = self.name_option.value
        schema_name = self.schema_name_option.value
        display_name = None

        metadata = {}
        # Walk the options looking for SchemaProperty instances. Any MetadataSchemaProperty instances go
        # into the metadata dict.  Note that we process JSONBasedOptions (--json or --file) prior to
        # MetadataSchemaProperty types since the former will set the base metadata stanza and individual
        # values can be used to override the former's content (like BYO authentication OVPs, for example).
        for option in self.options:
            if isinstance(option, MetadataSchemaProperty):
                # Skip adding any non-required properties that have no value (unless it's a null type).
                if not option.required and not option.value and option.type != "null":
                    continue
                metadata[option.name] = option.value
            elif isinstance(option, SchemaProperty):
                if option.name == "display_name":  # Be sure we have a display_name
                    display_name = option.value
                    continue
            elif isinstance(option, JSONBasedOption):
                metadata.update(option.metadata)

        if display_name is None and self.replace_flag.value is False:  # Only require on create
            self.log_and_exit(
                f"Could not determine display_name from schema '{schema_name}'"
            )

        ex_msg = None
        new_instance = None
        try:
            if self.replace_flag.value:  # if replacing, fetch the instance so it can be updated
                updated_instance = self.metadata_manager.get(name)
                updated_instance.schema_name = schema_name
                if display_name:
                    updated_instance.display_name = display_name
                updated_instance.metadata.update(metadata)
                new_instance = self.metadata_manager.update(
                    name, updated_instance)
            else:  # create a new instance
                instance = Metadata(schema_name=schema_name,
                                    name=name,
                                    display_name=display_name,
                                    metadata=metadata)
                new_instance = self.metadata_manager.create(name, instance)
        except Exception as ex:
            ex_msg = str(ex)

        if new_instance:
            print(
                f"Metadata instance '{new_instance.name}' for schema '{schema_name}' has been written "
                f"to: {new_instance.resource}")
        else:
            if ex_msg:
                self.log_and_exit(
                    f"The following exception occurred saving metadata instance "
                    f"for schema '{schema_name}': {ex_msg}",
                    display_help=False,
                )
            else:
                self.log_and_exit(
                    f"A failure occurred saving metadata instance '{name}' for "
                    f"schema '{schema_name}'.",
                    display_help=False,
                )
Example #25
def test_cc_pipeline_component_no_input(monkeypatch, processor,
                                        component_cache, sample_metadata,
                                        tmpdir):
    """
    Verifies that cc_pipeline can handle KFP component definitions that don't
    include any inputs
    """
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    cpath = (Path(__file__).parent / ".." / "resources" / "components" /
             "kfp_test_operator_no_inputs.yaml").resolve()
    assert cpath.is_file()
    cpath = str(cpath)

    # Read the contents of the given path -- get_entry_data() returns an entry
    # whose `definition` attribute holds the component definition content
    entry_data = reader.get_entry_data({"path": cpath}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="No input data",
        description="",
        op="no-input-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": cpath},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include single filename-based component for testing
    component_cache._component_cache[processor._type.name] = {
        "spoofed_catalog": {
            "components": {
                component_id: component
            }
        }
    }

    # Construct hypothetical operation for component
    operation_name = "no-input-test"
    operation_params = {}
    operation = Operation(
        id="no-input-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="kfp",
                              metadata=sample_metadata)

    mocked_func = mock.Mock(return_value="default",
                            side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(id="pipeline-id",
                        name="kfp_test",
                        runtime="kfp",
                        runtime_config="test",
                        source="no_input.pipeline")
    pipeline.operations[operation.id] = operation

    constructed_pipeline_function = lambda: processor._cc_pipeline(
        pipeline=pipeline, pipeline_name="test_pipeline")
    pipeline_path = str(Path(tmpdir) / "no_inputs_test.yaml")

    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function,
                                         pipeline_path)
Example #26
async def test_modify_component_catalogs(jp_environ, component_cache,
                                         metadata_manager_with_teardown,
                                         create_inprocess):
    # Get initial set of components
    initial_components = component_cache.get_all_components(RUNTIME_PROCESSOR)

    # Create new registry instance with a single file-based component
    paths = [_get_resource_path("kfp_test_operator.yaml")]

    instance_metadata = {
        "description": "A test registry",
        "runtime_type": RUNTIME_PROCESSOR.name,
        "categories": ["New Components"],
        "paths": paths,
    }
    registry_instance = Metadata(
        schema_name="local-file-catalog",
        name=TEST_CATALOG_NAME,
        display_name="New Test Registry",
        metadata=instance_metadata,
    )

    if create_inprocess:
        metadata_manager_with_teardown.create(TEST_CATALOG_NAME,
                                              registry_instance)
    else:
        res: CompletedProcess = run([
            "elyra-metadata",
            "install",
            "component-catalogs",
            f"--schema_name={registry_instance.schema_name}",
            f"--json={registry_instance.to_json()}",
            f"--name={TEST_CATALOG_NAME}",
        ])
        assert res.returncode == 0

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Get new set of components from all active registries, including added test registry
    components_after_create = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_create) == len(initial_components) + 1

    added_component_names = [
        component.name for component in components_after_create
    ]
    assert "Test Operator" in added_component_names
    assert "Test Operator No Inputs" not in added_component_names

    # Modify the test registry to add a path to the catalog instance
    paths.append(_get_resource_path("kfp_test_operator_no_inputs.yaml"))
    metadata_manager_with_teardown.update(TEST_CATALOG_NAME, registry_instance)

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Get set of components from all active registries, including modified test registry
    components_after_update = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_update) == len(initial_components) + 2

    modified_component_names = [
        component.name for component in components_after_update
    ]
    assert "Test Operator" in modified_component_names
    assert "Test Operator No Inputs" in modified_component_names

    # Delete the test registry
    metadata_manager_with_teardown.remove(TEST_CATALOG_NAME)

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Check that components remaining after delete are the same as before the new catalog was added
    components_after_remove = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_remove) == len(initial_components)
Example #27
def test_create_file_custom_components(
    monkeypatch, processor, catalog_instance, component_cache, parsed_pipeline, parsed_ordered_dict, sample_metadata
):
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE_CUSTOM_COMPONENTS)

    export_pipeline_name = "some-name"
    export_file_type = "py"

    mocked_runtime = Metadata(
        name="test-metadata", display_name="test", schema_name="airflow", metadata=sample_metadata["metadata"]
    )

    monkeypatch.setattr(processor, "_get_metadata_configuration", lambda name=None, schemaspace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda x, y, z: True)
    monkeypatch.setattr(processor, "_cc_pipeline", lambda x, y: parsed_ordered_dict)

    with tempfile.TemporaryDirectory() as temp_dir:
        export_pipeline_output_path = os.path.join(temp_dir, f"{export_pipeline_name}.py")

        response = processor.create_pipeline_file(
            parsed_pipeline,
            pipeline_export_format=export_file_type,
            pipeline_export_path=export_pipeline_output_path,
            pipeline_name=export_pipeline_name,
        )

        assert export_pipeline_output_path == response
        assert os.path.isfile(export_pipeline_output_path)

        file_as_lines = open(response).read().splitlines()

        pipeline_description = pipeline_json["pipelines"][0]["app_data"]["properties"]["description"]
        escaped_description = pipeline_description.replace('"""', '\\"\\"\\"')

        for i in range(len(file_as_lines)):
            if "args = {" == file_as_lines[i]:
                # Check DAG project name
                assert "project_id" == read_key_pair(file_as_lines[i + 1], sep=":")["key"]
                assert export_pipeline_name == read_key_pair(file_as_lines[i + 1], sep=":")["value"]
            elif 'description="""' in file_as_lines[i]:
                # Check that DAG contains the correct description
                line_no = i + 1
                description_as_lines = []
                while '"""' not in file_as_lines[line_no]:
                    description_as_lines.append(file_as_lines[line_no])
                    line_no += 1
                expected_description_lines = escaped_description.split("\n")
                assert description_as_lines == expected_description_lines

                # Nothing more to be done in file
                break

        # For every node in the original pipeline json
        for node in pipeline_json["pipelines"][0]["nodes"]:
            component_parameters = node["app_data"]["component_parameters"]
            for i in range(len(file_as_lines)):
                # Matches custom component operators
                if f"op_{node['id'].replace('-', '_')} = " in file_as_lines[i]:
                    for parameter in component_parameters:
                        # Find 'parameter=' clause in file_as_lines list
                        r = re.compile(rf"\s*{parameter}=.*")
                        parameter_clause = i + 1
                        assert len(list(filter(r.match, file_as_lines[parameter_clause:]))) > 0
Example #28
def test_create_file(monkeypatch, processor, parsed_pipeline, parsed_ordered_dict, sample_metadata):
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE_COMPLEX)

    export_pipeline_name = "some-name"
    export_file_type = "py"

    mocked_runtime = Metadata(
        name="test-metadata", display_name="test", schema_name="airflow", metadata=sample_metadata["metadata"]
    )

    monkeypatch.setattr(processor, "_get_metadata_configuration", lambda name=None, schemaspace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda x, y, z: True)
    monkeypatch.setattr(processor, "_cc_pipeline", lambda x, y: parsed_ordered_dict)

    with tempfile.TemporaryDirectory() as temp_dir:
        export_pipeline_output_path = os.path.join(temp_dir, f"{export_pipeline_name}.py")

        response = processor.create_pipeline_file(
            parsed_pipeline,
            pipeline_export_format=export_file_type,
            pipeline_export_path=export_pipeline_output_path,
            pipeline_name=export_pipeline_name,
        )

        assert export_pipeline_output_path == response
        assert os.path.isfile(export_pipeline_output_path)

        file_as_lines = open(response).read().splitlines()

        assert "from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator" in file_as_lines

        # Check DAG project name
        for i in range(len(file_as_lines)):
            if "args = {" == file_as_lines[i]:
                assert "project_id" == read_key_pair(file_as_lines[i + 1], sep=":")["key"]
                assert export_pipeline_name == read_key_pair(file_as_lines[i + 1], sep=":")["value"]

        # For every node in the original pipeline json
        for node in pipeline_json["pipelines"][0]["nodes"]:
            component_parameters = node["app_data"]["component_parameters"]
            for i in range(len(file_as_lines)):
                # Matches a generic op with a node ID
                if f"op_{node['id'].replace('-', '_')} = KubernetesPodOperator(" in file_as_lines[i]:
                    sub_list_line_counter = 0
                    # Gets sub-list slice starting where the KubernetesPodOperator starts
                    init_line = i + 1
                    for line in file_as_lines[init_line:]:
                        if "namespace=" in line:
                            assert sample_metadata["metadata"]["user_namespace"] == read_key_pair(line)["value"]
                        elif "cos_endpoint=" in line:
                            assert sample_metadata["metadata"]["cos_endpoint"] == read_key_pair(line)["value"]
                        elif "cos_bucket=" in line:
                            assert sample_metadata["metadata"]["cos_bucket"] == read_key_pair(line)["value"]
                        elif "name=" in line:
                            assert node["app_data"]["ui_data"]["label"] == read_key_pair(line)["value"]
                        elif "notebook=" in line:
                            assert component_parameters["filename"] == read_key_pair(line)["value"]
                        elif "image=" in line:
                            assert component_parameters["runtime_image"] == read_key_pair(line)["value"]
                        elif "env_vars=" in line:
                            for env in component_parameters["env_vars"]:
                                var, value = env.split("=")
                                # Gets sub-list slice starting where the env vars starts
                                start_env = i + sub_list_line_counter + 2
                                for env_line in file_as_lines[start_env:]:
                                    if "AWS_ACCESS_KEY_ID" in env_line:
                                        assert (
                                            sample_metadata["metadata"]["cos_username"]
                                            == read_key_pair(env_line, sep=":")["value"]
                                        )
                                    elif "AWS_SECRET_ACCESS_KEY" in env_line:
                                        assert (
                                            sample_metadata["metadata"]["cos_password"]
                                            == read_key_pair(env_line, sep=":")["value"]
                                        )
                                    elif var in env_line:
                                        assert var == read_key_pair(env_line, sep=":")["key"]
                                        assert value == read_key_pair(env_line, sep=":")["value"]
                                    elif env_line.strip() == "},":  # end of env vars
                                        break
                        elif "pipeline_inputs=" in line and component_parameters.get("inputs"):
                            for input in component_parameters["inputs"]:
                                assert input in string_to_list(read_key_pair(line)["value"])
                        elif "pipeline_outputs=" in line and component_parameters.get("outputs"):
                            for output in component_parameters["outputs"]:
                                assert output in string_to_list(read_key_pair(line)["value"])
                        elif line == ")":  # End of this Notebook Op
                            break
                        sub_list_line_counter += 1
Example #29
async def test_modify_component_catalogs(component_cache,
                                         metadata_manager_with_teardown,
                                         create_inprocess):
    # Get initial set of components
    initial_components = component_cache.get_all_components(RUNTIME_PROCESSOR)

    # Create new registry instance with a single URL-based component
    urls = [
        "https://raw.githubusercontent.com/elyra-ai/elyra/master/elyra/tests/pipeline/resources/components/"
        "airflow_test_operator.py"
    ]

    instance_metadata = {
        "description": "A test registry",
        "runtime_type": RUNTIME_PROCESSOR.name,
        "categories": ["New Components"],
        "paths": urls,
    }
    registry_instance = Metadata(schema_name="url-catalog",
                                 name=TEST_CATALOG_NAME,
                                 display_name="New Test Registry",
                                 metadata=instance_metadata)

    if create_inprocess:
        metadata_manager_with_teardown.create(TEST_CATALOG_NAME,
                                              registry_instance)
    else:
        res: CompletedProcess = run([
            "elyra-metadata",
            "install",
            "component-catalogs",
            f"--schema_name={registry_instance.schema_name}",
            f"--json={registry_instance.to_json()}",
            f"--name={TEST_CATALOG_NAME}",
        ])
        assert res.returncode == 0

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Get new set of components from all active registries, including added test registry
    components_after_create = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_create) == len(initial_components) + 3

    added_component_names = [
        component.name for component in components_after_create
    ]
    assert "TestOperator" in added_component_names
    assert "TestOperatorNoInputs" not in added_component_names

    # Modify the test registry to add an additional URL to the catalog instance
    urls.append(
        "https://raw.githubusercontent.com/elyra-ai/elyra/master/elyra/tests/pipeline/resources/components"
        "/airflow_test_operator_no_inputs.py")
    metadata_manager_with_teardown.update(TEST_CATALOG_NAME, registry_instance)

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Get set of components from all active registries, including modified test registry
    components_after_update = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_update) == len(initial_components) + 4

    modified_component_names = [
        component.name for component in components_after_update
    ]
    assert "TestOperator" in modified_component_names
    assert "TestOperatorNoInputs" in modified_component_names

    # Delete the test registry
    metadata_manager_with_teardown.remove(TEST_CATALOG_NAME)

    # Wait for update to complete
    component_cache.wait_for_all_cache_tasks()

    # Check that components remaining after delete are the same as before the new catalog was added
    components_after_remove = component_cache.get_all_components(
        RUNTIME_PROCESSOR)
    assert len(components_after_remove) == len(initial_components)
Example #30
def test_processing_filename_runtime_specific_component(
        monkeypatch, processor, sample_metadata, tmpdir):
    # Define the appropriate reader for a filesystem-type component definition
    kfp_supported_file_types = [".yaml"]
    reader = FilesystemComponentCatalogConnector(kfp_supported_file_types)

    # Assign test resource location
    absolute_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "resources",
                     "components", "download_data.yaml"))

    # Read the contents of the given path -- get_entry_data() returns an entry
    # whose `definition` attribute holds the component definition content
    entry_data = reader.get_entry_data({"path": absolute_path}, {})
    component_definition = entry_data.definition

    # Instantiate a file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="Download data",
        description="",
        op="download-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": absolute_path},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Fabricate the component cache to include single filename-based component for testing
    ComponentCache.instance()._component_cache[processor._type.name] = {
        "spoofed_catalog": {
            "components": {
                component_id: component
            }
        }
    }

    # Construct hypothetical operation for component
    operation_name = "Download data test"
    operation_params = {
        "url":
        "https://raw.githubusercontent.com/elyra-ai/elyra/master/tests/assets/helloworld.ipynb",
        "curl_options": "--location",
    }
    operation = Operation(
        id="download-data-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Build a mock runtime config for use in _cc_pipeline
    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="kfp",
                              metadata=sample_metadata)

    mocked_func = mock.Mock(return_value="default",
                            side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Construct single-operation pipeline
    pipeline = Pipeline(id="pipeline-id",
                        name="kfp_test",
                        runtime="kfp",
                        runtime_config="test",
                        source="download_data.pipeline")
    pipeline.operations[operation.id] = operation

    # Establish path and function to construct pipeline
    pipeline_path = os.path.join(tmpdir, "kfp_test.yaml")
    constructed_pipeline_function = lambda: processor._cc_pipeline(
        pipeline=pipeline, pipeline_name="test_pipeline")

    # TODO Check against both argo and tekton compilations
    # Compile pipeline and save into pipeline_path
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function,
                                         pipeline_path)

    # Read contents of pipeline YAML
    with open(pipeline_path) as f:
        pipeline_yaml = yaml.safe_load(f.read())

    # Check the pipeline file contents for correctness
    pipeline_template = pipeline_yaml["spec"]["templates"][0]
    assert pipeline_template["metadata"]["annotations"][
        "pipelines.kubeflow.org/task_display_name"] == operation_name
    assert pipeline_template["container"]["command"][3] == operation_params[
        "url"]