def test_pipeline_process(monkeypatch, processor, parsed_pipeline, sample_metadata):
    """Check that ``processor.process`` reports the endpoints found in the runtime metadata.

    The runtime configuration lookup, the pipeline file generation and the
    GitHub repository/upload calls are all replaced by stubs.
    """
    runtime_stub = Metadata(
        name="test-metadata",
        display_name="test",
        schema_name="airflow",
        metadata=sample_metadata,
    )
    placeholder_path = "/some-placeholder"

    def fake_get_metadata_configuration(namespace, name):
        return runtime_stub

    def fake_create_pipeline_file(pipeline, pipeline_export_format, pipeline_export_path, pipeline_name):
        return placeholder_path

    def fake_get_repo(x, y):
        return True

    def fake_upload_dag(x, y, z):
        return True

    monkeypatch.setattr(processor, "_get_metadata_configuration", fake_get_metadata_configuration)
    monkeypatch.setattr(processor, "create_pipeline_file", fake_create_pipeline_file)
    monkeypatch.setattr(github.Github, "get_repo", fake_get_repo)
    monkeypatch.setattr(git.GithubClient, "upload_dag", fake_upload_dag)

    response = processor.process(pipeline=parsed_pipeline)

    assert response.run_url == sample_metadata["api_endpoint"]
    assert response.object_storage_url == sample_metadata["cos_endpoint"]

    # A timestamp is injected into the generated name, so only the leading
    # part of the storage path can be compared.
    expected_fragment = "/" + sample_metadata["cos_bucket"] + "/" + "untitled"
    assert expected_fragment in response.object_storage_path
def test_manager_rollback_update(tests_manager):
    """Verify that a failing update leaves the stored instance unchanged.

    The ``METADATA_TEST_HOOK_OP`` environment variable tells the test class
    which operation should raise.  It is removed in a ``finally`` clause so
    that a failing assertion cannot leak the setting into other tests.
    """
    metadata_name = "rollback_update"

    metadata = Metadata(**valid_metadata2_json)

    # Create the instance
    instance = tests_manager.create(metadata_name, metadata)
    original_display_name = instance.display_name
    instance.display_name = "Updated_" + original_display_name

    os.environ["METADATA_TEST_HOOK_OP"] = "update"  # Tell test class which op to raise
    try:
        # Update post-save hook will throw ModuleNotFoundError
        with pytest.raises(ModuleNotFoundError):
            tests_manager.update(metadata_name, instance)

        # Ensure the display_name is still the original value.
        instance2 = tests_manager.get(metadata_name)
        assert instance2.display_name == original_display_name
    finally:
        # Restore normal operation, even if one of the checks above failed
        os.environ.pop("METADATA_TEST_HOOK_OP", None)

    # Ensure we can still update
    instance = tests_manager.update(metadata_name, instance)
    assert instance.display_name == "Updated_" + original_display_name
def test_same_name_operator_in_pipeline(monkeypatch, processor, catalog_instance, parsed_pipeline, sample_metadata):
    """Check how a ``bash_command`` that references an upstream task is rendered by ``_cc_pipeline``.

    The parsed pipeline must carry the reference as a ``NestedEnumControl``
    pointing at the upstream task id; the processed operation must carry the
    corresponding ``xcom_pull`` expression.
    """
    task_id = "e3922a29-f4c0-43d9-8d8b-4509aab80032"
    upstream_task_id = "0eb57369-99d1-4cd0-a205-8d8d96af3ad4"

    runtime_stub = Metadata(
        name="test-metadata",
        display_name="test",
        schema_name="airflow",
        metadata=sample_metadata["metadata"],
    )

    monkeypatch.setattr(processor, "_get_metadata_configuration", lambda name=None, schemaspace=None: runtime_stub)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda x, y, z: True)

    # State of the parameter in the pipeline definition, before processing
    bash_param = parsed_pipeline.operations[task_id].component_params_as_dict["bash_command"]
    assert bash_param["activeControl"] == "NestedEnumControl"
    nested_control = bash_param["NestedEnumControl"]
    assert set(nested_control.keys()) == {"value", "option"}
    assert bash_param["NestedEnumControl"]["value"] == upstream_task_id

    # State of the parameter after the processor has built the operations
    ordered_operations = processor._cc_pipeline(parsed_pipeline, pipeline_name="some-name")
    rendered_bash_command = ordered_operations[task_id]["component_params"]["bash_command"]

    assert rendered_bash_command == "\"{{ ti.xcom_pull(task_ids='BashOperator_1') }}\""
def test_manager_rollback_delete(tests_manager):
    """Verify that a failing delete leaves the stored instance in place.

    The ``METADATA_TEST_HOOK_OP`` environment variable tells the test class
    which operation should raise.  It is removed in a ``finally`` clause so
    that a failing assertion cannot leak the setting into other tests.
    """
    metadata_name = "rollback_delete"

    metadata = Metadata(**valid_metadata2_json)

    # Create the instance
    instance = tests_manager.create(metadata_name, metadata)

    os.environ["METADATA_TEST_HOOK_OP"] = "delete"  # Tell test class which op to raise
    try:
        # Delete post-save hook will throw FileNotFoundError
        with pytest.raises(FileNotFoundError):
            tests_manager.remove(metadata_name)

        # Ensure the instance still exists
        instance2 = tests_manager.get(metadata_name)
        assert instance2.display_name == instance.display_name
    finally:
        # Restore normal operation, even if one of the checks above failed
        os.environ.pop("METADATA_TEST_HOOK_OP", None)

    # Ensure we can still delete
    tests_manager.remove(metadata_name)

    # Ensure the instance was deleted
    with pytest.raises(MetadataNotFoundError):
        tests_manager.get(metadata_name)
def test_manager_add_remove_valid(tests_manager, schemaspace_location):
    """Walk one instance through create, duplicate-create, update and remove."""
    instance_name = "valid_add_remove"

    # Start without the schemaspace location so that creation has to set it up
    _remove_schemaspace(tests_manager.metadata_store, schemaspace_location)

    payload = Metadata(**valid_metadata_json)

    created = tests_manager.create(instance_name, payload)
    assert created is not None

    # A second create without replace must be rejected ...
    with pytest.raises(MetadataExistsError):
        tests_manager.create(instance_name, payload)

    # ... whereas an update of the existing instance succeeds
    updated = tests_manager.update(instance_name, payload)
    assert updated is not None

    tests_manager.remove(instance_name)

    # The store itself must no longer know the instance
    with pytest.raises(MetadataNotFoundError):
        tests_manager.metadata_store.fetch_instances(instance_name)
def test_store_store_instance(store_manager, schemaspace_location):
    """Store an instance through the store manager and check what was persisted.

    For a ``FileMetadataStore`` the directory and file modes and the JSON
    content on disk are inspected as well.
    """
    # Drop the schemaspace first so creation (and its permissions) is exercised
    _remove_schemaspace(store_manager, schemaspace_location)

    instance_name = "persist"
    metadata = Metadata(**valid_metadata_json)

    stored = store_manager.store_instance(instance_name, metadata.prepare_write())
    assert stored is not None

    if isinstance(store_manager, FileMetadataStore):
        # The wide mask keeps the file-type bits in addition to the permission bits
        mode_mask = 0o777777

        # Expect a directory with only rwx for the owner
        assert oct(os.stat(schemaspace_location).st_mode & mode_mask) == "0o40700"

        # Expect the instance file to exist as a regular file with only rw for the owner
        metadata_file = os.path.join(schemaspace_location, "persist.json")
        assert os.path.exists(metadata_file)
        assert oct(os.stat(metadata_file).st_mode & mode_mask) == "0o100600"

        with open(metadata_file, "r", encoding="utf-8") as handle:
            persisted = json.load(handle)

        for absent_key in ("resource", "name"):
            assert absent_key not in persisted
        assert "display_name" in persisted
        assert persisted["display_name"] == "valid metadata instance"
        assert "schema_name" in persisted
        assert persisted["schema_name"] == "metadata-test"

    # Storing again without for_update must fail; with for_update it replaces the instance
    with pytest.raises(MetadataExistsError):
        store_manager.store_instance(instance_name, metadata.prepare_write())

    metadata.metadata["number_range_test"] = 10
    stored = store_manager.store_instance(instance_name, metadata.prepare_write(), for_update=True)
    assert stored is not None
    assert stored.get("metadata")["number_range_test"] == 10
def test_create_file(monkeypatch, processor, parsed_pipeline, parsed_ordered_dict, sample_metadata):
    """Export a pipeline to a Python DAG file and check the generated ``NotebookOp`` entries.

    The runtime configuration lookup, the dependency upload and ``_cc_pipeline``
    are replaced by stubs, so only ``create_pipeline_file`` does real work.
    The generated file is then scanned line by line and compared with the
    pipeline JSON and the sample metadata.
    """
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE)

    export_pipeline_name = "some-name"
    export_file_type = "py"

    mocked_runtime = Metadata(
        name="test-metadata", display_name="test", schema_name="airflow", metadata=sample_metadata
    )

    monkeypatch.setattr(processor, "_get_metadata_configuration", lambda name=None, namespace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda x, y, z: True)
    monkeypatch.setattr(processor, "_cc_pipeline", lambda x, y: parsed_ordered_dict)

    with tempfile.TemporaryDirectory() as temp_dir:
        export_pipeline_output_path = os.path.join(temp_dir, f'{export_pipeline_name}.py')

        response = processor.create_pipeline_file(
            parsed_pipeline,
            pipeline_export_format=export_file_type,
            pipeline_export_path=export_pipeline_output_path,
            pipeline_name=export_pipeline_name,
        )

        assert export_pipeline_output_path == response
        assert os.path.isfile(export_pipeline_output_path)

        # Use a context manager so the file handle is closed right after reading
        with open(response) as dag_file:
            file_as_lines = dag_file.read().splitlines()

        # Check DAG project name
        for i in range(len(file_as_lines)):
            if "args = {" == file_as_lines[i]:
                assert "project_id" == read_key_pair(file_as_lines[i + 1], sep=':')['key']
                assert export_pipeline_name == read_key_pair(file_as_lines[i + 1], sep=':')['value']

        # For every node in the original pipeline json
        for node in pipeline_json['pipelines'][0]['nodes']:
            for i in range(len(file_as_lines)):
                # Matches an op with a node ID
                if "notebook_op_" + node['id'].replace("-", "_") + " = NotebookOp(" in file_as_lines[i]:
                    # Number of lines consumed since the op line; used to locate the env vars
                    sub_list_line_counter = 0
                    # Gets sub-list slice starting where the Notebook Op starts
                    for line in file_as_lines[i + 1:]:
                        if 'namespace=' in line:
                            assert sample_metadata['user_namespace'] == read_key_pair(line)['value']
                        elif 'cos_endpoint=' in line:
                            assert sample_metadata['cos_endpoint'] == read_key_pair(line)['value']
                        elif 'cos_bucket=' in line:
                            assert sample_metadata['cos_bucket'] == read_key_pair(line)['value']
                        elif 'name=' in line:
                            assert node['app_data']['ui_data']['label'] == read_key_pair(line)['value']
                        elif 'notebook=' in line:
                            assert node['app_data']['filename'] == read_key_pair(line)['value']
                        elif 'image=' in line:
                            assert node['app_data']['runtime_image'] == read_key_pair(line)['value']
                        elif 'env_vars=' in line:
                            for env in node['app_data']['env_vars']:
                                var, value = env.split("=")
                                # Gets sub-list slice starting where the env vars starts
                                for env_line in file_as_lines[i + sub_list_line_counter + 2:]:
                                    if "AWS_ACCESS_KEY_ID" in env_line:
                                        assert sample_metadata['cos_username'] == read_key_pair(
                                            env_line, sep=':')['value']
                                    elif "AWS_SECRET_ACCESS_KEY" in env_line:
                                        assert sample_metadata['cos_password'] == read_key_pair(
                                            env_line, sep=':')['value']
                                    elif var in env_line:
                                        assert var == read_key_pair(env_line, sep=':')['key']
                                        assert value == read_key_pair(env_line, sep=':')['value']
                                    elif env_line.strip() == '},':  # end of env vars
                                        break
                        elif 'pipeline_inputs=' in line and node['app_data'].get('inputs'):
                            for input_name in node['app_data']['inputs']:
                                assert input_name in string_to_list(read_key_pair(line)['value'])
                        elif 'pipeline_outputs=' in line and node['app_data'].get('outputs'):
                            for output_name in node['app_data']['outputs']:
                                assert output_name in string_to_list(read_key_pair(line)['value'])
                        elif line == ')':  # End of this Notebook Op
                            break
                        sub_list_line_counter += 1
def test_create_file_custom_components(
    monkeypatch, processor, catalog_instance, component_cache, parsed_pipeline, parsed_ordered_dict, sample_metadata
):
    """Export a pipeline with custom components and check the generated DAG file.

    The runtime configuration lookup, the dependency upload and ``_cc_pipeline``
    are replaced by stubs.  The generated file is checked for the project id,
    the (escaped) pipeline description and a ``parameter=`` clause for every
    component parameter of every node.
    """
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE_CUSTOM_COMPONENTS)

    export_pipeline_name = "some-name"
    export_file_type = "py"

    mocked_runtime = Metadata(
        name="test-metadata", display_name="test", schema_name="airflow", metadata=sample_metadata["metadata"]
    )

    monkeypatch.setattr(processor, "_get_metadata_configuration", lambda name=None, schemaspace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda x, y, z: True)
    monkeypatch.setattr(processor, "_cc_pipeline", lambda x, y: parsed_ordered_dict)

    with tempfile.TemporaryDirectory() as temp_dir:
        export_pipeline_output_path = os.path.join(temp_dir, f"{export_pipeline_name}.py")

        response = processor.create_pipeline_file(
            parsed_pipeline,
            pipeline_export_format=export_file_type,
            pipeline_export_path=export_pipeline_output_path,
            pipeline_name=export_pipeline_name,
        )

        assert export_pipeline_output_path == response
        assert os.path.isfile(export_pipeline_output_path)

        # Use a context manager so the file handle is closed right after reading
        with open(response) as dag_file:
            file_as_lines = dag_file.read().splitlines()

        pipeline_description = pipeline_json["pipelines"][0]["app_data"]["properties"]["description"]
        escaped_description = pipeline_description.replace('"""', '\\"\\"\\"')

        for i in range(len(file_as_lines)):
            if "args = {" == file_as_lines[i]:
                # Check DAG project name
                assert "project_id" == read_key_pair(file_as_lines[i + 1], sep=":")["key"]
                assert export_pipeline_name == read_key_pair(file_as_lines[i + 1], sep=":")["value"]
            elif 'description="""' in file_as_lines[i]:
                # Check that DAG contains the correct description: collect the
                # lines up to the next line that contains a triple quote
                line_no = i + 1
                description_as_lines = []
                while '"""' not in file_as_lines[line_no]:
                    description_as_lines.append(file_as_lines[line_no])
                    line_no += 1
                expected_description_lines = escaped_description.split("\n")
                assert description_as_lines == expected_description_lines

                # Nothing more to be done in file
                break

        # For every node in the original pipeline json
        for node in pipeline_json["pipelines"][0]["nodes"]:
            component_parameters = node["app_data"]["component_parameters"]
            for i in range(len(file_as_lines)):
                # Matches custom component operators
                if f"op_{node['id'].replace('-', '_')} = " in file_as_lines[i]:
                    for parameter in component_parameters:
                        # Find 'parameter=' clause in file_as_lines list
                        r = re.compile(rf"\s*{parameter}=.*")
                        parameter_clause = i + 1
                        assert any(r.match(candidate) for candidate in file_as_lines[parameter_clause:])
def test_create_file(monkeypatch, processor, parsed_pipeline, parsed_ordered_dict, sample_metadata):
    """Export a pipeline to a Python DAG file and check the generated ``KubernetesPodOperator`` entries.

    The runtime configuration lookup, the dependency upload and ``_cc_pipeline``
    are replaced by stubs, so only ``create_pipeline_file`` does real work.
    The generated file is then scanned line by line and compared with the
    pipeline JSON and the sample metadata.
    """
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE_COMPLEX)

    export_pipeline_name = "some-name"
    export_file_type = "py"

    mocked_runtime = Metadata(
        name="test-metadata", display_name="test", schema_name="airflow", metadata=sample_metadata["metadata"]
    )

    monkeypatch.setattr(processor, "_get_metadata_configuration", lambda name=None, schemaspace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda x, y, z: True)
    monkeypatch.setattr(processor, "_cc_pipeline", lambda x, y: parsed_ordered_dict)

    with tempfile.TemporaryDirectory() as temp_dir:
        export_pipeline_output_path = os.path.join(temp_dir, f"{export_pipeline_name}.py")

        response = processor.create_pipeline_file(
            parsed_pipeline,
            pipeline_export_format=export_file_type,
            pipeline_export_path=export_pipeline_output_path,
            pipeline_name=export_pipeline_name,
        )

        assert export_pipeline_output_path == response
        assert os.path.isfile(export_pipeline_output_path)

        # Use a context manager so the file handle is closed right after reading
        with open(response) as dag_file:
            file_as_lines = dag_file.read().splitlines()

        assert "from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator" in file_as_lines

        # Check DAG project name
        for i in range(len(file_as_lines)):
            if "args = {" == file_as_lines[i]:
                assert "project_id" == read_key_pair(file_as_lines[i + 1], sep=":")["key"]
                assert export_pipeline_name == read_key_pair(file_as_lines[i + 1], sep=":")["value"]

        # For every node in the original pipeline json
        for node in pipeline_json["pipelines"][0]["nodes"]:
            component_parameters = node["app_data"]["component_parameters"]
            for i in range(len(file_as_lines)):
                # Matches a generic op with a node ID
                if f"op_{node['id'].replace('-', '_')} = KubernetesPodOperator(" in file_as_lines[i]:
                    # Number of lines consumed since the op line; used to locate the env vars
                    sub_list_line_counter = 0
                    # Gets sub-list slice starting where the operator definition starts
                    init_line = i + 1
                    for line in file_as_lines[init_line:]:
                        if "namespace=" in line:
                            assert sample_metadata["metadata"]["user_namespace"] == read_key_pair(line)["value"]
                        elif "cos_endpoint=" in line:
                            assert sample_metadata["metadata"]["cos_endpoint"] == read_key_pair(line)["value"]
                        elif "cos_bucket=" in line:
                            assert sample_metadata["metadata"]["cos_bucket"] == read_key_pair(line)["value"]
                        elif "name=" in line:
                            assert node["app_data"]["ui_data"]["label"] == read_key_pair(line)["value"]
                        elif "notebook=" in line:
                            assert component_parameters["filename"] == read_key_pair(line)["value"]
                        elif "image=" in line:
                            assert component_parameters["runtime_image"] == read_key_pair(line)["value"]
                        elif "env_vars=" in line:
                            for env in component_parameters["env_vars"]:
                                var, value = env.split("=")
                                # Gets sub-list slice starting where the env vars starts
                                start_env = i + sub_list_line_counter + 2
                                for env_line in file_as_lines[start_env:]:
                                    if "AWS_ACCESS_KEY_ID" in env_line:
                                        assert (
                                            sample_metadata["metadata"]["cos_username"]
                                            == read_key_pair(env_line, sep=":")["value"]
                                        )
                                    elif "AWS_SECRET_ACCESS_KEY" in env_line:
                                        assert (
                                            sample_metadata["metadata"]["cos_password"]
                                            == read_key_pair(env_line, sep=":")["value"]
                                        )
                                    elif var in env_line:
                                        assert var == read_key_pair(env_line, sep=":")["key"]
                                        assert value == read_key_pair(env_line, sep=":")["value"]
                                    elif env_line.strip() == "},":  # end of env vars
                                        break
                        elif "pipeline_inputs=" in line and component_parameters.get("inputs"):
                            for input_name in component_parameters["inputs"]:
                                assert input_name in string_to_list(read_key_pair(line)["value"])
                        elif "pipeline_outputs=" in line and component_parameters.get("outputs"):
                            for output_name in component_parameters["outputs"]:
                                assert output_name in string_to_list(read_key_pair(line)["value"])
                        elif line == ")":  # End of this operator definition
                            break
                        sub_list_line_counter += 1
async def test_modify_component_catalogs(component_cache, metadata_manager_with_teardown, create_inprocess):
    """Create, extend and delete a URL-based catalog and watch the component cache follow.

    The catalog is created either through the metadata manager or through the
    ``elyra-metadata`` command line, depending on ``create_inprocess``.
    """
    # Snapshot of the components before the test catalog exists
    initial_components = component_cache.get_all_components(RUNTIME_PROCESSOR)

    # The catalog starts out with a single URL
    urls = [
        "https://raw.githubusercontent.com/elyra-ai/elyra/master/elyra/tests/pipeline/resources/components/"
        "airflow_test_operator.py"
    ]
    registry_instance = Metadata(
        schema_name="url-catalog",
        name=TEST_CATALOG_NAME,
        display_name="New Test Registry",
        metadata={
            "description": "A test registry",
            "runtime_type": RUNTIME_PROCESSOR.name,
            "categories": ["New Components"],
            "paths": urls,
        },
    )

    if not create_inprocess:
        cli_arguments = [
            "elyra-metadata",
            "install",
            "component-catalogs",
            f"--schema_name={registry_instance.schema_name}",
            f"--json={registry_instance.to_json()}",
            f"--name={TEST_CATALOG_NAME}",
        ]
        completed: CompletedProcess = run(cli_arguments)
        assert completed.returncode == 0
    else:
        metadata_manager_with_teardown.create(TEST_CATALOG_NAME, registry_instance)

    # Let the cache finish processing the new catalog
    component_cache.wait_for_all_cache_tasks()

    # Three more components are expected once the catalog is active
    components_after_create = component_cache.get_all_components(RUNTIME_PROCESSOR)
    assert len(components_after_create) == len(initial_components) + 3

    names_after_create = {component.name for component in components_after_create}
    assert "TestOperator" in names_after_create
    assert "TestOperatorNoInputs" not in names_after_create

    # Add a second URL to the catalog's path list and push the update
    urls.append(
        "https://raw.githubusercontent.com/elyra-ai/elyra/master/elyra/tests/pipeline/resources/components"
        "/airflow_test_operator_no_inputs.py"
    )
    metadata_manager_with_teardown.update(TEST_CATALOG_NAME, registry_instance)

    component_cache.wait_for_all_cache_tasks()

    # One further component is expected after the update
    components_after_update = component_cache.get_all_components(RUNTIME_PROCESSOR)
    assert len(components_after_update) == len(initial_components) + 4

    names_after_update = {component.name for component in components_after_update}
    assert "TestOperator" in names_after_update
    assert "TestOperatorNoInputs" in names_after_update

    # Remove the catalog again
    metadata_manager_with_teardown.remove(TEST_CATALOG_NAME)

    component_cache.wait_for_all_cache_tasks()

    # The component count must be back to where it started
    components_after_remove = component_cache.get_all_components(RUNTIME_PROCESSOR)
    assert len(components_after_remove) == len(initial_components)
def test_processing_filename_runtime_specific_component(monkeypatch, processor, sample_metadata, tmpdir):
    """Compile a one-operation pipeline built from a file-based component and inspect the YAML.

    The component is injected directly into the component cache and the
    runtime configuration lookup is mocked.
    """
    # Connector used to read the component definition from the filesystem
    reader = FilesystemComponentCatalogConnector([".yaml"])

    # Location of the test resource
    here = os.path.dirname(__file__)
    absolute_path = os.path.abspath(os.path.join(here, "..", "resources", "components", "download_data.yaml"))

    # Fetch the entry for that path and take its definition
    component_definition = reader.get_entry_data({"path": absolute_path}, {}).definition

    # Build the file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="Download data",
        description="",
        op="download-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": absolute_path},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Make the cache hold exactly this one component for the processor's type
    spoofed_cache_entry = {"spoofed_catalog": {"components": {component_id: component}}}
    ComponentCache.instance()._component_cache[processor._type.name] = spoofed_cache_entry

    # Operation that uses the component
    operation_name = "Download data test"
    operation_params = {
        "url": "https://raw.githubusercontent.com/elyra-ai/elyra/master/tests/assets/helloworld.ipynb",
        "curl_options": "--location",
    }
    operation = Operation(
        id="download-data-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params=operation_params,
    )

    # Runtime configuration mock: first call yields the Metadata object, second the raw sample metadata
    mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata)
    mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Pipeline holding just that operation
    pipeline = Pipeline(
        id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="download_data.pipeline"
    )
    pipeline.operations[operation.id] = operation

    # Output location and the callable handed to the compiler
    pipeline_path = os.path.join(tmpdir, "kfp_test.yaml")
    constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline")

    # TODO Check against both argo and tekton compilations
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path)

    # Load the compiled pipeline
    with open(pipeline_path) as compiled:
        pipeline_yaml = yaml.safe_load(compiled)

    # The first template must describe the operation defined above
    pipeline_template = pipeline_yaml["spec"]["templates"][0]
    annotations = pipeline_template["metadata"]["annotations"]
    assert annotations["pipelines.kubeflow.org/task_display_name"] == operation_name
    assert pipeline_template["container"]["command"][3] == operation_params["url"]
async def test_modify_component_catalogs(jp_environ, component_cache, metadata_manager_with_teardown, create_inprocess):
    """Create, extend and delete a local-file catalog and watch the component cache follow.

    The catalog is created either through the metadata manager or through the
    ``elyra-metadata`` command line, depending on ``create_inprocess``.
    """
    # Snapshot of the components before the test catalog exists
    initial_components = component_cache.get_all_components(RUNTIME_PROCESSOR)

    # The catalog starts out with a single file path
    paths = [_get_resource_path("kfp_test_operator.yaml")]
    registry_instance = Metadata(
        schema_name="local-file-catalog",
        name=TEST_CATALOG_NAME,
        display_name="New Test Registry",
        metadata={
            "description": "A test registry",
            "runtime_type": RUNTIME_PROCESSOR.name,
            "categories": ["New Components"],
            "paths": paths,
        },
    )

    if not create_inprocess:
        cli_arguments = [
            "elyra-metadata",
            "install",
            "component-catalogs",
            f"--schema_name={registry_instance.schema_name}",
            f"--json={registry_instance.to_json()}",
            f"--name={TEST_CATALOG_NAME}",
        ]
        completed: CompletedProcess = run(cli_arguments)
        assert completed.returncode == 0
    else:
        metadata_manager_with_teardown.create(TEST_CATALOG_NAME, registry_instance)

    # Let the cache finish processing the new catalog
    component_cache.wait_for_all_cache_tasks()

    # One more component is expected once the catalog is active
    components_after_create = component_cache.get_all_components(RUNTIME_PROCESSOR)
    assert len(components_after_create) == len(initial_components) + 1

    names_after_create = {component.name for component in components_after_create}
    assert "Test Operator" in names_after_create
    assert "Test Operator No Inputs" not in names_after_create

    # Add a second file to the catalog's path list and push the update
    paths.append(_get_resource_path("kfp_test_operator_no_inputs.yaml"))
    metadata_manager_with_teardown.update(TEST_CATALOG_NAME, registry_instance)

    component_cache.wait_for_all_cache_tasks()

    # One further component is expected after the update
    components_after_update = component_cache.get_all_components(RUNTIME_PROCESSOR)
    assert len(components_after_update) == len(initial_components) + 2

    names_after_update = {component.name for component in components_after_update}
    assert "Test Operator" in names_after_update
    assert "Test Operator No Inputs" in names_after_update

    # Remove the catalog again
    metadata_manager_with_teardown.remove(TEST_CATALOG_NAME)

    component_cache.wait_for_all_cache_tasks()

    # The component count must be back to where it started
    components_after_remove = component_cache.get_all_components(RUNTIME_PROCESSOR)
    assert len(components_after_remove) == len(initial_components)
def start(self):
    """Create or replace a metadata instance from the parsed command-line options.

    ``display_name`` and the metadata dict are gathered from ``self.options``.
    With the replace flag set, the existing instance is fetched and updated;
    otherwise a new instance is created.  The outcome is printed on success
    and reported through ``log_and_exit`` on failure.
    """
    super().start()  # process options

    name = self.name_option.value
    schema_name = self.schema_name_option.value
    display_name = None
    metadata = {}

    # Collect values from the options.  MetadataSchemaProperty values and the
    # content of JSON-based options (--json or --file) both end up in the
    # metadata dict, so whichever is visited later wins for a shared key.
    # NOTE(review): presumably self.options is ordered so that JSON-based
    # options come first and individual values override them - this loop does
    # not reorder anything; confirm against where self.options is built.
    for option in self.options:
        if isinstance(option, MetadataSchemaProperty):
            # Optional properties without a value are left out (unless of type "null")
            is_unset_optional = not option.required and not option.value and option.type != "null"
            if not is_unset_optional:
                metadata[option.name] = option.value
        elif isinstance(option, SchemaProperty):
            if option.name == "display_name":
                display_name = option.value
        elif isinstance(option, JSONBasedOption):
            metadata.update(option.metadata)

    # A display_name is only mandatory when creating
    if display_name is None and self.replace_flag.value is False:
        self.log_and_exit(f"Could not determine display_name from schema '{schema_name}'")

    ex_msg = None
    new_instance = None
    try:
        if not self.replace_flag.value:
            # Create a brand-new instance
            instance = Metadata(schema_name=schema_name, name=name, display_name=display_name, metadata=metadata)
            new_instance = self.metadata_manager.create(name, instance)
        else:
            # Replace: load the current instance and apply the new values on top
            updated_instance = self.metadata_manager.get(name)
            updated_instance.schema_name = schema_name
            if display_name:
                updated_instance.display_name = display_name
            updated_instance.metadata.update(metadata)
            new_instance = self.metadata_manager.update(name, updated_instance)
    except Exception as ex:
        ex_msg = str(ex)

    if new_instance:
        print(
            f"Metadata instance '{new_instance.name}' for schema '{schema_name}' has been written "
            f"to: {new_instance.resource}"
        )
        return

    if ex_msg:
        self.log_and_exit(
            f"The following exception occurred saving metadata instance "
            f"for schema '{schema_name}': {ex_msg}",
            display_help=False,
        )
    else:
        self.log_and_exit(
            f"A failure occurred saving metadata instance '{name}' for "
            f"schema '{schema_name}'.",
            display_help=False,
        )
def test_cc_pipeline_component_no_input(monkeypatch, processor, component_cache, sample_metadata, tmpdir):
    """
    Verifies that cc_pipeline can handle KFP component definitions that don't
    include any inputs
    """
    # Connector used to read the component definition from the filesystem
    reader = FilesystemComponentCatalogConnector([".yaml"])

    # Location of the test resource; it must exist
    resource_file = (
        Path(__file__).parent / ".." / "resources" / "components" / "kfp_test_operator_no_inputs.yaml"
    ).resolve()
    assert resource_file.is_file()
    cpath = str(resource_file)

    # Fetch the entry for that path and take its definition
    component_definition = reader.get_entry_data({"path": cpath}, {}).definition

    # Build the file-based component
    component_id = "test-component"
    component = Component(
        id=component_id,
        name="No input data",
        description="",
        op="no-input-data",
        catalog_type="elyra-kfp-examples-catalog",
        component_reference={"path": cpath},
        definition=component_definition,
        properties=[],
        categories=[],
    )

    # Make the cache hold exactly this one component for the processor's type
    spoofed_cache_entry = {"spoofed_catalog": {"components": {component_id: component}}}
    component_cache._component_cache[processor._type.name] = spoofed_cache_entry

    # Operation that uses the component, without any parameters
    operation_name = "no-input-test"
    operation = Operation(
        id="no-input-id",
        type="execution_node",
        classifier=component_id,
        name=operation_name,
        parent_operation_ids=[],
        component_params={},
    )

    # Runtime configuration mock: first call yields the Metadata object, second the raw sample metadata
    mocked_runtime = Metadata(name="test-metadata", display_name="test", schema_name="kfp", metadata=sample_metadata)
    mocked_func = mock.Mock(return_value="default", side_effect=[mocked_runtime, sample_metadata])
    monkeypatch.setattr(processor, "_get_metadata_configuration", mocked_func)

    # Pipeline holding just that operation
    pipeline = Pipeline(
        id="pipeline-id", name="kfp_test", runtime="kfp", runtime_config="test", source="no_input.pipeline"
    )
    pipeline.operations[operation.id] = operation

    constructed_pipeline_function = lambda: processor._cc_pipeline(pipeline=pipeline, pipeline_name="test_pipeline")
    pipeline_path = str(Path(tmpdir) / "no_inputs_test.yaml")

    # The test passes when compilation completes without raising
    kfp_argo_compiler.Compiler().compile(constructed_pipeline_function, pipeline_path)
async def test_directory_based_component_catalog(
    component_cache, metadata_manager_with_teardown, create_inprocess, tmpdir
):
    """Register a directory-based catalog and check that its components show up in the cache.

    The catalog is created either through the metadata manager or through the
    ``elyra-metadata`` command line, depending on ``create_inprocess``.
    """
    # The cache has to start out empty so that other tests cannot influence the counts below
    initial_components = component_cache.get_all_components(RUNTIME_PROCESSOR)
    assert len(initial_components) == 0, initial_components[0].name

    # Temporary directory that serves as the catalog
    catalog_dir = Path(tmpdir) / "catalog"
    catalog_dir.mkdir()

    # Copy a few component files into the catalog directory and remember the
    # component name declared in each of them (file name -> component name)
    directory_entries = dict.fromkeys(("download_data.yaml", "kfp_test_operator_no_inputs.yaml"))
    for file_name in directory_entries:
        destination = catalog_dir / file_name
        with open(_get_resource_path(file_name), "r") as source_handle:
            data = source_handle.read()
        directory_entries[file_name] = yaml.safe_load(data)["name"]
        # The content is written unchanged
        with open(destination, "w") as target_handle:
            target_handle.write(data)
        assert destination.is_file()

    # Catalog instance pointing at the directory
    registry_instance = Metadata(
        schema_name="local-directory-catalog",
        name=TEST_CATALOG_NAME,
        display_name="New Test Registry",
        metadata={
            "description": "A test registry",
            "runtime_type": RUNTIME_PROCESSOR.name,
            "categories": ["New Components"],
            "paths": [str(catalog_dir)],
        },
    )

    if not create_inprocess:
        cli_arguments = [
            "elyra-metadata",
            "install",
            "component-catalogs",
            f"--schema_name={registry_instance.schema_name}",
            f"--json={registry_instance.to_json()}",
            f"--name={TEST_CATALOG_NAME}",
        ]
        completed: CompletedProcess = run(cli_arguments)
        assert completed.returncode == 0
    else:
        metadata_manager_with_teardown.create(TEST_CATALOG_NAME, registry_instance)

    # Let the cache finish processing the new catalog
    component_cache.wait_for_all_cache_tasks()

    # The cache must now hold exactly as many components as files were copied
    components_after_create = component_cache.get_all_components(RUNTIME_PROCESSOR)
    assert len(components_after_create) == len(directory_entries), components_after_create

    # ... and each declared component name must be present
    cached_names = [component.name for component in components_after_create]
    for expected_name in directory_entries.values():
        assert expected_name in cached_names

    # Remove the catalog and wait for the cache to settle
    metadata_manager_with_teardown.remove(TEST_CATALOG_NAME)
    component_cache.wait_for_all_cache_tasks()
def start(self):
    """Import every ``.json`` file of the source directory as a metadata instance.

    Each file must contain a JSON object with the keys ``schema_name``,
    ``display_name`` and ``metadata``; the instance name is the file name
    without its extension.  With the overwrite flag set, an existing instance
    is updated, otherwise it is reported as a failure.  Files that cannot be
    read, parsed or stored are collected and listed in a table at the end
    instead of aborting the whole import.
    """
    super().start()  # process options

    src_directory = self.directory_option.value

    try:
        json_files = [f for f in os.listdir(src_directory) if f.endswith(".json")]
    except OSError as e:
        print(f"Unable to reach the '{src_directory}' directory: {e.strerror}: '{e.filename}'")
        self.exit(1)

    if not json_files:
        print(f"No instances for import found in the '{src_directory}' directory")
        return

    metadata_file = None
    non_imported_files = []  # [file name, reason] pairs

    for file in json_files:
        filepath = os.path.join(src_directory, file)
        try:
            with open(filepath) as f:
                metadata_file = json.loads(f.read())
        except OSError as e:
            # strerror can be None; fall back to the exception text so the
            # report below always has a string to pad
            non_imported_files.append([file, e.strerror or str(e)])
            continue
        except ValueError as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses;
            # report the file rather than aborting the remaining imports
            non_imported_files.append([file, f"Could not parse the import file '{filepath}': {e}"])
            continue

        if not isinstance(metadata_file, dict):
            # Valid JSON, but not an object, so the key lookups below cannot work
            non_imported_files.append([file, f"The import file '{filepath}' does not contain a JSON object"])
            continue

        name = os.path.splitext(file)[0]
        try:
            schema_name = metadata_file["schema_name"]
            display_name = metadata_file["display_name"]
            metadata = metadata_file["metadata"]
        except KeyError as e:
            non_imported_files.append([file, f"Could not find '{e.args[0]}' key in the import file '{filepath}'"])
            continue

        try:
            if self.overwrite_flag.value:  # if overwrite flag is true
                try:  # try updating the existing instance
                    updated_instance = self.metadata_manager.get(name)
                    updated_instance.schema_name = schema_name
                    if display_name:
                        updated_instance.display_name = display_name
                    if name:
                        updated_instance.name = name
                    updated_instance.metadata.update(metadata)
                    self.metadata_manager.update(name, updated_instance)
                except MetadataNotFoundError:  # no existing instance - create new
                    instance = Metadata(
                        schema_name=schema_name, name=name, display_name=display_name, metadata=metadata
                    )
                    self.metadata_manager.create(name, instance)
            else:
                instance = Metadata(schema_name=schema_name, name=name, display_name=display_name, metadata=metadata)
                self.metadata_manager.create(name, instance)
        except MetadataExistsError as e:
            non_imported_files.append([file, f"{str(e)} Use '--overwrite' to update."])
        except Exception as e:
            # Any other failure is recorded per file so the remaining files are still processed
            non_imported_files.append([file, str(e)])

    instance_count_not_imported = len(non_imported_files)
    instance_count_imported = len(json_files) - instance_count_not_imported

    print(f"Imported {instance_count_imported} " + ("instance" if instance_count_imported == 1 else "instances"))

    if instance_count_not_imported > 0:
        print(
            f"{instance_count_not_imported} "
            + ("instance" if instance_count_not_imported == 1 else "instances")
            + " could not be imported"
        )

        non_imported_files.sort(key=lambda x: x[0])
        print("\nThe following files could not be imported: ")

        # pad to width of longest file and reason
        max_file_name_len = len("File")
        max_reason_len = len("Reason")
        for file in non_imported_files:
            max_file_name_len = max(len(file[0]), max_file_name_len)
            max_reason_len = max(len(file[1]), max_reason_len)

        print(f"{'File'.ljust(max_file_name_len)} {'Reason'.ljust(max_reason_len)}")
        print(f"{'----'.ljust(max_file_name_len)} {'------'.ljust(max_reason_len)}")
        for file in non_imported_files:
            print(f"{file[0].ljust(max_file_name_len)} {file[1].ljust(max_reason_len)}")