def test_parse_kfp_component_file_no_inputs():
    """Parse a KFP component that has no inputs and check the rendered property counts."""
    catalog_type = "local-file-catalog"

    # Reader for filesystem-type component definitions
    connector = FilesystemComponentCatalogConnector([".yaml"])

    # Location of the component definition under test
    location = {"path": _get_resource_path("kfp_test_operator_no_inputs.yaml")}

    # Catalog instance that owns the entry
    catalog_metadata = {
        "categories": ["Test"],
        "runtime_type": RUNTIME_PROCESSOR.name,
    }
    catalog = ComponentCatalogMetadata(schema_name=catalog_type,
                                       metadata=catalog_metadata)

    # Assemble the catalog entry data structures required for parsing
    catalog_entry = CatalogEntry(connector.get_entry_data(location, {}),
                                 location, catalog, ["path"])

    # Parse the entry and render the canvas properties of its first component
    kfp_parser = KfpComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    properties = ComponentCache.to_canvas_properties(
        kfp_parser.parse(catalog_entry)[0])

    # Exactly four parameters are expected: 'label' and 'component_source'
    # (common to every component), the component description (present for
    # this component) and this component's output parameter
    expected_param_count = 4
    current_params = properties["current_parameters"]
    assert len(current_params.keys()) == expected_param_count
    assert len(properties["parameters"]) == expected_param_count
    ui_hints = properties["uihints"]
    assert len(ui_hints["parameter_info"]) == expected_param_count

    # One group per parameter, plus one for the output group header
    # and one for the component_source header
    nested_groups = ui_hints["group_info"][0]["group_info"]
    assert len(nested_groups) == expected_param_count + 2

    # The two common parameters must still be rendered correctly
    assert current_params["label"] == ""
    expected_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference
    })
    assert current_params["component_source"] == expected_source
def parse(self, catalog_entry: CatalogEntry) -> Optional[List[Component]]:
    """
    Build one Component per Operator class found in the entry's definition.

    :param catalog_entry: the catalog entry whose definition is parsed
    :returns: None when the definition is empty or cannot be parsed at all;
        otherwise the list of components that were parsed successfully
        (classes lacking an init function, or whose properties fail to
        parse, are logged and left out)
    """
    definition = catalog_entry.entry_data.definition
    if not definition:
        return None

    entry_reference = catalog_entry.entry_reference

    # Collect every Operator class declared in the definition
    try:
        parsed_class_nodes = self._parse_all_classes(definition)
        has_multiple_operators = len(parsed_class_nodes) > 1
    except Exception as e:
        self.log.error(
            f"Content associated with identifier '{entry_reference}' could not be parsed: {e}. Skipping..."
        )
        return None

    components: List[Component] = []
    for component_class, content in parsed_class_nodes.items():
        # A class without an init function offers nothing to derive properties from
        if not content.get("init_function"):
            self.log.warning(
                f"Operator '{component_class}' associated with identifier '{entry_reference}' "
                f"does not have an __init__ function. Skipping...")
            continue

        # When several operators share one file, the Operator class name is
        # appended to the id to keep each component id distinct
        if has_multiple_operators:
            component_id = catalog_entry.id + f":{component_class}"
        else:
            component_id = catalog_entry.id

        # Derive the properties of this Operator class
        try:
            component_properties: List[
                ComponentParameter] = self._parse_properties_from_init(
                    **content)
        except Exception as e:
            self.log.error(
                f"Properties of operator '{component_class}' associated with identifier '{entry_reference}' "
                f"could not be parsed: {e}. Skipping...")
        else:
            components.append(
                catalog_entry.get_component(
                    id=component_id,
                    name=component_class,
                    description=DEFAULT_DESCRIPTION,
                    properties=component_properties,
                    file_extension=self._file_types[0],
                ))

    return components
def test_parse_kfp_component_url():
    """Parse a KFP component fetched by URL and check its rendered parameter values."""

    def string_control(value):
        # Expected rendering of a parameter whose active control is a StringControl
        return {"StringControl": value, "activeControl": "StringControl"}

    catalog_type = "url-catalog"

    # Reader for URL-type component definitions
    connector = UrlComponentCatalogConnector([".yaml"])

    # Location of the component definition under test
    location = {
        "url": "https://raw.githubusercontent.com/kubeflow/pipelines/1.4.1/components/notebooks/Run_notebook_using_papermill/component.yaml"  # noqa: E501
    }

    # Catalog instance that owns the entry
    catalog_metadata = {
        "categories": ["Test"],
        "runtime_type": RUNTIME_PROCESSOR.name,
    }
    catalog = ComponentCatalogMetadata(schema_name=catalog_type,
                                       metadata=catalog_metadata)

    # Assemble the catalog entry data structures required for parsing
    catalog_entry = CatalogEntry(connector.get_entry_data(location, {}),
                                 location, catalog, ["url"])

    # Parse the entry and render the canvas properties of its first component
    kfp_parser = KfpComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    properties = ComponentCache.to_canvas_properties(
        kfp_parser.parse(catalog_entry)[0])
    current_params = properties["current_parameters"]

    # Component parameters are prefixed, system parameters are not,
    # and all of them hold the expected values
    assert current_params["label"] == ""
    expected_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference
    })
    assert current_params["component_source"] == expected_source
    # Default value for type `inputpath`
    assert current_params["elyra_notebook"] == "None"
    assert current_params["elyra_parameters"] == string_control("{}")
    assert current_params["elyra_packages_to_install"] == string_control("[]")
    assert current_params["elyra_input_data"] == string_control("")
def test_parse_airflow_component_url():
    """Parse an Airflow operator fetched by URL and check its rendered parameter values."""
    catalog_type = "url-catalog"

    # Reader for URL-type component definitions
    connector = UrlComponentCatalogConnector([".py"])

    # Location of the component definition under test
    location = {
        "url": "https://raw.githubusercontent.com/apache/airflow/1.10.15/airflow/operators/bash_operator.py"  # noqa: E501
    }

    # Catalog instance that owns the entry
    catalog_metadata = {
        "categories": ["Test"],
        "runtime_type": RUNTIME_PROCESSOR.name,
    }
    catalog = ComponentCatalogMetadata(schema_name=catalog_type,
                                       metadata=catalog_metadata)

    # Assemble the catalog entry data structures required for parsing
    catalog_entry = CatalogEntry(connector.get_entry_data(location, {}),
                                 location, catalog, ["url"])

    # Parse the entry and render the canvas properties of its first component
    component_parser = ComponentParser.create_instance(
        platform=RUNTIME_PROCESSOR)
    properties = ComponentCache.to_canvas_properties(
        component_parser.parse(catalog_entry)[0])

    # Component parameters are prefixed, system parameters are not,
    # and all of them hold the expected values
    assert properties["current_parameters"]["label"] == ""

    def active_value(param_name):
        # Value held by whichever control is marked active for the parameter
        rendered = properties["current_parameters"][param_name]
        active_control = rendered["activeControl"]
        return rendered[active_control]

    expected_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference
    })
    assert properties["current_parameters"][
        "component_source"] == expected_source
    assert active_value("elyra_bash_command") == ""
    assert active_value("elyra_xcom_push") is True
    assert active_value("elyra_env") == "{}"  # {}
    assert active_value("elyra_output_encoding") == "utf-8"
async def test_parse_components_invalid_yaml(caplog):
    """Check that invalid KFP YAML yields no component and logs the reason."""
    # NOTE(review): this coroutine contains no awaits; it presumably relies on
    # an async-aware pytest plugin to be executed - confirm.

    # Read the definition straight from disk (by-passing catalog reader functionality)
    resource_path = _get_resource_path("kfp_test_invalid_component.yaml")
    with open(resource_path, "r") as resource_file:
        definition = resource_file.read()

    # Manually construct the entry location and the catalog instance
    location = {"path": resource_path}
    catalog_type = "local-file-catalog"
    catalog_metadata = {
        "categories": ["Test"],
        "runtime_type": RUNTIME_PROCESSOR.name,
    }
    catalog = ComponentCatalogMetadata(schema_name=catalog_type,
                                       metadata=catalog_metadata)

    # Assemble the catalog entry data structures required for parsing
    catalog_entry = CatalogEntry(EntryData(definition=definition), location,
                                 catalog, ["path"])

    kfp_parser = KfpComponentParser.create_instance(platform=RUNTIME_PROCESSOR)

    # Failed YAML schema validation returns None ...
    assert kfp_parser.parse(catalog_entry) is None

    # ... and the validation error is captured in the log
    for expected_fragment in (
            "Invalid format of YAML definition for component",
            "Failed validating 'type'",
            "On instance['inputs'][0]['name']:\n 2",
    ):
        assert expected_fragment in caplog.text
    caplog.clear()

    # Prepend a line that makes YAML safe_load itself fail, then re-parse
    catalog_entry.entry_data.definition = "key with no mapping\n" + definition

    # Failed YAML safe_load returns None ...
    assert kfp_parser.parse(catalog_entry) is None

    # ... and the load error is captured in the log
    for expected_fragment in (
            "Could not load YAML definition for component",
            "mapping values are not allowed here",
    ):
        assert expected_fragment in caplog.text
def parse(self, catalog_entry: CatalogEntry) -> Optional[List[Component]]:
    """
    Build the single Component described by the entry's YAML definition.

    :param catalog_entry: the catalog entry whose definition is parsed
    :returns: None when no YAML object could be read for the entry;
        otherwise a one-element list holding the parsed component
    """
    component_yaml = self._read_component_yaml(catalog_entry)
    if not component_yaml:
        return None

    # Collapse every run of whitespace in the description into a single
    # space; a missing or empty description becomes the empty string
    raw_description = component_yaml.get("description")
    description = " ".join(raw_description.split()) if raw_description else ""

    component_properties = self._parse_properties(component_yaml)

    return [
        catalog_entry.get_component(
            id=catalog_entry.id,
            name=component_yaml.get("name"),
            description=description,
            properties=component_properties,
            file_extension=self._file_types[0],
        )
    ]
def test_parse_airflow_component_file():
    """Parse a local Airflow operator file and check the properties of each operator it defines."""
    # Define the appropriate reader for a filesystem-type component definition
    airflow_supported_file_types = [".py"]
    reader = FilesystemComponentCatalogConnector(airflow_supported_file_types)

    # Read contents of given path
    path = _get_resource_path("airflow_test_operator.py")
    catalog_entry_data = {"path": path}

    # Construct a catalog instance
    catalog_type = "local-file-catalog"
    catalog_instance = ComponentCatalogMetadata(schema_name=catalog_type,
                                                metadata={
                                                    "categories": ["Test"],
                                                    "runtime_type":
                                                    RUNTIME_PROCESSOR.name
                                                })

    # Build the catalog entry data structures required for parsing
    entry_data = reader.get_entry_data(catalog_entry_data, {})
    catalog_entry = CatalogEntry(entry_data, catalog_entry_data,
                                 catalog_instance, ["path"])

    # Parse the component entry
    parser = ComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    components = parser.parse(catalog_entry)
    # TestOperator, DeriveFromTestOperator, and DeriveFromImportedOperator
    assert len(components) == 3

    # Split components list into its constituent operators (ordered by id)
    import_test_op, derive_test_op, test_op = sorted(
        components, key=lambda component: component.id)

    # The helpers below read 'properties_json' at call time, so each of them
    # always inspects the most recently rendered operator.

    # Helper method to locate the parameter_info entry of the requested
    # parameter; returns None when no entry references it
    def find_parameter_info(param_name):
        for prop_info in properties_json["uihints"]["parameter_info"]:
            if prop_info.get("parameter_ref") == param_name:
                return prop_info
        return None

    # Helper method to retrieve the requested parameter value from the dictionary
    def get_parameter_value(param_name):
        property_dict = properties_json["current_parameters"][param_name]
        return property_dict[property_dict["activeControl"]]

    # Helper method to retrieve the requested parameter format from the dictionary
    def get_parameter_format(param_name, control_id="StringControl"):
        prop_info = find_parameter_info(param_name)
        if prop_info is None:
            return None
        return prop_info["data"]["controls"][control_id]["format"]

    # Helper method to retrieve the requested parameter description from the dictionary
    def get_parameter_description(param_name):
        prop_info = find_parameter_info(param_name)
        if prop_info is None:
            return None
        return prop_info["description"]["default"]

    # Helper method to retrieve whether the requested parameter is required
    def get_parameter_required(param_name):
        prop_info = find_parameter_info(param_name)
        if prop_info is None:
            return None
        return prop_info["data"]["required"]

    # Retrieve properties for TestOperator
    # Test Operator does not include type hints for the init function args
    properties_json = ComponentCache.to_canvas_properties(test_op)

    # Ensure system parameters are not prefixed and hold correct values
    assert properties_json["current_parameters"]["label"] == ""

    component_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference
    })
    assert properties_json["current_parameters"][
        "component_source"] == component_source

    # Ensure component parameters are prefixed with 'elyra_' and values are as expected
    assert get_parameter_value("elyra_str_no_default") == ""
    assert get_parameter_value("elyra_str_default") == "default"
    assert get_parameter_value("elyra_str_empty") == ""
    assert get_parameter_value("elyra_str_not_in_docstring") == ""

    assert get_parameter_value("elyra_bool_no_default") is False
    assert get_parameter_value("elyra_bool_default_false") is False
    assert get_parameter_value("elyra_bool_default_true") is True
    assert get_parameter_value("elyra_bool_not_in_docstring") is False

    assert get_parameter_value("elyra_int_no_default") == 0
    assert get_parameter_value("elyra_int_default_zero") == 0
    assert get_parameter_value("elyra_int_default_non_zero") == 2
    assert get_parameter_value("elyra_int_not_in_docstring") == 3

    assert get_parameter_value("elyra_dict_default_is_none") == "{}"  # {}
    assert get_parameter_value("elyra_list_default_is_none") == "[]"  # []

    # Ensure that type information is inferred correctly for properties that
    # define 'unusual' types, such as 'a dictionary of lists'
    assert get_parameter_format("elyra_unusual_type_dict") == "dictionary"
    assert get_parameter_format("elyra_unusual_type_list") == "list"

    # Ensure that type information falls back to string if no type hint present
    # and no ':type: <type info>' phrase found in docstring
    assert get_parameter_format("elyra_fallback_type") == "string"

    # Ensure component parameters are marked as required in the correct circumstances
    # (parameter is required if there is no default value provided or if a type hint
    # does not include 'Optional[...]')
    assert get_parameter_required("elyra_str_no_default") is True
    assert get_parameter_required("elyra_str_default") is False
    assert get_parameter_required("elyra_str_empty") is False

    # Ensure descriptions are rendered properly with type hint in parentheses
    assert (get_parameter_description("elyra_unusual_type_dict") ==
            "a dictionary parameter with the "
            "phrase 'list' in type description "
            "(type: a dictionary of arrays)")
    assert (get_parameter_description("elyra_unusual_type_list") ==
            "a list parameter with the phrase "
            "'string' in type description "
            "(type: a list of strings)")
    assert get_parameter_description("elyra_fallback_type") == "(type: str)"

    # Ensure that a long description with line wrapping and a backslash escape has rendered
    # (and hence did not raise an error during json.loads in the properties API request).
    # The backslash is written as '\\' so the literal has no invalid escape
    # sequence; the resulting text is still a backslash followed by '_'.
    # NOTE(review): the newline replacement below suggests this literal is meant
    # to span lines - confirm its whitespace against the test resource file.
    parsed_description = """a string parameter with a very long description that wraps lines and also has an escaped underscore in it, as shown here: (\\_) # noqa W605"""  # noqa: E501
    # modify desc acc. to parser rules
    modified_description = parsed_description.replace("\n",
                                                      " ") + " (type: str)"
    assert get_parameter_description(
        "elyra_long_description_property") == modified_description

    # Retrieve properties for DeriveFromTestOperator
    # DeriveFromTestOperator includes type hints for all init arguments
    properties_json = ComponentCache.to_canvas_properties(derive_test_op)

    # Ensure default values are parsed correct in the case where type hints are present
    assert get_parameter_value("elyra_str_default") == "default"
    assert get_parameter_value("elyra_bool_default") is True
    assert get_parameter_value("elyra_int_default") == 2

    # Ensure component parameters are prefixed with 'elyra_' and types are as expected
    # in the case when a type hint is provided (and regardless of whether or not the
    # parameter type is included in the docstring)
    assert get_parameter_format("elyra_str_no_default") == "string"
    assert get_parameter_format("elyra_str_default") == "string"
    assert get_parameter_format("elyra_str_optional_default") == "string"
    assert get_parameter_format("elyra_str_not_in_docstring") == "string"

    assert get_parameter_format("elyra_bool_no_default",
                                "BooleanControl") == "boolean"
    assert get_parameter_format("elyra_bool_default",
                                "BooleanControl") == "boolean"
    assert get_parameter_format("elyra_bool_not_in_docstring",
                                "BooleanControl") == "boolean"

    assert get_parameter_format("elyra_int_no_default",
                                "NumberControl") == "number"
    assert get_parameter_format("elyra_int_default",
                                "NumberControl") == "number"
    assert get_parameter_format("elyra_int_not_in_docstring",
                                "NumberControl") == "number"

    assert get_parameter_format("elyra_list_optional_default") == "list"

    # Ensure component parameters are marked as required in the correct circumstances
    assert get_parameter_required("elyra_str_no_default") is True
    assert get_parameter_required("elyra_str_default") is False
    assert get_parameter_required("elyra_str_optional_default") is False
    assert get_parameter_required("elyra_str_not_in_docstring") is True

    # Retrieve properties for DeriveFromImportedOperator
    # DeriveFromImportedOperator includes type hints for dictionary and
    # list values to test the more complex parsing required in this case
    properties_json = ComponentCache.to_canvas_properties(import_test_op)

    # Ensure component parameters are prefixed with 'elyra_' and types are as expected
    assert get_parameter_format("elyra_dict_no_default") == "dictionary"
    assert get_parameter_format(
        "elyra_dict_optional_no_default") == "dictionary"
    assert get_parameter_format("elyra_nested_dict_default") == "dictionary"
    assert get_parameter_format("elyra_dict_not_in_docstring") == "dictionary"

    assert get_parameter_format("elyra_list_no_default") == "list"
    assert get_parameter_format("elyra_list_optional_no_default") == "list"
    assert get_parameter_format("elyra_list_default") == "list"
    assert get_parameter_format("elyra_list_optional_default") == "list"
    assert get_parameter_format("elyra_list_not_in_docstring") == "list"

    assert get_parameter_value("elyra_dict_no_default") == "{}"
    assert get_parameter_value("elyra_list_no_default") == "[]"
async def test_parse_components_additional_metatypes():
    """Parse a KFP component fetched by URL and check values rendered for its various input types."""
    # NOTE(review): this coroutine contains no awaits; it presumably relies on
    # an async-aware pytest plugin to be executed - confirm.

    def string_control(value):
        # Expected rendering of a parameter whose active control is a StringControl
        return {"StringControl": value, "activeControl": "StringControl"}

    def number_control(value):
        # Expected rendering of a parameter whose active control is a NumberControl
        return {"NumberControl": value, "activeControl": "NumberControl"}

    catalog_type = "url-catalog"

    # Reader for URL-type component definitions
    connector = UrlComponentCatalogConnector([".yaml"])

    # Location of the component definition under test
    location = {
        "url": "https://raw.githubusercontent.com/kubeflow/pipelines/1.4.1/components/keras/Train_classifier/from_CSV/component.yaml"  # noqa: E501
    }

    # Catalog instance that owns the entry
    catalog_metadata = {
        "categories": ["Test"],
        "runtime_type": RUNTIME_PROCESSOR.name,
    }
    catalog = ComponentCatalogMetadata(schema_name=catalog_type,
                                       metadata=catalog_metadata)

    # Assemble the catalog entry data structures required for parsing
    catalog_entry = CatalogEntry(connector.get_entry_data(location, {}),
                                 location, catalog, ["url"])

    # Parse the entry and render the canvas properties of its first component
    kfp_parser = KfpComponentParser()
    properties = ComponentCache.to_canvas_properties(
        kfp_parser.parse(catalog_entry)[0])
    current_params = properties["current_parameters"]

    # Component parameters are prefixed, system parameters are not,
    # and all of them hold the expected values
    assert current_params["label"] == ""
    expected_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference
    })
    assert current_params["component_source"] == expected_source

    assert current_params["elyra_training_features"] == "None"  # inputPath
    assert current_params["elyra_training_labels"] == "None"  # inputPath
    assert current_params["elyra_network_json"] == "None"  # inputPath
    assert current_params["elyra_loss_name"] == string_control(
        "categorical_crossentropy")
    assert current_params["elyra_num_classes"] == number_control(0)
    assert current_params["elyra_optimizer"] == string_control("rmsprop")
    assert current_params["elyra_optimizer_config"] == string_control("")
    assert current_params["elyra_learning_rate"] == number_control(0.01)
    assert current_params["elyra_num_epochs"] == number_control(100)
    assert current_params["elyra_batch_size"] == number_control(32)
    assert current_params["elyra_metrics"] == string_control("['accuracy']")
    assert current_params["elyra_random_seed"] == number_control(0)
def test_parse_kfp_component_file():
    """Parse a local KFP component file and check rendered values, required flags, types and descriptions."""

    def control(kind, value):
        # Expected rendering of a parameter whose active control is `kind`
        return {kind: value, "activeControl": kind}

    catalog_type = "local-file-catalog"

    # Reader for filesystem-type component definitions
    connector = FilesystemComponentCatalogConnector([".yaml"])

    # Location of the component definition under test
    location = {"path": _get_resource_path("kfp_test_operator.yaml")}

    # Catalog instance that owns the entry
    catalog_metadata = {
        "categories": ["Test"],
        "runtime_type": RUNTIME_PROCESSOR.name,
    }
    catalog = ComponentCatalogMetadata(schema_name=catalog_type,
                                       metadata=catalog_metadata)

    # Assemble the catalog entry data structures required for parsing
    catalog_entry = CatalogEntry(connector.get_entry_data(location, {}),
                                 location, catalog, ["path"])

    # Parse the entry and render the canvas properties of its first component
    kfp_parser = KfpComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
    properties = ComponentCache.to_canvas_properties(
        kfp_parser.parse(catalog_entry)[0])
    current_params = properties["current_parameters"]

    def parameter_info(param_ref):
        # First parameter_info entry referencing `param_ref`;
        # raises StopIteration when there is none
        return next(info for info in properties["uihints"]["parameter_info"]
                    if info.get("parameter_ref") == param_ref)

    # Ensure description is rendered even with an unescaped character
    expected_description = 'This component description contains an unescaped " character'
    assert current_params["component_description"] == expected_description

    # Component parameters are prefixed, system parameters are not,
    # and all of them hold the expected values
    assert current_params["label"] == ""
    expected_source = json.dumps({
        "catalog_type": catalog_type,
        "component_ref": catalog_entry.entry_reference
    })
    assert current_params["component_source"] == expected_source

    assert current_params["elyra_test_string_no_default"] == control(
        "StringControl", "")
    assert current_params["elyra_test_string_default_value"] == control(
        "StringControl", "default")
    assert current_params["elyra_test_string_default_empty"] == control(
        "StringControl", "")

    assert current_params["elyra_test_bool_default"] == control(
        "BooleanControl", False)
    assert current_params["elyra_test_bool_false"] == control(
        "BooleanControl", False)
    assert current_params["elyra_test_bool_true"] == control(
        "BooleanControl", True)

    assert current_params["elyra_test_int_default"] == control(
        "NumberControl", 0)
    assert current_params["elyra_test_int_zero"] == control(
        "NumberControl", 0)
    assert current_params["elyra_test_int_non_zero"] == control(
        "NumberControl", 1)

    assert current_params["elyra_test_float_default"] == control(
        "NumberControl", 0.0)
    assert current_params["elyra_test_float_zero"] == control(
        "NumberControl", 0.0)
    assert current_params["elyra_test_float_non_zero"] == control(
        "NumberControl", 1.0)

    assert current_params["elyra_test_dict_default"] == control(
        "StringControl", "{}")  # {}
    assert current_params["elyra_test_list_default"] == control(
        "StringControl", "[]")  # []

    # Ensure that the 'required' attribute was set correctly. KFP components default to required
    # unless explicitly marked otherwise in component YAML.
    required_info = parameter_info("elyra_test_required_property")
    assert required_info["data"]["required"] is True

    optional_info = parameter_info("elyra_test_optional_property")
    assert optional_info["data"]["required"] is False

    default_required_info = parameter_info(
        "elyra_test_required_property_default")
    assert default_required_info["data"]["required"] is True

    # Ensure that type information is inferred correctly
    dict_info = parameter_info("elyra_test_unusual_type_dict")
    assert dict_info["data"]["controls"]["StringControl"][
        "format"] == "dictionary"

    list_info = parameter_info("elyra_test_unusual_type_list")
    assert list_info["data"]["controls"]["StringControl"]["format"] == "list"

    string_info = parameter_info("elyra_test_unusual_type_string")
    assert string_info["data"]["controls"]["StringControl"][
        "format"] == "string"

    file_info = parameter_info("elyra_test_unusual_type_file")
    assert file_info["data"]["format"] == "inputpath"

    untyped_info = parameter_info("elyra_test_unusual_type_notgiven")
    assert untyped_info["data"]["controls"]["StringControl"][
        "format"] == "string"

    # Ensure descriptions are rendered properly with type hint in parentheses
    assert (dict_info["description"]["default"] ==
            "The test command description "
            "(type: Dictionary of arrays)")
    assert list_info["description"][
        "default"] == "The test command description (type: An array)"
    assert string_info["description"][
        "default"] == "The test command description (type: A string)"
    # No data type info is included in parentheses for inputPath variables
    assert file_info["description"][
        "default"] == "The test command description"
    assert untyped_info["description"][
        "default"] == "The test command description (type: string)"