def test_serialization(X_y_binary, ts_data, tmpdir, helper_functions):
    """Round-trip every registered component through save()/load() at every
    supported pickle protocol and verify nothing is lost.

    Checks, for each component: parameters, describe() output, and (for
    non-ensemble estimators) feature_importance survive serialization.
    """
    path = os.path.join(str(tmpdir), 'component.pkl')
    for component_class in all_components():
        print('Testing serialization of component {}'.format(component_class.name))
        try:
            component = helper_functions.safe_init_component_with_njobs_1(component_class)
        except EnsembleMissingPipelinesError:
            # Stacked ensembles cannot be default-constructed; build them with
            # a minimal single-component input pipeline instead.
            if component_class == StackedEnsembleClassifier:
                component = component_class(input_pipelines=[make_pipeline_from_components([RandomForestClassifier()], ProblemTypes.BINARY)], n_jobs=1)
            elif component_class == StackedEnsembleRegressor:
                component = component_class(input_pipelines=[make_pipeline_from_components([RandomForestRegressor()], ProblemTypes.REGRESSION)], n_jobs=1)
            else:
                # BUGFIX: previously an unexpected EnsembleMissingPipelinesError
                # left `component` unbound (NameError) or stale from the prior
                # loop iteration, silently re-testing the wrong component.
                raise
        # Time-series-only estimators need time-series data to fit.
        if isinstance(component, Estimator) and ProblemTypes.TIME_SERIES_REGRESSION in component.supported_problem_types:
            X, y = ts_data
        else:
            X, y = X_y_binary
        component.fit(X, y)

        for pickle_protocol in range(cloudpickle.DEFAULT_PROTOCOL + 1):
            component.save(path, pickle_protocol=pickle_protocol)
            loaded_component = ComponentBase.load(path)
            assert component.parameters == loaded_component.parameters
            assert component.describe(return_dict=True) == loaded_component.describe(return_dict=True)
            # Ensembles don't expose per-feature importances; skip them.
            if issubclass(component_class, Estimator) and not isinstance(component, (StackedEnsembleClassifier, StackedEnsembleRegressor)):
                assert (component.feature_importance == loaded_component.feature_importance).all()
def generate_pipeline_code(element):
    """Build runnable Python source that reconstructs an EvalML pipeline.

    Arguments:
        element (pipeline instance): Pipeline to turn into source code.

    Returns:
        str: Python code (imports + class definition + instantiation) that
        recreates the pipeline. Custom component implementations are NOT
        emitted — only their class names are referenced.

    Raises:
        ValueError: If `element` is not a pipeline, or the pipeline is
            nonlinear (dict component graph).
        TypeError: If the pipeline parameters cannot be JSON-serialized.
    """
    if not isinstance(element, PipelineBase):
        raise ValueError(
            "Element must be a pipeline instance, received {}".format(type(element)))
    if isinstance(element.component_graph, dict):
        raise ValueError(
            "Code generation for nonlinear pipelines is not supported yet")

    # Output fragments: imports first, the generated class appended last.
    fragments = ['import json']

    def _component_ref(com):
        # Registered EvalML components are referenced by their string name;
        # custom components by class name (their code is not generated).
        if com.__class__ in all_components():
            return "'{}'".format(com.name)
        return com.__class__.__name__

    component_graph_string = ',\n\t\t'.join(_component_ref(com) for com in element)

    base_class = element.__class__.__bases__[0]
    fragments.append("from {} import {}".format(base_class.__module__, base_class.__name__))

    # Emit user-visible class attributes (e.g. name, custom_hyperparameters),
    # excluding private attributes and the component graph itself.
    attr_lines = []
    public_items = ((k, v) for k, v in element.__class__.__dict__.items() if k[0] != '_')
    for k, v in sorted(public_items, key=lambda kv: kv[0]):
        if k == 'component_graph':
            continue
        if isinstance(v, str):
            attr_lines.append("{} = '{}'".format(k, v))
        else:
            attr_lines.append("{} = {}".format(k, v))
    pipeline_string = "\t" + "\n\t".join(attr_lines) + "\n" if attr_lines else ""

    try:
        parameters_json = json.dumps(element.parameters, indent='\t')
    except TypeError:
        raise TypeError(
            f"Value {element.parameters} cannot be JSON-serialized")

    # Class skeleton; parameters/pipeline are emitted at module level.
    base_string = "\nclass {0}({1}):\n" \
                  "\tcomponent_graph = [\n\t\t{2}\n\t]\n" \
                  "{3}" \
                  "\nparameters = json.loads(\"\"\"{4}\"\"\")\n" \
                  "pipeline = {0}(parameters)" \
        .format(element.__class__.__name__,
                base_class.__name__,
                component_graph_string,
                pipeline_string,
                parameters_json)
    fragments.append(base_string)
    return "\n".join(fragments)
def test_components_init_kwargs():
    """Extra keyword args given to a component must land in `parameters`, be
    forwarded to the wrapped implementation object, and participate in
    component equality."""
    for component_class in all_components():
        try:
            component = component_class()
        except EnsembleMissingPipelinesError:
            # Ensembles can't be default-constructed; nothing to check here.
            continue
        if component._component_obj is None:
            continue

        impl = component._component_obj
        impl_name = impl.__class__.__name__
        impl_module = impl.__module__
        importlib.import_module(impl_module, impl_name)
        init_target = '.'.join([impl_module, impl_name, '__init__'])

        def recording_init(self, *args, **kwargs):
            # Stand-in __init__ that simply records every keyword argument.
            for name, value in kwargs.items():
                setattr(self, name, value)

        with patch(init_target, new=recording_init):
            component = component_class(test_arg="test")
            component_with_different_kwargs = component_class(diff_test_arg="test")
            assert component.parameters['test_arg'] == "test"
            if not isinstance(component, PolynomialDetrender):
                assert component._component_obj.test_arg == "test"
            # Equality must account for identical vs. differing kwargs.
            assert component == component_class(test_arg="test")
            assert component != component_with_different_kwargs
def test_no_fitting_required_components(X_y_binary, test_estimator_needs_fitting_false, helper_functions):
    """Components flagged needs_fitting=False must predict/transform without
    a prior fit() call."""
    X, y = X_y_binary
    candidates = all_components() + [test_estimator_needs_fitting_false]
    for component_class in candidates:
        if component_class.needs_fitting:
            continue
        component = helper_functions.safe_init_component_with_njobs_1(component_class)
        if issubclass(component_class, Estimator):
            component.predict(X)
        else:
            component.transform(X, y)
def test_all_components(has_minimal_dependencies, is_running_py_39_or_above, is_using_conda):
    """The registry size depends on installed extras, conda, and Python version."""
    if has_minimal_dependencies:
        expected = 37
    elif is_using_conda:
        expected = 48
    else:
        expected = 47 if is_running_py_39_or_above else 49
    assert len(all_components()) == expected
def test_component_parameters_init(logistic_regression_binary_pipeline_class, linear_regression_pipeline_class):
    """Rebuilding any component from its own `parameters` dict is lossless."""
    for component_class in all_components():
        print('Testing component {}'.format(component_class.name))
        try:
            component = component_class()
        except EnsembleMissingPipelinesError:
            # Ensembles require input pipelines; supply a trivial one.
            if component_class == StackedEnsembleClassifier:
                component = component_class(input_pipelines=[logistic_regression_binary_pipeline_class(parameters={})])
            elif component_class == StackedEnsembleRegressor:
                component = component_class(input_pipelines=[linear_regression_pipeline_class(parameters={})])
        params = component.parameters
        rebuilt = component_class(**params)
        assert params == rebuilt.parameters
def test_handle_component_class_names():
    """handle_component_class resolves both classes and registered names to a
    ComponentBase subclass, and rejects unknown names / non-components."""
    for cls in all_components():
        # Both the class object and its registered name must resolve.
        for candidate in (cls, cls.name):
            resolved = handle_component_class(candidate)
            assert inspect.isclass(resolved)
            assert issubclass(resolved, ComponentBase)

    with pytest.raises(MissingComponentError, match='Component "This Component Does Not Exist" was not found'):
        handle_component_class('This Component Does Not Exist')

    class NonComponent:
        pass

    with pytest.raises(ValueError):
        handle_component_class(NonComponent())
def test_component_has_random_seed():
    """Every component constructor must accept a `random_seed` parameter."""
    for component_class in all_components():
        signature = inspect.signature(component_class.__init__)
        assert "random_seed" in signature.parameters
# NOTE(review): this chunk begins mid-function — the leading `elif` chain belongs
# to a transform-output shape test whose `def` is outside this view, and the
# trailing `test_estimator_check_for_fit` is truncated. Left byte-identical;
# the complete definitions in between are `test_default_parameters` (default
# parameters must match a default-constructed instance) and
# `test_default_parameters_raise_no_warnings` (default construction emits no
# warnings), both parametrized over non-ensemble components.
elif isinstance(component, DFSTransformer): assert transform_output.shape[0] == X.shape[0] assert transform_output.shape[1] >= X.shape[1] elif isinstance(component, TargetImputer): assert transform_output[0].shape == X.shape assert transform_output[1].shape[0] == X.shape[0] assert len(transform_output[1].shape) == 1 elif 'sampler' in component.name: assert transform_output[0].shape == X.shape assert transform_output[1].shape[0] == X.shape[0] else: assert transform_output.shape == X.shape assert (list(transform_output.columns) == list(X_cols_expected)) @pytest.mark.parametrize("cls", [cls for cls in all_components() if cls not in [StackedEnsembleRegressor, StackedEnsembleClassifier]]) def test_default_parameters(cls): assert cls.default_parameters == cls().parameters, f"{cls.__name__}'s default parameters don't match __init__." @pytest.mark.parametrize("cls", [cls for cls in all_components() if cls not in [StackedEnsembleRegressor, StackedEnsembleClassifier]]) def test_default_parameters_raise_no_warnings(cls): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") cls() assert len(w) == 0 def test_estimator_check_for_fit(X_y_binary): class MockEstimatorObj(): def __init__(self):
def test_all_components(has_minimal_dependencies):
    """Component registry size: 33 with minimal dependencies, 40 with extras."""
    expected = 33 if has_minimal_dependencies else 40
    assert len(all_components()) == expected
def test_all_components(has_minimal_dependencies, is_running_py_39_or_above):
    """Registry size depends on installed extras and the Python version."""
    if has_minimal_dependencies:
        expected = 37
    elif is_running_py_39_or_above:
        expected = 45
    else:
        expected = 46
    assert len(all_components()) == expected
# NOTE(review): this chunk both begins and ends mid-function — the leading
# assertions belong to a transform-output shape test whose `def` is not
# visible, and the trailing `test_default_parameters_raise_no_warnings` is cut
# off before its final assertion. Left byte-identical; the one complete
# definition here is `test_default_parameters`, which checks a component's
# declared default_parameters against a default-constructed instance.
assert transform_output.shape == (X.shape[0], 0) elif isinstance(component, PCA) or isinstance( component, LinearDiscriminantAnalysis): assert transform_output.shape[0] == X.shape[0] assert transform_output.shape[1] <= X.shape[1] elif isinstance(component, DFSTransformer): assert transform_output.shape[0] == X.shape[0] assert transform_output.shape[1] >= X.shape[1] else: assert transform_output.shape == X.shape assert (list( transform_output.columns) == list(X_cols_expected)) @pytest.mark.parametrize("cls", [ cls for cls in all_components() if cls not in [StackedEnsembleRegressor, StackedEnsembleClassifier] ]) def test_default_parameters(cls): assert cls.default_parameters == cls( ).parameters, f"{cls.__name__}'s default parameters don't match __init__." @pytest.mark.parametrize("cls", [ cls for cls in all_components() if cls not in [StackedEnsembleRegressor, StackedEnsembleClassifier] ]) def test_default_parameters_raise_no_warnings(cls): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") cls()