Esempio n. 1
0
def test_serialization(X_y_binary, ts_data, tmpdir, helper_functions):
    """Round-trip every component through save/load at each pickle protocol.

    Verifies that parameters, describe() output and (for non-ensemble
    estimators) feature importances are preserved by serialization.
    """
    path = os.path.join(str(tmpdir), 'component.pkl')
    for component_class in all_components():
        print('Testing serialization of component {}'.format(component_class.name))
        try:
            component = helper_functions.safe_init_component_with_njobs_1(component_class)
        except EnsembleMissingPipelinesError:
            # Stacked ensembles require explicit input pipelines; build a
            # minimal one for each. Re-raise for any other class so we never
            # proceed with an unbound/stale `component` from a prior iteration.
            if component_class == StackedEnsembleClassifier:
                component = component_class(input_pipelines=[make_pipeline_from_components([RandomForestClassifier()], ProblemTypes.BINARY)], n_jobs=1)
            elif component_class == StackedEnsembleRegressor:
                component = component_class(input_pipelines=[make_pipeline_from_components([RandomForestRegressor()], ProblemTypes.REGRESSION)], n_jobs=1)
            else:
                raise
        # Time-series estimators must be fit on time-series data.
        if isinstance(component, Estimator) and ProblemTypes.TIME_SERIES_REGRESSION in component.supported_problem_types:
            X, y = ts_data
        else:
            X, y = X_y_binary

        component.fit(X, y)

        # Exercise every pickle protocol cloudpickle supports, up to its default.
        for pickle_protocol in range(cloudpickle.DEFAULT_PROTOCOL + 1):
            component.save(path, pickle_protocol=pickle_protocol)
            loaded_component = ComponentBase.load(path)
            assert component.parameters == loaded_component.parameters
            assert component.describe(return_dict=True) == loaded_component.describe(return_dict=True)
            # Stacked ensembles are excluded from the feature-importance check.
            if issubclass(component_class, Estimator) and not isinstance(component, (StackedEnsembleClassifier, StackedEnsembleRegressor)):
                assert (component.feature_importance == loaded_component.feature_importance).all()
Esempio n. 2
0
def generate_pipeline_code(element):
    """Creates and returns a string that contains the Python imports and code required for running the EvalML pipeline.

    Arguments:
        element (pipeline instance): The instance of the pipeline to generate string Python code

    Returns:
        String representation of Python code that can be run separately in order to recreate the pipeline instance.
        Does not include code for custom component implementation.

    Raises:
        ValueError: If element is not a pipeline instance, or if the pipeline is nonlinear.
        TypeError: If the pipeline parameters cannot be JSON-serialized.
    """
    # hold the imports needed and add code to end
    code_strings = ['import json']
    if not isinstance(element, PipelineBase):
        raise ValueError(
            "Element must be a pipeline instance, received {}".format(
                type(element)))
    # A dict-valued component_graph indicates a nonlinear (DAG) pipeline.
    if isinstance(element.component_graph, dict):
        raise ValueError(
            "Code generation for nonlinear pipelines is not supported yet")

    # Registered components are referenced by their name string; custom
    # (unregistered) components are referenced by their class name instead.
    component_graph_string = ',\n\t\t'.join([
        com.__class__.__name__
        if com.__class__ not in all_components() else "'{}'".format(com.name)
        for com in element
    ])
    # Import the pipeline's immediate base class so the generated subclass resolves.
    code_strings.append("from {} import {}".format(
        element.__class__.__bases__[0].__module__,
        element.__class__.__bases__[0].__name__))
    # check for other attributes associated with pipeline (ie name, custom_hyperparameters)
    pipeline_list = []
    # Copy public class attributes (skip leading-underscore names), sorted by
    # attribute name for deterministic output; component_graph is emitted
    # separately above, so it is excluded here.
    for k, v in sorted(list(
            filter(lambda item: item[0][0] != '_',
                   element.__class__.__dict__.items())),
                       key=lambda x: x[0]):
        if k == 'component_graph':
            continue
        # Quote string values; other values are interpolated via str().
        pipeline_list += ["{} = '{}'".format(k, v)] if isinstance(
            v, str) else ["{} = {}".format(k, v)]

    pipeline_string = "\t" + "\n\t".join(pipeline_list) + "\n" if len(
        pipeline_list) else ""

    # Parameters are embedded as a JSON literal so they round-trip via json.loads.
    try:
        ret = json.dumps(element.parameters, indent='\t')
    except TypeError:
        raise TypeError(
            f"Value {element.parameters} cannot be JSON-serialized")
    # create the base string for the pipeline
    base_string = "\nclass {0}({1}):\n" \
                  "\tcomponent_graph = [\n\t\t{2}\n\t]\n" \
                  "{3}" \
                  "\nparameters = json.loads(\"\"\"{4}\"\"\")\n" \
                  "pipeline = {0}(parameters)" \
                  .format(element.__class__.__name__,
                          element.__class__.__bases__[0].__name__,
                          component_graph_string,
                          pipeline_string,
                          ret)
    code_strings.append(base_string)
    return "\n".join(code_strings)
Esempio n. 3
0
def test_components_init_kwargs():
    """Extra kwargs passed to a component __init__ must reach the wrapped object.

    Patches the underlying library object's __init__ so any kwarg becomes an
    attribute, then checks the kwarg is recorded in parameters, forwarded to
    the wrapped object, and participates in component equality.
    """
    for component_class in all_components():
        try:
            component = component_class()
        except EnsembleMissingPipelinesError:
            # Stacked ensembles cannot be default-constructed; skip them.
            continue
        if component._component_obj is None:
            # Component has no wrapped library object to forward kwargs to.
            continue

        obj_class = component._component_obj.__class__.__name__
        module = component._component_obj.__module__
        # Ensure the module is importable before patching its class.
        # NOTE: import_module's second argument is the *package* anchor for
        # relative imports; `module` is absolute here, so it must not be passed.
        importlib.import_module(module)
        patched = module + '.' + obj_class + '.__init__'

        def all_init(self, *args, **kwargs):
            # Replacement __init__: accept anything, store kwargs as attributes.
            for k, v in kwargs.items():
                setattr(self, k, v)

        with patch(patched, new=all_init):
            component = component_class(test_arg="test")
            component_with_different_kwargs = component_class(diff_test_arg="test")
            assert component.parameters['test_arg'] == "test"
            if not isinstance(component, PolynomialDetrender):
                assert component._component_obj.test_arg == "test"
            # Test equality of different components with same or different kwargs
            assert component == component_class(test_arg="test")
            assert component != component_with_different_kwargs
Esempio n. 4
0
def test_no_fitting_required_components(X_y_binary, test_estimator_needs_fitting_false, helper_functions):
    """Components flagged needs_fitting=False must predict/transform unfitted."""
    X, y = X_y_binary
    candidates = all_components() + [test_estimator_needs_fitting_false]
    for cls in candidates:
        if cls.needs_fitting:
            continue
        instance = helper_functions.safe_init_component_with_njobs_1(cls)
        if issubclass(cls, Estimator):
            instance.predict(X)
        else:
            instance.transform(X, y)
Esempio n. 5
0
def test_all_components(has_minimal_dependencies, is_running_py_39_or_above, is_using_conda):
    """The component registry size depends on the installed environment."""
    if has_minimal_dependencies:
        expected = 37
    elif is_using_conda:
        expected = 48
    else:
        expected = 47 if is_running_py_39_or_above else 49
    assert len(all_components()) == expected
Esempio n. 6
0
def test_component_parameters_init(logistic_regression_binary_pipeline_class,
                                   linear_regression_pipeline_class):
    """Rebuilding a component from its own parameters yields equal parameters."""
    for cls in all_components():
        print('Testing component {}'.format(cls.name))
        try:
            instance = cls()
        except EnsembleMissingPipelinesError:
            # Stacked ensembles need input pipelines supplied explicitly.
            if cls == StackedEnsembleClassifier:
                instance = cls(input_pipelines=[logistic_regression_binary_pipeline_class(parameters={})])
            elif cls == StackedEnsembleRegressor:
                instance = cls(input_pipelines=[linear_regression_pipeline_class(parameters={})])
        original_params = instance.parameters

        rebuilt = cls(**original_params)

        assert original_params == rebuilt.parameters
Esempio n. 7
0
def test_handle_component_class_names():
    """handle_component_class resolves both classes and registered name strings."""
    for component_class in all_components():
        for candidate in (component_class, component_class.name):
            resolved = handle_component_class(candidate)
            assert inspect.isclass(resolved)
            assert issubclass(resolved, ComponentBase)

    # Unknown names raise MissingComponentError with a descriptive message.
    bad_name = 'This Component Does Not Exist'
    with pytest.raises(MissingComponentError, match='Component "This Component Does Not Exist" was not found'):
        handle_component_class(bad_name)

    # Arbitrary non-component objects are rejected with ValueError.
    class NotAComponent:
        pass
    with pytest.raises(ValueError):
        handle_component_class(NotAComponent())
Esempio n. 8
0
def test_component_has_random_seed():
    """Every component __init__ must accept a random_seed argument."""
    for cls in all_components():
        signature = inspect.signature(cls.__init__)
        assert "random_seed" in signature.parameters
Esempio n. 9
0
            elif isinstance(component, DFSTransformer):
                assert transform_output.shape[0] == X.shape[0]
                assert transform_output.shape[1] >= X.shape[1]
            elif isinstance(component, TargetImputer):
                assert transform_output[0].shape == X.shape
                assert transform_output[1].shape[0] == X.shape[0]
                assert len(transform_output[1].shape) == 1
            elif 'sampler' in component.name:
                assert transform_output[0].shape == X.shape
                assert transform_output[1].shape[0] == X.shape[0]
            else:
                assert transform_output.shape == X.shape
                assert (list(transform_output.columns) == list(X_cols_expected))


@pytest.mark.parametrize("cls", [component for component in all_components()
                                 if component not in (StackedEnsembleRegressor, StackedEnsembleClassifier)])
def test_default_parameters(cls):
    """default_parameters must match a default-constructed instance's parameters."""
    instance = cls()
    assert cls.default_parameters == instance.parameters, f"{cls.__name__}'s default parameters don't match __init__."


@pytest.mark.parametrize("cls", [component for component in all_components()
                                 if component not in (StackedEnsembleRegressor, StackedEnsembleClassifier)])
def test_default_parameters_raise_no_warnings(cls):
    """Constructing a component with all defaults must emit no warnings."""
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        cls()
        assert not caught


def test_estimator_check_for_fit(X_y_binary):
    class MockEstimatorObj():
        def __init__(self):
Esempio n. 10
0
def test_all_components(has_minimal_dependencies):
    """Registry size: 33 with minimal dependencies, 40 with the full set."""
    expected = 33 if has_minimal_dependencies else 40
    assert len(all_components()) == expected
Esempio n. 11
0
def test_all_components(has_minimal_dependencies, is_running_py_39_or_above):
    """Registry size varies with installed extras and Python version."""
    if has_minimal_dependencies:
        expected = 37
    elif is_running_py_39_or_above:
        expected = 45
    else:
        expected = 46
    assert len(all_components()) == expected
Esempio n. 12
0
                assert transform_output.shape == (X.shape[0], 0)
            elif isinstance(component, PCA) or isinstance(
                    component, LinearDiscriminantAnalysis):
                assert transform_output.shape[0] == X.shape[0]
                assert transform_output.shape[1] <= X.shape[1]
            elif isinstance(component, DFSTransformer):
                assert transform_output.shape[0] == X.shape[0]
                assert transform_output.shape[1] >= X.shape[1]
            else:
                assert transform_output.shape == X.shape
                assert (list(
                    transform_output.columns) == list(X_cols_expected))


@pytest.mark.parametrize("cls", [component for component in all_components()
                                 if component not in (StackedEnsembleRegressor, StackedEnsembleClassifier)])
def test_default_parameters(cls):
    """default_parameters must match a default-constructed instance's parameters."""
    instance = cls()
    assert cls.default_parameters == instance.parameters, f"{cls.__name__}'s default parameters don't match __init__."


@pytest.mark.parametrize("cls", [
    cls for cls in all_components()
    if cls not in [StackedEnsembleRegressor, StackedEnsembleClassifier]
])
def test_default_parameters_raise_no_warnings(cls):
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        cls()