예제 #1
0
def test_transformer_transform_output_type(X_y_binary):
    """Check the type, shape and column labels of what each transformer returns.

    Every class from ``_all_transformers()`` is built with default arguments,
    fitted, and run through both ``transform`` and ``fit_transform`` on four
    input pairings: ndarray/ndarray, ndarray/list, unnamed DataFrame/unnamed
    Series, and named DataFrame/named Series. Each result must be a
    ``ww.DataTable``; shape and column expectations depend on the component.

    Args:
        X_y_binary: fixture providing ``(X, y)``; both are asserted to be
            numpy arrays before anything else runs.
    """
    X_np, y_np = X_y_binary
    assert isinstance(X_np, np.ndarray)
    assert isinstance(y_np, np.ndarray)

    n_features = X_np.shape[1]
    # Column labels expected when the input carries no explicit column names.
    default_columns = pd.RangeIndex(start=0, stop=n_features, step=1)
    named_frame = pd.DataFrame(X_np, columns=['x' + str(i) for i in range(n_features)])
    datatype_combos = [
        (X_np, y_np, default_columns),
        (X_np, list(y_np), default_columns),
        (pd.DataFrame(X_np), pd.Series(y_np), default_columns),
        (named_frame, pd.Series(y_np, name='target'), named_frame.columns),
    ]

    def _special_shape_rule_applied(component, output, X):
        """Assert the relaxed shape rule for components that change the column count.

        Returns True when one of the special cases matched (and was asserted),
        False when the caller still has to apply its own checks.
        """
        if isinstance(component, SelectColumns):
            # With default arguments the output has the input's rows and zero columns.
            assert output.shape == (X.shape[0], 0)
            return True
        if isinstance(component, (PCA, LinearDiscriminantAnalysis)):
            # Row count is preserved; column count may shrink.
            assert output.shape[0] == X.shape[0]
            assert output.shape[1] <= X.shape[1]
            return True
        if isinstance(component, DFSTransformer):
            # Row count is preserved; column count may grow.
            assert output.shape[0] == X.shape[0]
            assert output.shape[1] >= X.shape[1]
            return True
        return False

    for component_class in _all_transformers():
        print('Testing transformer {}'.format(component_class.name))
        for X, y, X_cols_expected in datatype_combos:
            x_columns = X.columns if isinstance(X, pd.DataFrame) else None
            y_name = y.name if isinstance(y, pd.Series) else None
            print('Checking output of transform for transformer "{}" on X type {} cols {}, y type {} name {}'
                  .format(component_class.name, type(X), x_columns, type(y), y_name))

            component = component_class()
            component.fit(X, y=y)

            transformed = component.transform(X, y=y)
            assert isinstance(transformed, ww.DataTable)
            if not _special_shape_rule_applied(component, transformed, X):
                if isinstance(component, DelayedFeatureTransformer):
                    # For DelayedFeatureTransformer only the DataTable output type is checked here;
                    # its shape and index are covered in test_delayed_features_transformer.py,
                    # so the remaining checks (including fit_transform) are skipped.
                    continue
                assert transformed.shape == X.shape
                assert (list(transformed.columns) == list(X_cols_expected))

            fit_transformed = component.fit_transform(X, y=y)
            assert isinstance(fit_transformed, ww.DataTable)
            if not _special_shape_rule_applied(component, fit_transformed, X):
                assert fit_transformed.shape == X.shape
                assert (list(fit_transformed.columns) == list(X_cols_expected))
예제 #2
0
def test_all_transformers_needs_fitting():
    """Check the ``needs_fitting`` flag on every transformer and estimator class.

    Only the three classes named below are expected to report a falsy
    ``needs_fitting``; every other class returned by ``_all_transformers()``
    and ``_all_estimators()`` must report a truthy value.
    """
    exempt_from_fitting = ['DropColumns', 'SelectColumns', 'DelayedFeatureTransformer']
    component_classes = _all_transformers() + _all_estimators()
    for component_class in component_classes:
        if component_class.__name__ not in exempt_from_fitting:
            assert component_class.needs_fitting
        else:
            assert not component_class.needs_fitting
예제 #3
0
def test_all_transformers_check_fit_input_type(data_type, X_y_binary, make_data_type):
    """Check that every fittable transformer accepts ``data_type`` inputs in ``fit``.

    The fixture data is converted with ``make_data_type`` and passed to the
    ``fit`` method of a default-constructed instance of each transformer whose
    ``needs_fitting`` is truthy. The test passes when no call raises.

    Args:
        data_type: identifier forwarded to ``make_data_type``.
        X_y_binary: fixture providing the ``(X, y)`` pair to convert.
        make_data_type: callable invoked as ``make_data_type(data_type, data)``.
    """
    raw_X, raw_y = X_y_binary
    X = make_data_type(data_type, raw_X)
    y = make_data_type(data_type, raw_y)
    for component_class in _all_transformers():
        # Components that need no fitting are not exercised here.
        if component_class.needs_fitting:
            component_class().fit(X, y)
예제 #4
0
def test_all_transformers_check_fit(X_y_binary):
    """Check the fit-before-transform contract of every fittable transformer.

    For each transformer whose ``needs_fitting`` is truthy:
      * ``transform`` on an unfitted instance must raise
        ``ComponentNotYetFittedError`` naming the class;
      * ``transform`` must succeed after ``fit``;
      * ``transform`` must succeed on a fresh instance after ``fit_transform``.

    Args:
        X_y_binary: fixture providing the ``(X, y)`` pair used for all calls.
    """
    X, y = X_y_binary
    for component_class in _all_transformers():
        if component_class.needs_fitting:
            component = component_class()
            expected_message = f'You must fit {component_class.__name__}'
            with pytest.raises(ComponentNotYetFittedError, match=expected_message):
                component.transform(X, y)

            # Once fitted, the same instance must be able to transform.
            component.fit(X, y)
            component.transform(X, y)

            # fit_transform alone must also leave a new instance in a fitted state.
            fresh_component = component_class()
            fresh_component.fit_transform(X, y)
            fresh_component.transform(X, y)
예제 #5
0
    MockComponent, MockEstimator, MockTransformer = test_classes

    expected_code = "mockComponent = MockComponent(**{})"
    component_code = generate_component_code(MockComponent())
    assert component_code == expected_code

    expected_code = "mockEstimator = MockEstimator(**{})"
    component_code = generate_component_code(MockEstimator())
    assert component_code == expected_code

    expected_code = "mockTransformer = MockTransformer(**{})"
    component_code = generate_component_code(MockTransformer())
    assert component_code == expected_code


@pytest.mark.parametrize("transformer_class", _all_transformers())
@pytest.mark.parametrize("use_custom_index", [True, False])
def test_transformer_fit_and_transform_respect_custom_indices(use_custom_index, transformer_class, X_y_binary):

    check_names = True
    if transformer_class == DFSTransformer:
        check_names = False
        if use_custom_index:
            pytest.skip("The DFSTransformer changes the index so we skip it.")
    if transformer_class == PolynomialDetrender:
        pytest.skip("Skipping PolynomialDetrender because we test that it respects custom indices in "
                    "test_polynomial_detrender.py")

    X, y = X_y_binary

    X = pd.DataFrame(X)