Example #1
0
def test_column_transformer_invalid_columns(remainder):
    """Check how ColumnTransformer reacts to invalid column specifications.

    Three situations are exercised on NumPy array input:

    * column specifiers listed under "general invalid" must make ``fit``
      raise a ``ValueError`` mentioning "No valid specification";
    * string-based specifiers must make ``fit`` raise a ``ValueError``
      mentioning "Specifying the columns" when the input is an array;
    * after a successful ``fit``, ``transform`` is expected to emit a
      ``DeprecationWarning`` when given an extra column and to raise a
      ``ValueError`` ("Number of features") when given fewer columns.

    Parameters
    ----------
    remainder
        Forwarded unchanged as the ``remainder`` argument of every
        ``ColumnTransformer`` built here (presumably supplied by a pytest
        parametrization defined outside this block -- TODO confirm).
    """
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T

    # general invalid
    for col in [1.5, ['string', 1], slice(1, 's'), np.array([1.])]:
        ct = ColumnTransformer([('trans', Trans(), col)], remainder=remainder)
        # pytest.raises is used throughout so that all failure checks in this
        # test share the same idiom; the pattern has no regex metacharacters.
        with pytest.raises(ValueError, match="No valid specification"):
            ct.fit(X_array)

    # invalid for arrays
    for col in ['string', ['string', 'other'], slice('a', 'b')]:
        ct = ColumnTransformer([('trans', Trans(), col)], remainder=remainder)
        with pytest.raises(ValueError, match="Specifying the columns"):
            ct.fit(X_array)

    # transformed n_features does not match fitted n_features
    col = [0, 1]
    ct = ColumnTransformer([('trans', Trans(), col)], remainder=remainder)
    ct.fit(X_array)
    X_array_more = np.array([[0, 1, 2], [2, 4, 6], [3, 6, 9]]).T
    msg = ("Given feature/column names or counts do not match the ones for "
           "the data given during fit.")
    with pytest.warns(DeprecationWarning, match=msg):
        ct.transform(X_array_more)  # Should accept added columns, for now
    X_array_fewer = np.array([
        [0, 1, 2],
    ]).T
    err_msg = 'Number of features'
    with pytest.raises(ValueError, match=err_msg):
        ct.transform(X_array_fewer)
Example #2
0
def test_column_transformer_sparse_stacking():
    """Check stacking of a dense and a sparse transformer output.

    The same pair of transformers is fitted twice on a small array: once
    with ``sparse_threshold=0.8``, where a sparse result is expected, and
    once with ``sparse_threshold=0.1``, where a dense result is expected.
    In both runs the output must have ``n_rows + 1`` columns and every
    column after the first must form an identity matrix.
    """
    data = np.array([[0, 1, 2], [2, 4, 6]]).T

    def build(threshold):
        # Fresh transformer instances for each ColumnTransformer.
        steps = [('trans1', Trans(), [0]),
                 ('trans2', SparseMatrixTrans(), 1)]
        return ColumnTransformer(steps, sparse_threshold=threshold)

    # Threshold 0.8: the stacked output is expected to be sparse.
    stacker = build(0.8)
    stacker.fit(data)
    stacked = stacker.transform(data)
    assert sparse.issparse(stacked)
    n_rows = stacked.shape[0]
    assert stacked.shape == (n_rows, n_rows + 1)
    assert_array_equal(stacked.toarray()[:, 1:], np.eye(n_rows))
    # Only the two declared transformers are recorded, no remainder entry.
    fitted = stacker.transformers_
    assert len(fitted) == 2
    assert fitted[-1][0] != 'remainder'

    # Threshold 0.1: the stacked output is expected to be dense.
    stacker = build(0.1)
    stacker.fit(data)
    stacked = stacker.transform(data)
    assert not sparse.issparse(stacked)
    n_rows = stacked.shape[0]
    assert stacked.shape == (n_rows, n_rows + 1)
    assert_array_equal(stacked[:, 1:], np.eye(n_rows))
Example #3
0
def test_column_transformer_reordered_column_names_remainder(explicit_colname):
    """Regression test for issue #14223: 'Named col indexing fails with
    ColumnTransformer remainder on changing DataFrame column ordering'.

    Behaviour expected by this test, for a transformer fitted on a DataFrame
    with columns ``['first', 'second']`` and ``remainder=Trans()``:

    * transforming a DataFrame whose columns are reordered raises
      ``ValueError``;
    * transforming a DataFrame with one added trailing column (ordering of
      the fitted columns unchanged) only emits a ``DeprecationWarning``;
    * transforming a plain NumPy array raises ``ValueError`` instead of an
      ``AttributeError`` about a missing ``columns`` attribute.
    """
    pd = pytest.importorskip('pandas')

    fit_values = np.array([[0, 1, 2], [2, 4, 6]]).T
    fit_frame = pd.DataFrame(fit_values, columns=['first', 'second'])

    swapped_values = np.array([[2, 4, 6], [0, 1, 2]]).T
    swapped_frame = pd.DataFrame(swapped_values, columns=['second', 'first'])

    transformer = ColumnTransformer(
        [('bycol', Trans(), explicit_colname)],
        remainder=Trans(),
    )
    transformer.fit(fit_frame)

    # Same columns, different order: must be rejected.
    with pytest.raises(ValueError, match='Column ordering must be equal'):
        transformer.transform(swapped_frame)

    # No error for added columns if ordering is identical
    mismatch_warning = ("Given feature/column names or counts do not match "
                        "the ones for the data given during fit.")
    widened_frame = fit_frame.copy()
    widened_frame['third'] = [3, 6, 9]
    with pytest.warns(DeprecationWarning, match=mismatch_warning):
        # No error should be raised, for now
        transformer.transform(widened_frame)

    # No 'columns' AttributeError when transform input is a numpy array
    plain_array = fit_values.copy()
    with pytest.raises(ValueError, match='Specifying the columns'):
        transformer.transform(plain_array)
Example #4
0
def test_feature_name_validation():
    """Tests if the proper warning/error is raised if the columns do not match
    during fit and transform.

    Scenarios, each fitted on a two-column DataFrame (columns ``'a'``,
    ``'b'``):

    * columns selected by name or by position, then ``transform`` on input
      with one extra column: a ``DeprecationWarning`` is expected;
    * ``transform`` on input with the fitted number of columns: no warning
      may be recorded;
    * name selection with ``remainder=Trans()`` and one extra column: a
      ``DeprecationWarning`` is expected;
    * negative positions (list or slice) and one extra column: a
      ``RuntimeError`` is expected.
    """
    pd = pytest.importorskip("pandas")

    X = np.ones(shape=(3, 2))
    X_extra = np.ones(shape=(3, 3))
    df = pd.DataFrame(X, columns=['a', 'b'])
    df_extra = pd.DataFrame(X_extra, columns=['a', 'b', 'c'])

    mismatch_msg = ("Given feature/column names or counts do not match the "
                    "ones for the data given during fit.")

    tf = ColumnTransformer([('bycol', Trans(), ['a', 'b'])])
    tf.fit(df)

    with pytest.warns(DeprecationWarning, match=mismatch_msg):
        tf.transform(df_extra)

    tf = ColumnTransformer([('bycol', Trans(), [0])])
    tf.fit(df)

    with pytest.warns(DeprecationWarning, match=mismatch_msg):
        tf.transform(X_extra)

    with warnings.catch_warnings(record=True) as warns:
        # Without an explicit filter, an ambient "ignore" rule for
        # DeprecationWarning would make the assertion below pass vacuously.
        warnings.simplefilter("always", DeprecationWarning)
        tf.transform(X)
    assert not warns

    tf = ColumnTransformer([('bycol', Trans(), ['a'])], remainder=Trans())
    tf.fit(df)
    with pytest.warns(DeprecationWarning, match=mismatch_msg):
        tf.transform(df_extra)

    negative_msg = "At least one negative column was used to"

    tf = ColumnTransformer([('bycol', Trans(), [0, -1])])
    tf.fit(df)
    with pytest.raises(RuntimeError, match=negative_msg):
        tf.transform(df_extra)

    tf = ColumnTransformer([('bycol', Trans(), slice(-1, -3, -1))])
    tf.fit(df)
    with pytest.raises(RuntimeError, match=negative_msg):
        tf.transform(df_extra)

    with warnings.catch_warnings(record=True) as warns:
        # Same reasoning as above: force DeprecationWarning to be recorded.
        warnings.simplefilter("always", DeprecationWarning)
        tf.transform(df)
    assert not warns