def test_one_hot_encoder_drop_manual():
    """Check OneHotEncoder when one category to drop is listed per feature.

    The test fits the encoder with an explicit ``drop`` list and verifies
    three things: the dense encoded matrix, that the categories selected
    by ``drop_idx_`` are exactly the ones requested, and that
    ``inverse_transform`` recovers the original rows.
    """
    cats_to_drop = ['def', 12, 3, 56]
    samples = [
        ['abc', 12, 2, 55],
        ['def', 12, 1, 55],
        ['def', 12, 3, 56],
    ]
    expected_encoding = [
        [1, 0, 1, 1],
        [0, 1, 0, 1],
        [0, 0, 0, 0],
    ]

    encoder = OneHotEncoder(drop=cats_to_drop)
    encoded = encoder.fit_transform(samples).toarray()
    assert_array_equal(encoded, expected_encoding)

    # Look up, feature by feature, the category that drop_idx_ points at.
    dropped = []
    for feature_categories, dropped_index in zip(encoder.categories_,
                                                 encoder.drop_idx_):
        dropped.append(feature_categories[dropped_index])
    assert_array_equal(dropped, cats_to_drop)

    # The last sample consists only of dropped categories (all-zero row in
    # the expected encoding) and must still round-trip.
    original = np.array(samples, dtype=object)
    assert_array_equal(original, encoder.inverse_transform(encoded))
def test_one_hot_encoder_inverse(sparse_, drop):
    """Check that OneHotEncoder.inverse_transform undoes fit_transform.

    Parameters
    ----------
    sparse_ : forwarded to ``OneHotEncoder(sparse=...)``.
    drop : forwarded to ``OneHotEncoder(drop=...)``; the unknown-category
        cases below only run when it is ``None``.

    NOTE(review): both arguments are presumably supplied by pytest
    parametrization or fixtures that are not visible in this chunk.
    """
    # Mixed string / numeric input round-trips as an object array.
    mixed_rows = [['abc', 2, 55], ['def', 1, 55], ['abc', 3, 55]]
    expected = np.array(mixed_rows, dtype=object)
    encoder = OneHotEncoder(sparse=sparse_, drop=drop)
    encoded = encoder.fit_transform(mixed_rows)
    assert_array_equal(encoder.inverse_transform(encoded), expected)

    # Purely numeric input round-trips as a plain numeric array.
    numeric_rows = [[2, 55], [1, 55], [3, 55]]
    expected = np.array(numeric_rows)
    encoder = OneHotEncoder(sparse=sparse_, categories='auto', drop=drop)
    encoded = encoder.fit_transform(numeric_rows)
    assert_array_equal(encoder.inverse_transform(encoded), expected)

    if drop is None:
        # with unknown categories
        # drop is incompatible with handle_unknown=ignore
        mixed_rows = [['abc', 2, 55], ['def', 1, 55], ['abc', 3, 55]]
        known_categories = [['abc', 'def'], [1, 2], [54, 55, 56]]
        expected = np.array(mixed_rows, dtype=object)
        # The value 3 is not among the categories given for the second
        # feature, so it is expected to come back as None.
        expected[2, 1] = None
        encoder = OneHotEncoder(sparse=sparse_,
                                categories=known_categories,
                                handle_unknown='ignore')
        encoded = encoder.fit_transform(mixed_rows)
        assert_array_equal(encoder.inverse_transform(encoded), expected)

        # with an otherwise numerical output, still object if unknown
        numeric_rows = [[2, 55], [1, 55], [3, 55]]
        expected = np.array(numeric_rows, dtype=object)
        expected[2, 0] = None  # 3 is not in [1, 2]
        expected[:, 1] = None  # 55 is not in [54, 56]
        encoder = OneHotEncoder(sparse=sparse_,
                                handle_unknown='ignore',
                                categories=[[1, 2], [54, 56]])
        encoded = encoder.fit_transform(numeric_rows)
        assert_array_equal(encoder.inverse_transform(encoded), expected)

    # incorrect shape raises
    # (checked against whichever encoder was fitted last above)
    wrong_shape = np.array([[0, 1, 1], [1, 0, 1]])
    pattern = re.escape('Shape of the passed X data is not correct')
    with pytest.raises(ValueError, match=pattern):
        encoder.inverse_transform(wrong_shape)