def test_feature_hasher_dicts():
    """Check that dict input and (name, value)-pair input hash identically.

    A hasher built with default arguments must report ``"dict"`` as its
    input type, and transforming a list of dicts must produce the same
    dense array as transforming the equivalent streams of items with
    ``input_type="pair"``.
    """
    h = FeatureHasher(n_features=16)
    assert "dict" == h.input_type

    raw_X = [{"dada": 42, "tzara": 37}, {"gaga": 17}]
    X1 = FeatureHasher(n_features=16).transform(raw_X)

    # dict.iteritems() only exists on Python 2; iter(d.items()) yields the
    # same (key, value) pairs on both Python 2 and Python 3.
    gen = (iter(d.items()) for d in raw_X)
    X2 = FeatureHasher(n_features=16, input_type="pair").transform(gen)

    assert_array_equal(X1.toarray(), X2.toarray())
def test_feature_hasher_dicts():
    """Dict samples and their item streams must hash to the same matrix.

    The samples mix numeric and string feature values. The same data is
    fed once as dicts (default input type) and once as iterators over each
    dict's items (``input_type="pair"``); the dense results are compared.
    """
    # The default input type of a freshly built hasher is "dict".
    default_hasher = FeatureHasher(n_features=16)
    assert "dict" == default_hasher.input_type

    samples = [
        {"foo": "bar", "dada": 42, "tzara": 37},
        {"foo": "baz", "gaga": "string1"},
    ]

    dict_hasher = FeatureHasher(n_features=16)
    hashed_from_dicts = dict_hasher.transform(samples)

    # Lazily expose every sample as an iterator of (name, value) pairs.
    pair_streams = (iter(sample.items()) for sample in samples)
    pair_hasher = FeatureHasher(n_features=16, input_type="pair")
    hashed_from_pairs = pair_hasher.transform(pair_streams)

    assert_array_equal(
        hashed_from_dicts.toarray(),
        hashed_from_pairs.toarray(),
    )
def test_feature_hasher_dicts():
    """Dict input and pair input must yield identical hashed features.

    Verifies the default ``input_type`` of a new hasher, then compares the
    dense output for a list of dicts against the output for iterators over
    the same dicts' items hashed with ``input_type="pair"``.
    """
    default_hasher = FeatureHasher(n_features=16)
    assert_equal("dict", default_hasher.input_type)

    samples = [{"dada": 42, "tzara": 37}, {"gaga": 17}]

    dict_hasher = FeatureHasher(n_features=16)
    hashed_from_dicts = dict_hasher.transform(samples)

    # Lazily expose every sample as an iterator of (name, value) pairs.
    pair_streams = (iter(sample.items()) for sample in samples)
    pair_hasher = FeatureHasher(n_features=16, input_type="pair")
    hashed_from_pairs = pair_hasher.transform(pair_streams)

    assert_array_equal(
        hashed_from_dicts.toarray(),
        hashed_from_pairs.toarray(),
    )
def ordinal_encode(df):
    """Hash the ``state`` column into two numeric columns and split the data.

    Works on a copy of ``df``. The ``state`` values are hashed with a
    two-feature ``FeatureHasher`` and stored in new ``state1`` / ``state2``
    columns. The frame (minus ``churn``) and the ``churn`` column are then
    passed to ``train_test_split`` with a fixed ``random_state``.

    Args:
        df: Frame with at least ``state`` and ``churn`` columns.
            NOTE(review): assumes ``state`` holds strings -- confirm against
            the caller; other value types need a different ``input_type``.

    Returns:
        Whatever ``train_test_split`` returns for the feature frame and the
        ``churn`` target.
    """
    df = df.copy()

    # One single-element row per sample. With input_type="string" the hasher
    # treats each row as an iterable of string tokens; the default "dict"
    # input type cannot consume rows of a plain value array.
    states = df.state.values.reshape(-1, 1)
    print(states)

    hasher = FeatureHasher(n_features=2, input_type="string")
    # The hashed features are the *return value* of transform(); the hasher
    # object itself has no toarray() method.
    hashed = hasher.transform(states)
    df[['state1', 'state2']] = hashed.toarray()

    return train_test_split(df.drop('churn', axis=1), df.churn, random_state=1234)
def feature_hashing(features, size_f):
    """Hash ``features`` into ``size_f`` columns, print and return the result.

    Args:
        features: Samples accepted by ``FeatureHasher.transform`` with the
            default input type (presumably an iterable of dicts -- confirm
            against the caller).
        size_f: Number of output columns, passed as ``n_features``.

    Returns:
        The matrix returned by ``FeatureHasher.transform``. Its dense form is
        printed as a side effect.
    """
    hasher = FeatureHasher(n_features=size_f)
    hashed = hasher.transform(features)
    # toarray() belongs to the transformed matrix, not to the hasher, and
    # print must be called as a function to be valid on Python 3.
    print(hashed.toarray())
    return hashed