def test_pick_custom_emb_dim(test_data):
    """Check that explicit ``emb_dims`` are reflected in the picked sizes.

    With ``emb_dims=[3, 4]`` the result must hold exactly two entries,
    keyed by column name, each a ``(10, dim)`` tuple carrying the
    requested dimension.

    NOTE(review): the fixture is handed to ``categorize`` whole here,
    while other tests in this module unpack it as ``(X, y)`` -- confirm
    which fixture shape this test expects.
    """
    column_sizes = categorize(test_data)
    picked = pick_emb_dim(column_sizes, emb_dims=[3, 4])

    expected = {
        'Categorical 1': (10, 3),
        'Categorical 2': (10, 4),
    }
    assert len(picked) == 2
    for column, size in expected.items():
        assert picked[column] == size
def test_categorize(test_data):
    """Check that ``categorize`` reports both categorical columns in order.

    The result must be a two-element sequence of ``(column name, 10)``
    tuples, first 'Categorical 1' and then 'Categorical 2'.

    NOTE(review): the fixture is handed to ``categorize`` whole here,
    while other tests in this module unpack it as ``(X, y)`` -- confirm
    which fixture shape this test expects.
    """
    column_sizes = categorize(test_data)

    expected = [('Categorical 1', 10), ('Categorical 2', 10)]
    assert len(column_sizes) == len(expected)
    # Compare positionally so both the ordering and the contents are pinned.
    for position, entry in enumerate(expected):
        assert column_sizes[position] == entry
def test_pick_emb_dim(test_data):
    """Check the sizes ``pick_emb_dim`` picks when no ``emb_dims`` is given.

    Both categorical columns are expected to map to ``(10, 5)``.

    NOTE(review): the fixture is handed to ``categorize`` whole here,
    while other tests in this module unpack it as ``(X, y)`` -- confirm
    which fixture shape this test expects.
    """
    column_sizes = categorize(test_data)
    picked = pick_emb_dim(column_sizes)

    assert len(picked) == 2
    for column in ('Categorical 1', 'Categorical 2'):
        assert picked[column] == (10, 5)
def test_transform(test_data):
    """Fit an ``Embedder`` for one epoch and check the transformed shape.

    The fixture is unpacked as ``(X, y)``; ``X`` is categorized, encoded
    with ``encode_categorical`` and then fed to ``fit`` / ``transform``.
    """
    features, target = test_data
    embedding_sizes = pick_emb_dim(categorize(features))
    # Only the encoded frame is needed; the returned encoders are unused.
    features, _ = encode_categorical(features)

    model = Embedder(embedding_sizes)
    model.fit(features, target, epochs=1)
    output = model.transform(features)

    # presumably 18 pass-through columns plus two 50-wide embeddings --
    # TODO confirm against the fixture definition.
    expected_shape = (10000, 18 + 50 + 50)
    assert output.shape == expected_shape
def test_fit_predict_unit_interval(test_data):
    """Fit an ``Embedder`` for one epoch and check predictions lie in [0, 1].

    This test was originally named ``test_fit_predict``, the same name as
    a later test in this module. The later definition rebinds the name at
    import time, so pytest never collected this one. It is renamed here
    (keeping the ``test_fit_predict`` prefix so ``-k test_fit_predict``
    still selects it) so that it actually runs.

    NOTE(review): the [0, 1] bound presumably targets probability-like
    outputs -- confirm the fixture's target is compatible now that this
    test is live.
    """
    X, y = test_data
    cat_sz = categorize(X)
    emb_sz = pick_emb_dim(cat_sz)
    # Only the encoded frame is needed; the returned encoders are unused.
    X, _ = encode_categorical(X)

    embedder = Embedder(emb_sz)
    embedder.fit(X, y, epochs=1)
    preds = embedder.predict(X[:100])

    assert len(preds) == 100
    # Reduce with the array's own ``.all()`` rather than the builtin
    # ``all``: the builtin iterates the first axis and raises on
    # multi-column rows, while ``.all()`` reduces over every element.
    # Assumes ``preds`` is array-like (the elementwise comparison in the
    # original already relied on that).
    assert (preds >= 0).all()
    assert (preds <= 1).all()
def test_fit_predict(test_data):
    """Fit an ``Embedder`` for one epoch and check predictions are finite.

    Predicts on the first 100 rows of the encoded features and asserts
    that 100 predictions come back with no infinities and no NaNs.
    """
    features, target = test_data
    embedding_sizes = pick_emb_dim(categorize(features))
    # Only the encoded frame is needed; the returned encoders are unused.
    features, _ = encode_categorical(features)

    model = Embedder(embedding_sizes)
    model.fit(features, target, epochs=1)
    predictions = model.predict(features[:100])

    assert len(predictions) == 100
    has_inf = np.isinf(predictions).any()
    assert not has_inf
    has_nan = np.isnan(predictions).any()
    assert not has_nan
def test_pipeline(test_data):
    """Run an ``Embedder`` + ``RandomForestRegressor`` pipeline end to end.

    The pipeline is fitted on the encoded features and then asked to
    predict on the same rows; the test asserts 10000 predictions come
    back with no infinities and no NaNs.
    """
    features, target = test_data
    embedding_sizes = pick_emb_dim(categorize(features))
    # Only the encoded frame is needed; the returned encoders are unused.
    encoded, _ = encode_categorical(features)

    steps = [
        ('embedding', Embedder(embedding_sizes)),
        ('randomforest', RandomForestRegressor()),
    ]
    model = Pipeline(steps)
    model.fit(encoded, target)
    predictions = model.predict(encoded)

    assert len(predictions) == 10000
    has_inf = np.isinf(predictions).any()
    assert not has_inf
    has_nan = np.isnan(predictions).any()
    assert not has_nan