예제 #1
0
def test_pick_custom_emb_dim(test_data):
    """Check that ``pick_emb_dim`` honours caller-supplied ``emb_dims``.

    The result of ``categorize(test_data)`` is passed to ``pick_emb_dim``
    together with ``emb_dims=[3, 4]``; the returned mapping must hold exactly
    two entries, keyed by column name.
    """
    cardinalities = categorize(test_data)
    chosen = pick_emb_dim(cardinalities, emb_dims=[3, 4])

    # Presumably each value is (number of categories, embedding width) --
    # TODO confirm against pick_emb_dim.
    expected = {
        'Categorical 1': (10, 3),
        'Categorical 2': (10, 4),
    }

    assert len(chosen) == 2
    for column, size in expected.items():
        assert chosen[column] == size
예제 #2
0
def test_categorize(test_data):
    """Check that ``categorize`` reports two categorical columns.

    The result must have length 2 and be indexable by position, yielding
    ``(column name, 10)`` pairs in the order asserted below.
    """
    found = categorize(test_data)

    # Presumably the second tuple element is the number of distinct
    # categories -- TODO confirm against categorize.
    expected = [
        ('Categorical 1', 10),
        ('Categorical 2', 10),
    ]

    assert len(found) == 2
    for position, pair in enumerate(expected):
        assert found[position] == pair
예제 #3
0
def test_pick_emb_dim(test_data):
    """Check the sizes ``pick_emb_dim`` chooses when no ``emb_dims`` is given.

    The result of ``categorize(test_data)`` is passed to ``pick_emb_dim`` on
    its own; the returned mapping must hold exactly two entries, keyed by
    column name, each equal to ``(10, 5)``.
    """
    cardinalities = categorize(test_data)
    chosen = pick_emb_dim(cardinalities)

    # Presumably each value is (number of categories, embedding width) --
    # TODO confirm against pick_emb_dim.
    expected = {
        'Categorical 1': (10, 5),
        'Categorical 2': (10, 5),
    }

    assert len(chosen) == 2
    for column, size in expected.items():
        assert chosen[column] == size
예제 #4
0
def test_transform(test_data):
    """Fit an ``Embedder`` for one epoch and check the transformed shape.

    ``test_data`` is unpacked into features and target. The features are
    categorized, sized with ``pick_emb_dim`` and encoded with
    ``encode_categorical`` before fitting; ``transform`` is then applied to
    the same encoded features.
    """
    features, target = test_data
    cardinalities = categorize(features)
    emb_sz = pick_emb_dim(cardinalities)
    # The second return value of encode_categorical is not needed here.
    encoded, _encoders = encode_categorical(features)

    model = Embedder(emb_sz)
    model.fit(encoded, target, epochs=1)

    # Presumably 18 pass-through columns plus two 50-wide embeddings --
    # TODO confirm against the fixture and Embedder.
    expected_shape = (10000, 18 + 50 + 50)

    assert model.transform(encoded).shape == expected_shape
예제 #5
0
def test_fit_predict(test_data):
    """Fit an ``Embedder`` for one epoch and check predictions lie in [0, 1].

    Predictions are requested for the ``[:100]`` slice of the encoded
    features; there must be 100 of them, none above 1 and none below 0.

    NOTE(review): another snippet defines a test with this same name; if both
    live in one module the later definition shadows the earlier one.
    """
    features, target = test_data
    cardinalities = categorize(features)
    emb_sz = pick_emb_dim(cardinalities)
    # The second return value of encode_categorical is not needed here.
    encoded, _encoders = encode_categorical(features)

    model = Embedder(emb_sz)
    model.fit(encoded, target, epochs=1)

    preds = model.predict(encoded[:100])

    assert len(preds) == 100
    # Upper bound first, then lower bound.
    assert all(preds <= 1)
    assert all(preds >= 0)
예제 #6
0
def test_fit_predict(test_data):
    """Fit an ``Embedder`` for one epoch and check predictions are finite.

    Predictions are requested for the ``[:100]`` slice of the encoded
    features; there must be 100 of them, containing no infinities and no
    NaNs.

    NOTE(review): another snippet defines a test with this same name; if both
    live in one module the later definition shadows the earlier one.
    """
    features, target = test_data
    cardinalities = categorize(features)
    emb_sz = pick_emb_dim(cardinalities)
    # The second return value of encode_categorical is not needed here.
    encoded, _encoders = encode_categorical(features)

    model = Embedder(emb_sz)
    model.fit(encoded, target, epochs=1)

    preds = model.predict(encoded[:100])

    assert len(preds) == 100
    # Infinity check runs before the NaN check.
    for is_bad in (np.isinf, np.isnan):
        assert not is_bad(preds).any()
예제 #7
0
def test_pipeline(test_data):
    """Run an ``Embedder`` followed by a random forest inside a ``Pipeline``.

    The pipeline is fitted on the encoded features and the target, then asked
    to predict on the same encoded features; there must be 10000 predictions,
    containing no infinities and no NaNs.
    """
    features, target = test_data
    cardinalities = categorize(features)
    emb_sz = pick_emb_dim(cardinalities)
    # The second return value of encode_categorical is not needed here.
    encoded, _encoders = encode_categorical(features)

    steps = [
        ('embedding', Embedder(emb_sz)),
        ('randomforest', RandomForestRegressor()),
    ]
    model = Pipeline(steps)

    model.fit(encoded, target)
    preds = model.predict(encoded)

    assert len(preds) == 10000
    # Infinity check runs before the NaN check.
    for is_bad in (np.isinf, np.isnan):
        assert not is_bad(preds).any()