Example #1
0
def test_vectorize_with_bias(cat_cols, num_cols):
    """Check the SQL rendered by ``vectorize`` when ``bias=True``.

    The expected query wraps the concatenated quantitative and
    categorical features in ``add_bias(...)``.
    """
    expected_sql = f"""\
-- client: molehill/{molehill.__version__}
select
  rowid
  , add_bias(
    array_concat(
      quantitative_features(
        array("num1", "num2")
        , num1
        , num2
      ),
      categorical_features(
        array("cat1", "cat2", "cat3")
        , cat1
        , cat2
        , cat3
      )
    )
  ) as features
  , target
from
  src_tbl
;
"""

    actual_sql = vectorize(
        'src_tbl', 'target', cat_cols, num_cols, bias=True
    )
    assert actual_sql == expected_sql
Example #2
0
def test_vectorize_with_emit_null_force_value(cat_cols, num_cols):
    """Check the SQL rendered with ``emit_null`` and ``force_value`` enabled.

    The expected query passes ``'-emit_null'`` to ``quantitative_features``
    and ``'-emit_null -force_value'`` to ``categorical_features``.
    """
    expected_sql = f"""\
-- client: molehill/{molehill.__version__}
select
  rowid
  , array_concat(
    quantitative_features(
      array("num1", "num2")
      , num1
      , num2
      , '-emit_null'
    ),
    categorical_features(
      array("cat1", "cat2", "cat3")
      , cat1
      , cat2
      , cat3
      , '-emit_null -force_value'
    )
  ) as features
  , target
from
  src_tbl
;
"""

    actual_sql = vectorize(
        'src_tbl', 'target', cat_cols, num_cols,
        emit_null=True, force_value=True,
    )
    assert actual_sql == expected_sql
Example #3
0
def test_vectorize_with_hashing_cardinality(cat_cols, num_cols):
    """Check the SQL rendered with ``hashing=True`` and a cardinality of 100.

    The expected query wraps the concatenated features in
    ``feature_hashing(...)`` with the option ``'-num_features 100'``.
    """
    expected_sql = f"""\
-- client: molehill/{molehill.__version__}
select
  rowid
  , feature_hashing(
    array_concat(
      quantitative_features(
        array("num1", "num2")
        , num1
        , num2
      ),
      categorical_features(
        array("cat1", "cat2", "cat3")
        , cat1
        , cat2
        , cat3
      )
    )
    , '-num_features 100'
  ) as features
  , target
from
  src_tbl
;
"""

    actual_sql = vectorize(
        'src_tbl', 'target', cat_cols, num_cols,
        hashing=True, feature_cardinality=100,
    )
    assert actual_sql == expected_sql
Example #4
0
def test_vectorize_dense(cat_cols, num_cols):
    """Check the SQL rendered by ``vectorize`` when ``dense=True``.

    The expected query builds the features as a plain ``array(...)`` of
    the numerical columns followed by the categorical columns.
    """
    expected_sql = f"""\
-- client: molehill/{molehill.__version__}
select
  rowid
  , array(num1, num2, cat1, cat2, cat3) as features
  , target
from
  src_tbl
;
"""

    actual_sql = vectorize(
        'src_tbl', 'target', cat_cols, num_cols, dense=True
    )
    assert actual_sql == expected_sql
Example #5
0
def test_vectorize_with_cat_cols(cat_cols):
    """Check the SQL rendered when only categorical columns are given.

    The expected query uses ``categorical_features(...)`` directly as the
    features expression, with no ``array_concat`` wrapper.
    """
    expected_sql = f"""\
-- client: molehill/{molehill.__version__}
select
  rowid
  , categorical_features(
    array("cat1", "cat2", "cat3")
    , cat1
    , cat2
    , cat3
  ) as features
  , target
from
  src_tbl
;
"""

    actual_sql = vectorize(
        'src_tbl', 'target', categorical_columns=cat_cols
    )
    assert actual_sql == expected_sql
Example #6
0
def test_vectorize_with_num_cols(num_cols):
    """Check the SQL rendered when only numerical columns are given.

    The expected query uses ``quantitative_features(...)`` directly as the
    features expression, with no ``array_concat`` wrapper.
    """
    expected_sql = f"""\
-- client: molehill/{molehill.__version__}
select
  rowid
  , quantitative_features(
    array("num1", "num2")
    , num1
    , num2
  ) as features
  , target
from
  src_tbl
;
"""

    actual_sql = vectorize(
        'src_tbl', 'target', numerical_columns=num_cols
    )
    assert actual_sql == expected_sql
Example #7
0
def test_vectorize_dense_with_hashing_cardinality(cat_cols, num_cols):
    """Check the SQL rendered with ``dense``, ``hashing`` and cardinality 100.

    The expected query keeps the numerical columns as-is inside
    ``array(...)`` and wraps each categorical column in ``mhash(col, 100)``.
    """
    expected_sql = f"""\
-- client: molehill/{molehill.__version__}
select
  rowid
  , array(num1, num2, mhash(cat1, 100), mhash(cat2, 100), mhash(cat3, 100)) as features
  , target
from
  src_tbl
;
"""

    actual_sql = vectorize(
        'src_tbl', 'target', cat_cols, num_cols,
        dense=True, hashing=True, feature_cardinality=100,
    )
    assert actual_sql == expected_sql
Example #8
0
def test_vectorize_without_cols():
    """Check that ``vectorize`` raises ``ValueError`` when called with
    only a source table and a target, and no feature columns."""
    source_table, target_column = 'src_tbl', 'target'
    with pytest.raises(ValueError):
        vectorize(source_table, target_column)