Beispiel #1
0
    def test_predict_classifier_wo_sigmoid(self):
        ret_sql = f"""\
-- client: molehill/{molehill.__version__}
with features_exploded as (
  select
    id
    , extract_feature(fv) as feature
    , extract_weight(fv) as value
  from
    target_tbl t1
    LATERAL VIEW explode(features) t2 as fv
)
-- DIGDAG_INSERT_LINE
select
  t1.id
  , sum(m1.weight * t1.value) as total_weight
from
  features_exploded t1
  left outer join model_tbl m1
    on (t1.feature = m1.feature)
group by
  t1.id
;
"""
        pred_sql, pred_col = predict_classifier("target_tbl",
                                                "id",
                                                "model_tbl",
                                                sigmoid=False)
        assert pred_sql == ret_sql
        assert pred_col == "total_weight"
Beispiel #2
0
    def test_predict_classifier_bias_hashing(self):
        ret_sql = f"""\
-- client: molehill/{molehill.__version__}
with features_exploded as (
  select
    id
    , extract_feature(fv) as feature
    , extract_weight(fv) as value
  from
    target_tbl t1
    LATERAL VIEW explode(add_bias(feature_hashing(features))) t2 as fv
)
-- DIGDAG_INSERT_LINE
select
  t1.id
  , sigmoid(sum(m1.weight * t1.value)) as probability
from
  features_exploded t1
  left outer join model_tbl m1
    on (t1.feature = m1.feature)
group by
  t1.id
;
"""
        pred_sql, pred_col = predict_classifier("target_tbl",
                                                "id",
                                                "model_tbl",
                                                bias=True,
                                                hashing=True)
        assert pred_sql == ret_sql
        assert pred_col == "probability"