Ejemplo n.º 1
0
# Chain the selector with `lr` (expected to be bound before this snippet)
# and cross-validate the resulting pipeline, splitting with GroupKFold.
sel = SelectKBest()
pipe = make_pipeline(sel, lr)
cross_validate(
    pipe, X, y,
    cv=GroupKFold(),
    # Weights and groups are handed over together in one `props` mapping.
    props={'sample_weight': my_weights, 'groups': my_groups},
    scoring='accuracy',
)

# %%
# Case D: different scoring and fitting weights

# Scorer built from `accuracy_score` with no extra arguments. It is invoked
# by `specially_weighted_acc` below, which passes its `props` through.
weighted_acc = make_scorer(accuracy_score)


def specially_weighted_acc(est, X, y, props):
    """Score ``est`` via ``weighted_acc`` using the scoring-specific weights.

    The entry stored under ``'scoring_weight'`` is re-keyed as
    ``'sample_weight'`` before delegating, so the wrapped scorer receives
    the scoring weights rather than whatever the caller stored under
    ``'sample_weight'`` (presumably the fitting weights -- confirm against
    the ``cross_validate`` call that supplies ``props``).

    Parameters
    ----------
    est, X, y
        Forwarded unchanged to ``weighted_acc``.
    props : dict
        Metadata mapping; must contain the key ``'scoring_weight'``.

    Returns
    -------
    Whatever ``weighted_acc(est, X, y, props)`` returns.

    Raises
    ------
    KeyError
        If ``props`` has no ``'scoring_weight'`` entry.
    """
    # Shallow copy: the caller's mapping must not see the re-keyed entry.
    props = props.copy()
    # Use the *value* stored under 'scoring_weight'; assigning the key name
    # itself would hand the scorer a string instead of weights.
    props['sample_weight'] = props['scoring_weight']
    return weighted_acc(est, X, y, props)


# Estimator for Case D: split with a fresh GroupKFold and score through the
# `specially_weighted_acc` wrapper rather than a scorer object.
lr = LogisticRegressionCV(cv=GroupKFold(), scoring=specially_weighted_acc)
cross_validate(lr,
               X,
               y,
Ejemplo n.º 2
0
from defs import (accuracy, group_cv, make_scorer, SelectKBest,
                  LogisticRegressionCV, cross_validate, make_pipeline, X, y,
                  my_groups, my_weights, my_other_weights)

# %%
# Case A: weighted scoring and fitting

# GroupKFold is presumed to request `groups` without being asked to.
# Weights are different: they have to be requested explicitly, once on the
# scorer created by make_scorer and once on LogisticRegressionCV. Both of
# those consumers know what the key "sample_weight" means.

weighted_acc = make_scorer(accuracy, request_props=['sample_weight'])
lr = (
    LogisticRegressionCV(cv=group_cv, scoring=weighted_acc)
    .request_sample_weight(fit=['sample_weight'])
)
cross_validate(
    lr, X, y,
    cv=group_cv,
    props={'sample_weight': my_weights, 'groups': my_groups},
    scoring=weighted_acc,
)

# Error handling: had props={'sample_eight': my_weights, ...} been passed,
# cross_validate would raise an error, because none of its children
# requested 'sample_eight'.
from defs import (accuracy_score, GroupKFold, make_scorer, SelectKBest,
                  LogisticRegressionCV, cross_validate, make_pipeline, X, y,
                  my_groups, my_weights, my_other_weights)

# %%
# Case A: weighted scoring and fitting

# GroupKFold is presumed to request `groups` without being asked to.
# Weights are different: they have to be requested explicitly, once on the
# scorer created by make_scorer and once on LogisticRegressionCV. Both of
# those consumers know what the key "sample_weight" means.

weighted_acc = make_scorer(accuracy_score, request_metadata=['sample_weight'])
group_cv = GroupKFold()
lr = (
    LogisticRegressionCV(cv=group_cv, scoring=weighted_acc)
    .request_sample_weight(fit=True)  # same as `fit=['sample_weight']`
)
cross_validate(
    lr, X, y,
    cv=group_cv,
    metadata={'sample_weight': my_weights, 'groups': my_groups},
    scoring=weighted_acc,
)

# Here lr.get_metadata_request() would return
# {'fit': {'groups': {'groups'}, 'sample_weight': {'sample_weight'}},
#  'predict': {},