sel = SelectKBest() pipe = make_pipeline(sel, lr) cross_validate(pipe, X, y, cv=GroupKFold(), props={ 'sample_weight': my_weights, 'groups': my_groups }, scoring='accuracy') # %% # Case D: different scoring and fitting weights weighted_acc = make_scorer(accuracy_score) def specially_weighted_acc(est, X, y, props): props = props.copy() props['sample_weight'] = 'scoring_weight' return weighted_acc(est, X, y, props) lr = LogisticRegressionCV( cv=GroupKFold(), scoring=specially_weighted_acc, ) cross_validate(lr, X, y,
from defs import (
    accuracy,
    group_cv,
    make_scorer,
    SelectKBest,
    LogisticRegressionCV,
    cross_validate,
    make_pipeline,
    X,
    y,
    my_groups,
    my_weights,
    my_other_weights,
)

# %%
# Case A: weighted scoring and fitting
#
# The splitter is presumed to request `groups` by default. Weights, on the
# other hand, have to be requested explicitly: once for the scorer built by
# make_scorer and once for LogisticRegressionCV. Both consumers understand
# the key "sample_weight".

# Scorer that asks for the "sample_weight" prop.
weighted_acc = make_scorer(accuracy, request_props=['sample_weight'])

# Build the estimator, then declare that `fit` wants "sample_weight".
lr = LogisticRegressionCV(cv=group_cv, scoring=weighted_acc)
lr = lr.request_sample_weight(fit=['sample_weight'])

cross_validate(
    lr,
    X,
    y,
    cv=group_cv,
    props={'sample_weight': my_weights, 'groups': my_groups},
    scoring=weighted_acc,
)

# Error handling: had props={'sample_eight': my_weights, ...} been passed
# instead, cross_validate would raise an error, because no child requested
# the key 'sample_eight'.
from defs import (
    accuracy_score,
    GroupKFold,
    make_scorer,
    SelectKBest,
    LogisticRegressionCV,
    cross_validate,
    make_pipeline,
    X,
    y,
    my_groups,
    my_weights,
    my_other_weights,
)

# %%
# Case A: weighted scoring and fitting
#
# GroupKFold is presumed to request `groups` by default. Weights, on the
# other hand, have to be requested explicitly: once for the scorer built by
# make_scorer and once for LogisticRegressionCV. Both consumers understand
# the key "sample_weight".

# Scorer that asks for the "sample_weight" metadata.
weighted_acc = make_scorer(
    accuracy_score,
    request_metadata=['sample_weight'],
)

# A single splitter instance is shared by the inner and the outer CV.
group_cv = GroupKFold()

# Build the estimator, then declare that `fit` wants "sample_weight".
lr = LogisticRegressionCV(cv=group_cv, scoring=weighted_acc)
lr = lr.request_sample_weight(fit=True)  # same as `fit=['sample_weight']`

cross_validate(
    lr,
    X,
    y,
    cv=group_cv,
    metadata={'sample_weight': my_weights, 'groups': my_groups},
    scoring=weighted_acc,
)

# Here lr.get_metadata_request() would return
# {'fit': {'groups': {'groups'}, 'sample_weight': {'sample_weight'}},
#  'predict': {},