# Example fragment, Case A (weighted scoring and fitting): constructs a
# LogisticRegressionCV with a GroupKFold splitter and 'accuracy' scoring, and
# passes a prop_routing mapping that names 'groups' for 'cv' and
# 'sample_weight' for 'scoring'. A commented-out alternative chains
# add_prop_route(scoring='sample_weight') instead.
# NOTE(review): the closing cross_validate(...) call is cut off after
# `props={` in this view, so its arguments are not documented here.
# NOTE(review): as shown, the statements share one physical line with `#`
# comments; Python treats everything after the first `#` as comment text, so
# confirm the stored file still has its line breaks.
from defs import (SelectKBest, LogisticRegressionCV, GroupKFold, cross_validate, make_pipeline, X, y, my_groups, my_weights, my_other_weights) # %% # Case A: weighted scoring and fitting lr = LogisticRegressionCV( cv=GroupKFold(), scoring='accuracy', prop_routing={ 'cv': ['groups'], 'scoring': ['sample_weight'], } # one question here is whether we need to explicitly route sample_weight # to LogisticRegressionCV's fitting... ) # Alternative syntax, which assumes cv receives 'groups' by default, and that a # method-based API is provided on meta-estimators: # lr = LogisticRegressionCV( # cv=GroupKFold(), # scoring='accuracy', # ).add_prop_route(scoring='sample_weight') cross_validate( lr, X, y, cv=GroupKFold(), props={
def wrapped_weighted_acc(est, X, y, sample_weight=None):
    """Score ``est`` on ``(X, y)`` via ``acc_scorer`` with per-row weights.

    The ``sample_weight`` argument is accepted but not used: the weights are
    always taken from ``MY_WEIGHTS``, selected by ``X.index``.
    """
    row_weights = MY_WEIGHTS.loc[X.index]
    return acc_scorer(est, X, y, sample_weight=row_weights)


# Both the estimator and the scorer are the weight-aware wrappers here.
lr = WeightedLogisticRegressionCV(
    cv=wrapped_group_cv,
    scoring=wrapped_weighted_acc,
).set_props_request(['sample_weight'])
cross_validate(
    lr,
    X,
    y,
    cv=wrapped_group_cv,
    scoring=wrapped_weighted_acc,
)

# %%
# Case B: weighted scoring and unweighted fitting

# The plain LogisticRegressionCV is used, while scoring still goes through
# the weight-aware wrapper.
# NOTE(review): set_props_request(['sample_weight']) is kept exactly as in the
# original; confirm it is intended for a case described as unweighted fitting.
lr = LogisticRegressionCV(
    cv=wrapped_group_cv,
    scoring=wrapped_weighted_acc,
).set_props_request(['sample_weight'])
cross_validate(
    lr,
    X,
    y,
    cv=wrapped_group_cv,
    scoring=wrapped_weighted_acc,
)

# %%
# Case C: unweighted feature selection

# A default SelectKBest step is placed in front of the weight-aware
# estimator inside a pipeline.
lr = WeightedLogisticRegressionCV(
    cv=wrapped_group_cv,
    scoring=wrapped_weighted_acc,
).set_props_request(['sample_weight'])
sel = SelectKBest()
pipe = make_pipeline(sel, lr)
cross_validate(
    pipe,
    X,
    y,
    cv=wrapped_group_cv,
    scoring=wrapped_weighted_acc,
)

# %%
from defs import (
    accuracy_score,
    GroupKFold,
    make_scorer,
    SelectKBest,
    LogisticRegressionCV,
    cross_validate,
    make_pipeline,
    X,
    y,
    my_groups,
    my_weights,
    my_other_weights,
)

# %%
# Case A: weighted scoring and fitting

# GroupKFold is presumed to request `groups` by default. Weights, in
# contrast, have to be requested explicitly, both in make_scorer and for
# LogisticRegressionCV. The key "sample_weight" means the same thing to both
# of those consumers.
weighted_acc = make_scorer(accuracy_score, request_props=['sample_weight'])
lr = LogisticRegressionCV(
    cv=GroupKFold(),
    scoring=weighted_acc,
).set_props_request(['sample_weight'])
cross_validate(
    lr,
    X,
    y,
    cv=GroupKFold(),
    props={
        'sample_weight': my_weights,
        'groups': my_groups,
    },
    scoring=weighted_acc,
)

# Error handling: had props={'sample_eight': my_weights, ...} been passed,
# cross_validate would raise an error, because 'sample_eight' was not
# requested by any of its children.
from defs import (
    accuracy,
    group_cv,
    make_scorer,
    SelectKBest,
    LogisticRegressionCV,
    cross_validate,
    make_pipeline,
    X,
    y,
    my_groups,
    my_weights,
    my_other_weights,
)

# %%
# Case A: weighted scoring and fitting

# GroupKFold is presumed to request `groups` by default. Weights, in
# contrast, have to be requested explicitly, both in make_scorer and for
# LogisticRegressionCV. The key "sample_weight" means the same thing to both
# of those consumers.
weighted_acc = make_scorer(accuracy, request_props=['sample_weight'])
lr = LogisticRegressionCV(
    cv=group_cv,
    scoring=weighted_acc,
).request_sample_weight(fit=['sample_weight'])
cross_validate(
    lr,
    X,
    y,
    cv=group_cv,
    props={
        'sample_weight': my_weights,
        'groups': my_groups,
    },
    scoring=weighted_acc,
)

# Error handling: had props={'sample_eight': my_weights, ...} been passed,
# cross_validate would raise an error, because 'sample_eight' was not
# requested by any of its children.
# Example fragment, Case A (weighted scoring and fitting): LogisticRegressionCV
# and cross_validate both use GroupKFold() and 'accuracy' scoring; weights and
# groups are handed to cross_validate in one props dict keyed 'sample_weight'
# and 'groups'. The inline note records that a misspelled key
# ('sample_eight') would fit and score without weights instead of raising.
# Case B (weighted scoring and unweighted fitting) begins a
# MyLogisticRegressionCV subclass whose fit starts by copying props.
# NOTE(review): the class body continues past this view; only the first
# statement of fit is visible.
# NOTE(review): fit declares props=None but calls props.copy() right away,
# which would raise AttributeError if fit is called without props - confirm.
# NOTE(review): as shown, the statements share one physical line with `#`
# comments; Python treats everything after the first `#` as comment text, so
# confirm the stored file still has its line breaks.
from defs import (accuracy_score, GroupKFold, make_scorer, SelectKBest, LogisticRegressionCV, cross_validate, make_pipeline, X, y, my_groups, my_weights, my_other_weights) # %% # Case A: weighted scoring and fitting lr = LogisticRegressionCV( cv=GroupKFold(), scoring='accuracy', ) cross_validate(lr, X, y, cv=GroupKFold(), props={ 'sample_weight': my_weights, 'groups': my_groups }, scoring='accuracy') # Error handling: if props={'sample_eight': my_weights, ...} was passed # instead, the estimator would fit and score without weight, silently failing. # %% # Case B: weighted scoring and unweighted fitting class MyLogisticRegressionCV(LogisticRegressionCV): def fit(self, X, y, props=None): props = props.copy()
# Example fragment, Case A (weighted scoring and fitting): LogisticRegressionCV
# and cross_validate both use GroupKFold() and 'accuracy' scoring. The props
# dict addresses each consumer with a double-underscore-prefixed key:
# 'cv__groups', 'estimator__cv__groups', 'estimator__sample_weight',
# 'scoring__sample_weight' and 'estimator__scoring__sample_weight'. The inline
# note says a misspelled key ('estimator__sample_eight') would raise an error.
# Case B (weighted scoring and unweighted fitting) rebuilds the same
# estimator and starts a new props dict.
# NOTE(review): the Case B props literal is cut off after its first key in
# this view, so its remaining entries are not documented here.
# NOTE(review): as shown, the statements share one physical line with `#`
# comments; Python treats everything after the first `#` as comment text, so
# confirm the stored file still has its line breaks.
from defs import (GroupKFold, SelectKBest, LogisticRegressionCV, cross_validate, make_pipeline, X, y, my_groups, my_weights, my_other_weights) # %% # Case A: weighted scoring and fitting lr = LogisticRegressionCV( cv=GroupKFold(), scoring='accuracy', ) props = {'cv__groups': my_groups, 'estimator__cv__groups': my_groups, 'estimator__sample_weight': my_weights, 'scoring__sample_weight': my_weights, 'estimator__scoring__sample_weight': my_weights} cross_validate(lr, X, y, cv=GroupKFold(), props=props, scoring='accuracy') # error handling: if props={'estimator__sample_eight': my_weights, ...} was # passed instead, the estimator would raise an error. # %% # Case B: weighted scoring and unweighted fitting lr = LogisticRegressionCV( cv=GroupKFold(), scoring='accuracy', ) props = {'cv__groups': my_groups,
# Example fragment, Case A (weighted scoring and fitting): LogisticRegressionCV
# and cross_validate both use the imported group_cv splitter and 'accuracy'
# scoring; weights and groups are handed to cross_validate in one props dict
# keyed 'sample_weight' and 'groups'. The inline note records that a
# misspelled key ('sample_eight') would fit and score without weights instead
# of raising.
# Case B (weighted scoring and unweighted fitting) begins a
# MyLogisticRegressionCV subclass whose fit starts by copying props.
# NOTE(review): the class body continues past this view; only the first
# statement of fit is visible.
# NOTE(review): fit declares props=None but calls props.copy() right away,
# which would raise AttributeError if fit is called without props - confirm.
# NOTE(review): as shown, the statements share one physical line with `#`
# comments; Python treats everything after the first `#` as comment text, so
# confirm the stored file still has its line breaks.
from defs import (accuracy, group_cv, make_scorer, SelectKBest, LogisticRegressionCV, cross_validate, make_pipeline, X, y, my_groups, my_weights, my_other_weights) # %% # Case A: weighted scoring and fitting lr = LogisticRegressionCV( cv=group_cv, scoring='accuracy', ) cross_validate(lr, X, y, cv=group_cv, props={ 'sample_weight': my_weights, 'groups': my_groups }, scoring='accuracy') # Error handling: if props={'sample_eight': my_weights, ...} was passed # instead, the estimator would fit and score without weight, silently failing. # %% # Case B: weighted scoring and unweighted fitting class MyLogisticRegressionCV(LogisticRegressionCV): def fit(self, X, y, props=None): props = props.copy()
from defs import (
    accuracy_score,
    GroupKFold,
    make_scorer,
    SelectKBest,
    LogisticRegressionCV,
    cross_validate,
    make_pipeline,
    X,
    y,
    my_groups,
    my_weights,
    my_other_weights,
)

# %%
# Case A: weighted scoring and fitting

# GroupKFold is presumed to request `groups` by default. Weights, in
# contrast, have to be requested explicitly, both in make_scorer and for
# LogisticRegressionCV. The key "sample_weight" means the same thing to both
# of those consumers.
weighted_acc = make_scorer(accuracy_score, request_metadata=['sample_weight'])
group_cv = GroupKFold()
lr = LogisticRegressionCV(
    cv=group_cv,
    scoring=weighted_acc,
).request_sample_weight(fit=True)  # same as `fit=['sample_weight']`
cross_validate(
    lr,
    X,
    y,
    cv=group_cv,
    metadata={
        'sample_weight': my_weights,
        'groups': my_groups,
    },
    scoring=weighted_acc,
)

# Here lr.get_metadata_request() would return
# {'fit': {'groups': {'groups'}, 'sample_weight': {'sample_weight'}},
#  'predict': {},
#  'transform': {},
# Example fragment, Case A (weighted scoring and fitting): LogisticRegressionCV
# and cross_validate both use the imported group_cv splitter and 'accuracy'
# scoring. The props dict addresses each consumer with a
# double-underscore-prefixed key: 'cv__groups', 'estimator__cv__groups',
# 'estimator__sample_weight', 'scoring__sample_weight' and
# 'estimator__scoring__sample_weight'. The inline note says a misspelled key
# ('estimator__sample_eight') would raise an error.
# Case B (weighted scoring and unweighted fitting) rebuilds the same
# estimator and starts a new props dict.
# NOTE(review): the Case B props literal is cut off after its first key in
# this view, so its remaining entries are not documented here.
# NOTE(review): as shown, the statements share one physical line with `#`
# comments; Python treats everything after the first `#` as comment text, so
# confirm the stored file still has its line breaks.
from defs import (group_cv, SelectKBest, LogisticRegressionCV, cross_validate, make_pipeline, X, y, my_groups, my_weights, my_other_weights) # %% # Case A: weighted scoring and fitting lr = LogisticRegressionCV( cv=group_cv, scoring='accuracy', ) props = { 'cv__groups': my_groups, 'estimator__cv__groups': my_groups, 'estimator__sample_weight': my_weights, 'scoring__sample_weight': my_weights, 'estimator__scoring__sample_weight': my_weights } cross_validate(lr, X, y, cv=group_cv, props=props, scoring='accuracy') # error handling: if props={'estimator__sample_eight': my_weights, ...} was # passed instead, the estimator would raise an error. # %% # Case B: weighted scoring and unweighted fitting lr = LogisticRegressionCV( cv=group_cv, scoring='accuracy', ) props = { 'cv__groups': my_groups,