class CVBestSearchRefitParallel(Wrapper):
    """Cross-validation + grid-search, then refit with the optimal parameters.

    Average results over the first axis, then find the arguments that
    maximize or minimize a "score" over the other axis.

    Parameters
    ----------
    See CV parameters, plus other parameters:

    score: string
        the score name to be optimized (default "y/test/score_recall_mean").

    arg_max: boolean
        If True, take the parameters that maximize the score; if False, take
        those that minimize it. Default is True.

    Example
    -------
    >>> from sklearn import datasets
    >>> from sklearn.svm import SVC
    >>> from epac import Methods
    >>> from epac.workflow.splitters import CVBestSearchRefitParallel
    >>> X, y = datasets.make_classification(n_samples=12,
    ...                                     n_features=10,
    ...                                     n_informative=2,
    ...                                     random_state=1)
    >>> n_folds_nested = 2
    >>> C_values = [.1, 0.5, 1, 2, 5]
    >>> kernels = ["linear", "rbf"]
    >>> methods = Methods(*[SVC(C=C, kernel=kernel)
    ...                     for C in C_values for kernel in kernels])
    >>> wf = CVBestSearchRefitParallel(methods, n_folds=n_folds_nested)
    >>> wf.run(X=X, y=y)
    [[{'y/test/pred': array([0, 0, 1, 0, 0, 0]), 'y/train/pred': array([0, 0, 0, 0, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])},
      {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])},
      {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])},
      {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])},
      {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])},
      {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])},
      {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])},
      {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])},
      {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])},
      {'y/test/pred': array([1, 1, 1, 0, 1, 1]), 'y/train/pred': array([0, 0, 1, 1, 0, 1]), 'y/test/true': array([1, 0, 0, 1, 0, 1])}],
     [{'y/test/pred': array([0, 1, 1, 0, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])},
      {'y/test/pred': array([0, 1, 1, 1, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])},
      {'y/test/pred': array([0, 1, 0, 0, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])},
      {'y/test/pred': array([0, 1, 1, 1, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])},
      {'y/test/pred': array([0, 1, 0, 0, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])},
      {'y/test/pred': array([0, 1, 1, 1, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])},
      {'y/test/pred': array([0, 1, 0, 0, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])},
      {'y/test/pred': array([0, 1, 1, 0, 1, 0]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])},
      {'y/test/pred': array([0, 1, 0, 0, 1, 1]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])},
      {'y/test/pred': array([0, 1, 1, 0, 1, 0]), 'y/train/pred': array([1, 0, 0, 1, 0, 1]), 'y/test/true': array([0, 0, 1, 1, 0, 1])}]]
    >>> wf.reduce()
    ResultSet(
    [{'key': CVBestSearchRefitParallel, 'best_params': [{'kernel': 'rbf', 'C': 0.1, 'name': 'SVC'}], 'y/true': [1 0 0 1 0 0 1 0 1 1 0 1], 'y/pred': [1 0 0 1 0 0 1 0 1 1 0 1]}])
    """

    def __init__(self, node, **kwargs):
        super(CVBestSearchRefitParallel, self).__init__(wrapped_node=None)
        #### 'y/test/score_recall_mean'
        default_score = "y" + conf.SEP + \
                        conf.TEST + conf.SEP + \
                        conf.SCORE_RECALL_MEAN
        score = kwargs.pop("score") if "score" in kwargs else default_score
        arg_max = kwargs.pop("arg_max") if "arg_max" in kwargs else True
        from epac.workflow.splitters import CV
        # methods = Methods(*tasks)
        cv_node = CV(node=node, reducer=ClassificationReport(keep=False),
                     **kwargs)
        self.add_child(cv_node)
        self.score = score
        self.arg_max = arg_max
        self.refited = None
        self.best_params = None
        self.reducer = CVBestSearchRefitPReducer(self)

    def get_signature(self):
        return self.__class__.__name__

    def transform(self, **Xy):
        # Save the incoming data so that reduce() can refit the best model on
        # it, then pass only the train part down to the nested CV (or the
        # whole data when no train/test split is present).
        Xy_train, Xy_test = train_test_split(Xy)
        result = Result(key=self.get_signature(), **Xy)
        if not self.store:
            self.store = StoreMem()
        self.save_results(ResultSet(result))
        if Xy_train is Xy_test:
            return Xy
        else:
            return Xy_train

    def _results2dict(self, **cpXy):
        # Recover the raw data saved in transform(), dropping the "key" entry.
        res_dict = {}
        for key in cpXy[self.get_signature()]:
            if not key == "key":
                res_dict[key] = cpXy[self.get_signature()][key]
        return res_dict

    def reduce(self, store_results=True):
        # Reduce the nested CV results, pick the best parameters, then refit
        # the winning node on the data saved in transform().
        children_results = [child.reduce(store_results=False)
                            for child in self.children]
        results = ResultSet(*children_results)
        if self.reducer:
            to_refit, best_params = self.reducer.reduce(results)
            Xy = self.load_results()
            Xy = self._results2dict(**Xy)
            self.refited = to_refit
            self.best_params = best_params
            out = self.refited.top_down(**Xy)
            out[conf.BEST_PARAMS] = best_params
            result = Result(key=self.get_signature(), **out)
            return ResultSet(result)
        return results
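# Illustrative usage sketch (an assumption, not part of the original source):
# it mirrors the docstring example above and simply makes the optional
# ``score`` and ``arg_max`` keyword arguments explicit. The score key
# 'y/test/score_recall_mean' passed here is just the default that __init__
# builds from conf.SEP, conf.TEST and conf.SCORE_RECALL_MEAN.
if __name__ == "__main__":
    from sklearn import datasets
    from sklearn.svm import SVC
    from epac import Methods

    X, y = datasets.make_classification(n_samples=12, n_features=10,
                                        n_informative=2, random_state=1)
    methods = Methods(*[SVC(C=C, kernel=kernel)
                        for C in [.1, 0.5, 1, 2, 5]
                        for kernel in ["linear", "rbf"]])
    # Nested 2-fold CV; keep the parameters that maximize the mean recall
    # measured on the test folds, then refit them on the whole data set.
    wf = CVBestSearchRefitParallel(methods, n_folds=2,
                                   score='y/test/score_recall_mean',
                                   arg_max=True)
    wf.run(X=X, y=y)
    print(wf.reduce())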