def reduce(self, result_set):
    """Reduce a set of results to the single most accurate one.

    Each result in ``result_set`` is the dictionary returned by
    "transform", with an additional unique key called "key"
    (for example "MySVC(C=1.0)" or "MySVC(C=2.0)").

    If there is a CV in the workflow, result keys are suffixed with
    /test or /train. ``train_test_split`` splits a result into two
    sub-dicts with that suffix removed; it returns two references to the
    same dict when no /test or /train suffix was found.

    Parameters
    ----------
    result_set: iterable of results
        the results to compare.

    Returns
    -------
    The ``Result`` (carrying the key of the winning entry and its
    accuracy scores) with the highest accuracy: the test accuracy when
    there was a CV, the plain accuracy otherwise. On ties the first
    result encountered wins. Returns ``None`` when ``result_set`` is
    empty or when no accuracy exceeds the initial threshold of -1.
    """
    # Kept local so that the method can be shipped for remote execution.
    from epac.utils import train_test_split

    best_result = None
    max_accuracy = -1
    for result in result_set:
        output = dict()  # scores attached to this candidate
        result_train, result_test = train_test_split(result)
        if result_train is result_test:
            # No CV in the workflow: a single accuracy value.
            accuracy = accuracy_score(result['y/true'], result['y/pred'])
            output["acc/y"] = accuracy
        else:
            # There was a CV in the workflow: rank on the test accuracy,
            # and report the train accuracy alongside it.
            accuracy = accuracy_score(result_test['y/true'],
                                      result_test['y/pred'])
            output["acc/y/test"] = accuracy
            output["acc/y/train"] = accuracy_score(result_train['y/true'],
                                                   result_train['y/pred'])
        if accuracy > max_accuracy:
            # Remember the new maximum; without this update every result
            # would beat the initial threshold and the last one would win.
            max_accuracy = accuracy
            # keep the key in the reduced result
            best_result = Result(key=result['key'], **output)
    return best_result  # the reducer returns a single result
def transform(self, **Xy):
    """Fit the estimator, transform the data and merge the output into Xy.

    When ``conf.KW_SPLIT_TRAIN_TEST`` is present in ``Xy``, the estimator
    is fitted on the train part only, both the train and the test parts
    are transformed, and the two outputs are merged back with
    ``train_test_merge``. Otherwise the estimator is fitted on, and
    applied to, the whole of ``Xy``.

    Parameters
    ----------
    Xy: dictionary
        parameters for fit and transform

    Returns
    -------
    The ``Xy`` dictionary, updated with the transformed values.
    """
    def run_transform(data):
        # Pick the transform arguments out of the data, transform them,
        # and store the output in a dict keyed by those argument names.
        transformed = self.estimator.transform(
            **_sub_dict(data, self.in_args_transform))
        return _as_dict(transformed, keys=self.in_args_transform)

    if conf.KW_SPLIT_TRAIN_TEST not in Xy:
        self.estimator.fit(**_sub_dict(Xy, self.in_args_fit))
        transformed_values = run_transform(Xy)
    else:
        train_part, test_part = train_test_split(Xy)
        self.estimator.fit(**_sub_dict(train_part, self.in_args_fit))
        train_out = run_transform(train_part)
        test_out = run_transform(test_part)
        transformed_values = train_test_merge(train_out, test_out)

    # update the incoming data with the transformed values
    Xy.update(transformed_values)
    return Xy
def transform(self, **Xy):
    """Apply the wrapped node's ``transform`` and return its output.

    When ``conf.KW_SPLIT_TRAIN_TEST`` is present in ``Xy``, the train and
    test parts are transformed separately and the two outputs are merged
    with ``train_test_merge``. Otherwise the whole of ``Xy`` is
    transformed in one call.

    Parameters
    ----------
    Xy: dictionary
        parameters for transform

    Returns
    -------
    The dictionary produced by the wrapped node (merged train/test
    output in the split case).

    Raises
    ------
    ValueError
        if the wrapped node's ``transform`` does not return a dictionary.
        Dictionary subclasses are accepted.
    """
    node = self.wrapped_node

    def not_a_dict_error():
        return ValueError("%s.transform should return a dictionary" %
                          (node.__class__.__name__))

    if conf.KW_SPLIT_TRAIN_TEST in Xy:
        Xy_train, Xy_test = train_test_split(Xy)
        # Select the transform arguments from each part and transform them.
        Xy_out_tr = node.transform(
            **_sub_dict(Xy_train, self.in_args_transform))
        Xy_out_te = node.transform(
            **_sub_dict(Xy_test, self.in_args_transform))
        # isinstance (rather than an exact type match) so that dict
        # subclasses returned by a node are not rejected.
        if not (isinstance(Xy_out_tr, dict) and isinstance(Xy_out_te, dict)):
            raise not_a_dict_error()
        return train_test_merge(Xy_out_tr, Xy_out_te)

    # No train/test split: transform everything in one call.
    Xy_out = node.transform(**_sub_dict(Xy, self.in_args_transform))
    if not isinstance(Xy_out, dict):
        raise not_a_dict_error()
    return Xy_out
def transform(self, **Xy):
    """Apply the wrapped node's ``transform`` and return its output.

    When ``conf.KW_SPLIT_TRAIN_TEST`` is present in ``Xy``, the train and
    test parts are transformed separately and the two outputs are merged
    with ``train_test_merge``. Otherwise the whole of ``Xy`` is
    transformed in one call.

    Parameters
    ----------
    Xy: dictionary
        parameters for transform

    Returns
    -------
    The dictionary produced by the wrapped node (merged train/test
    output in the split case).

    Raises
    ------
    ValueError
        if the wrapped node's ``transform`` does not return a dictionary.
        Dictionary subclasses are accepted.
    """
    node = self.wrapped_node

    def not_a_dict_error():
        return ValueError("%s.transform should return a dictionary" %
                          (node.__class__.__name__))

    if conf.KW_SPLIT_TRAIN_TEST in Xy:
        Xy_train, Xy_test = train_test_split(Xy)
        # Select the transform arguments from each part and transform them.
        Xy_out_tr = node.transform(
            **_sub_dict(Xy_train, self.in_args_transform))
        Xy_out_te = node.transform(
            **_sub_dict(Xy_test, self.in_args_transform))
        # isinstance (rather than an exact type match) so that dict
        # subclasses returned by a node are not rejected.
        if not (isinstance(Xy_out_tr, dict) and isinstance(Xy_out_te, dict)):
            raise not_a_dict_error()
        return train_test_merge(Xy_out_tr, Xy_out_te)

    # No train/test split: transform everything in one call.
    Xy_out = node.transform(**_sub_dict(Xy, self.in_args_transform))
    if not isinstance(Xy_out, dict):
        raise not_a_dict_error()
    return Xy_out
def transform(self, **Xy):
    """Record the incoming data as a result and pass the data downstream.

    The data are wrapped in a ``Result`` keyed by this node's signature
    and saved through ``save_results``; a ``StoreMem`` store is created
    first when the node has no store yet.

    Parameters
    ----------
    Xy: dictionary
        the data reaching this node

    Returns
    -------
    ``Xy`` itself when ``train_test_split`` found no train/test split
    (it returned the same object twice), otherwise the train part.
    """
    train_part, test_part = train_test_split(Xy)
    node_result = Result(key=self.get_signature(), **Xy)
    if not self.store:
        # Lazily create a store so that the result can be saved.
        self.store = StoreMem()
    self.save_results(ResultSet(node_result))
    return Xy if train_part is test_part else train_part
def transform(self, **Xy):
    """Search the best parameters, then run the selected node on the data.

    The search (``self._search_best``) is run on the train part when
    ``train_test_split`` finds a train/test split, and on the whole of
    ``Xy`` otherwise. The node returned by the search is then run
    top-down on the full ``Xy``.

    Parameters
    ----------
    Xy: dictionary
        the data reaching this node

    Returns
    -------
    The output of the selected node's ``top_down``, with the best
    parameters stored under ``conf.BEST_PARAMS``. The selected node and
    the best parameters are also kept on ``self.refited`` and
    ``self.best_params``.
    """
    train_part, test_part = train_test_split(Xy)
    # Same object twice means there was no train/test split.
    search_data = Xy if train_part is test_part else train_part
    selected_node, selected_params = self._search_best(**search_data)
    result = selected_node.top_down(**Xy)
    result[conf.BEST_PARAMS] = selected_params
    self.refited = selected_node
    self.best_params = selected_params
    return result
def transform(self, **Xy):
    """Fit the estimator and return its predictions next to true values.

    When ``conf.KW_SPLIT_TRAIN_TEST`` is present in ``Xy``, the estimator
    is fitted on the train part; the output holds the train predictions,
    the test predictions and the true test values, each with its own key
    suffix built from ``conf.SEP``, ``conf.TRAIN`` / ``conf.TEST`` and
    ``conf.PREDICTION`` / ``conf.TRUE``. Otherwise the estimator is
    fitted on the whole of ``Xy`` and the output holds the predictions
    and the true values, suffixed with ``conf.PREDICTION`` and
    ``conf.TRUE`` respectively.

    Parameters
    ----------
    Xy: dictionary
        parameters for fit and predict

    Returns
    -------
    A dictionary of suffixed predictions and true values.
    """
    def run_predict(data):
        # Pick the predict arguments out of the data, predict, and store
        # the output in a dict keyed by the predicted argument names.
        predicted = self.estimator.predict(
            **_sub_dict(data, self.in_args_predict))
        return _as_dict(predicted, keys=self.out_args_predict)

    if conf.KW_SPLIT_TRAIN_TEST not in Xy:
        self.estimator.fit(**_sub_dict(Xy, self.in_args_fit))
        Xy_out = _dict_suffix_keys(run_predict(Xy),
                                   suffix=conf.SEP + conf.PREDICTION)
        # True values
        true_values = _sub_dict(Xy, self.out_args_predict)
        Xy_out.update(_dict_suffix_keys(true_values,
                                        suffix=conf.SEP + conf.TRUE))
        return Xy_out

    train_part, test_part = train_test_split(Xy)
    Xy_out = dict()
    # Fit on the train part only
    self.estimator.fit(**_sub_dict(train_part, self.in_args_fit))
    # Train predictions
    train_pred = run_predict(train_part)
    Xy_out.update(_dict_suffix_keys(
        train_pred,
        suffix=conf.SEP + conf.TRAIN + conf.SEP + conf.PREDICTION))
    # Test predictions
    test_pred = run_predict(test_part)
    Xy_out.update(_dict_suffix_keys(
        test_pred,
        suffix=conf.SEP + conf.TEST + conf.SEP + conf.PREDICTION))
    # True test values
    test_true = _sub_dict(test_part, self.out_args_predict)
    Xy_out.update(_dict_suffix_keys(
        test_true,
        suffix=conf.SEP + conf.TEST + conf.SEP + conf.TRUE))
    return Xy_out
def reduce(self, result_set):
    """Compute accuracy scores for every result of ``result_set``.

    For a result without train/test split (``train_test_split`` returns
    the same object twice) the score is stored under "acc/y"; otherwise
    the test and train accuracies are stored under "acc/y/test" and
    "acc/y/train".

    Parameters
    ----------
    result_set: iterable of results
        each result provides 'y/true', 'y/pred' and 'key' entries.

    Returns
    -------
    A ``ResultSet`` holding one ``Result`` per input result, carrying
    the input's key and its accuracy scores.
    """
    # Imports are done within the method so that the code can be
    # executed remotely.
    from epac.utils import train_test_split
    from epac.map_reduce.results import ResultSet

    def accuracy_scores(result):
        # Build the dictionary of scores for one result.
        train_part, test_part = train_test_split(result)
        if train_part is test_part:
            return {"acc/y": accuracy_score(result['y/true'],
                                            result['y/pred'])}
        scores = dict()
        scores["acc/y/test"] = accuracy_score(test_part['y/true'],
                                              test_part['y/pred'])
        scores["acc/y/train"] = accuracy_score(train_part['y/true'],
                                               train_part['y/pred'])
        return scores

    reduced = [Result(key=result['key'], **accuracy_scores(result))
               for result in result_set]
    return ResultSet(*reduced)
def transform(self, **Xy):
    """Fit the wrapped node, then either transform or predict.

    The mode depends on what the wrapped node implements:

    - both ``transform`` and ``predict``: predict when this node has no
      children (leaf node), transform otherwise (internal node);
    - only ``transform``: transform;
    - only ``predict``: predict.

    In both modes the wrapped node is fitted on the train part when
    ``train_test_split`` finds a train/test split, and on the whole of
    ``Xy`` otherwise.

    Parameters
    ----------
    Xy: dictionary
        parameters for fit and transform

    Returns
    -------
    Transform mode: ``Xy`` updated with the transformed values.
    Predict mode: a new dictionary of predictions and true values whose
    keys are suffixed using ``conf.SEP``, ``conf.TRAIN`` / ``conf.TEST``
    and ``conf.PREDICTION`` / ``conf.TRUE``.

    Raises
    ------
    ValueError
        if the wrapped node implements neither ``transform`` nor
        ``predict``.
    """
    can_transform = hasattr(self.wrapped_node, "transform")
    can_predict = hasattr(self.wrapped_node, "predict")
    if can_transform and can_predict:
        # Leaf nodes predict, internal nodes transform.
        transform_mode = bool(self.children)
    elif can_transform:
        transform_mode = True
    elif can_predict:
        transform_mode = False
    else:
        raise ValueError("%s should implement either transform or predict"
                         % self.wrapped_node.__class__.__name__)

    train_part, test_part = train_test_split(Xy)
    # Two distinct objects mean the data carry a train/test split.
    is_split = train_part is not test_part

    # Fit on the train part when there is one, on everything otherwise.
    fit_data = train_part if is_split else Xy
    self.wrapped_node.fit(**_sub_dict(fit_data, self.in_args_fit))

    if transform_mode:
        if is_split:
            train_out = self._wrapped_node_transform(**train_part)
            test_out = self._wrapped_node_transform(**test_part)
            transformed = train_test_merge(train_out, test_out)
        else:
            transformed = self._wrapped_node_transform(**Xy)
        # update the incoming data with the transformed values
        Xy.update(transformed)
        return Xy

    if not is_split:
        Xy_out = _dict_suffix_keys(self._wrapped_node_predict(**Xy),
                                   suffix=conf.SEP + conf.PREDICTION)
        # True values
        true_values = _sub_dict(Xy, self.out_args_predict)
        Xy_out.update(_dict_suffix_keys(true_values,
                                        suffix=conf.SEP + conf.TRUE))
        return Xy_out

    Xy_out = dict()
    # Train predictions
    train_pred = self._wrapped_node_predict(**train_part)
    Xy_out.update(_dict_suffix_keys(
        train_pred,
        suffix=conf.SEP + conf.TRAIN + conf.SEP + conf.PREDICTION))
    # Test predictions
    test_pred = self._wrapped_node_predict(**test_part)
    Xy_out.update(_dict_suffix_keys(
        test_pred,
        suffix=conf.SEP + conf.TEST + conf.SEP + conf.PREDICTION))
    # True test values
    test_true = _sub_dict(test_part, self.out_args_predict)
    Xy_out.update(_dict_suffix_keys(
        test_true,
        suffix=conf.SEP + conf.TEST + conf.SEP + conf.TRUE))
    return Xy_out