def reduce(self, store_results=True):
    """Bottom-up aggregation of the results of the sub-tree.

    A leaf returns the results it stored under the name "results".
    Any other node merges the results of its children into a ResultSet.
    When a reducer is set, the results are grouped by key (ignoring the
    head of the key), each group is stacked into a single result and
    handed to the reducer.

    Parameters
    ----------
    store_results: boolean
        Not used by this implementation; children are always called with
        ``store_results=False``.

    Return
    ------
    For a leaf, whatever ``load_state(name="results")`` returns.
    Otherwise a ResultSet: the merged children results when no reducer is
    set, else one reduced result per distinct key tail, in order of first
    appearance.
    """
    from collections import OrderedDict

    # Leaf node: return the stored results
    if not self.children:
        return self.load_state(name="results")
    # 1) Build sub-aggregates over children
    children_results = [child.reduce(store_results=False)
                        for child in self.children]
    result_set = ResultSet(*children_results)
    if not self.reducer:
        return result_set
    # 2) Group by key, without consideration of the fold/permutation
    # number which is the head of the key.
    # An OrderedDict preserves the running order of the groups.
    groups = OrderedDict()
    for result in result_set:
        # Remove the head of the key; the result is updated in place
        _, key_tail = key_pop(result["key"], index=0)
        result["key"] = key_tail
        # Group on the key as read back from the result, so that the
        # grouping key is exactly what the result now holds
        groups.setdefault(result["key"], []).append(result)
    # 3) For each key, stack the grouped results and reduce the stack
    reduced = ResultSet()
    for key in groups:
        result_stacked = Result.stack(*groups[key])
        reduced.add(self.reducer.reduce(result_stacked))
    return reduced
def reduce(self, store_results=True):
    """Aggregate, bottom-up, the results produced below this node.

    Leaves return the results stored under the name "results". Other
    nodes merge their children's results into a ResultSet and, if a
    reducer is set, group them by key tail (the key without its head),
    stack each group and apply the reducer to each stacked result.

    Parameters
    ----------
    store_results: boolean
        Not used by this implementation; children are always called with
        ``store_results=False``.

    Return
    ------
    For a leaf, the value returned by ``load_state(name="results")``.
    Otherwise a ResultSet holding either the merged children results (no
    reducer) or one reduced result per distinct key tail.
    """
    from collections import OrderedDict

    # Leaf node: return the stored results
    if not self.children:
        return self.load_state(name="results")

    # 1) Build sub-aggregates over children
    children_results = [
        child.reduce(store_results=False) for child in self.children
    ]
    result_set = ResultSet(*children_results)
    if not self.reducer:
        return result_set

    # 2) Group by key, without consideration of the fold/permutation
    # number which is the head of the key. OrderedDict keeps the groups
    # in the order in which their key was first met.
    groups = OrderedDict()
    for result in result_set:
        # Drop the head of the key (the result is modified in place)
        _, key_tail = key_pop(result["key"], index=0)
        result["key"] = key_tail
        # Use the key as read back from the result as the grouping key
        groups.setdefault(result["key"], []).append(result)

    # 3) Stack the results of each group, then reduce the stack
    reduced = ResultSet()
    for key in groups:
        result_stacked = Result.stack(*groups[key])
        reduced.add(self.reducer.reduce(result_stacked))
    return reduced
def reduce(self, store_results=True):
    """Merge the children results and, if a reducer is set, refit.

    Without a reducer the merged children results are returned as is.
    With a reducer, ``self.reducer.reduce`` is expected to return a pair
    ``(to_refit, best_params)``; both are kept on the node (``refited``,
    ``best_params``), the node to refit is run top-down on the data loaded
    from this node's stored results, and its output, completed with the
    best parameters under ``conf.BEST_PARAMS``, is returned.

    Parameters
    ----------
    store_results: boolean
        Not used by this implementation; children are always called with
        ``store_results=False``.

    Return
    ------
    A ResultSet: the merged children results, or a single result keyed by
    this node's signature when a reducer is set.
    """
    collected = []
    for child in self.children:
        collected.append(child.reduce(store_results=False))
    results = ResultSet(*collected)

    # No reducer: nothing more to do than merging
    if not self.reducer:
        return results

    to_refit, best_params = self.reducer.reduce(results)
    # Data on which the selected node is run again
    stored = self.load_results()
    Xy = self._results2dict(**stored)
    self.refited = to_refit
    self.best_params = best_params
    out = self.refited.top_down(**Xy)
    out[conf.BEST_PARAMS] = best_params
    return ResultSet(Result(key=self.get_signature(), **out))
def reduce(self, store_results=True):
    """Merge the results of all the children into one ResultSet.

    Parameters
    ----------
    store_results: boolean
        Not used by this implementation; children are always called with
        ``store_results=False``.

    Return
    ------
    A ResultSet built from the reduced results of every child.
    """
    # Reduce each child in turn, then merge
    per_child = []
    for child in self.children:
        per_child.append(child.reduce(store_results=False))
    return ResultSet(*per_child)
def reduce(self, store_results=True):
    """Merge the children results and add this node's signature to keys.

    Parameters
    ----------
    store_results: boolean
        Not used by this implementation; children are always called with
        ``store_results=False``.

    Return
    ------
    A ResultSet of the children results in which every key has been
    replaced by ``key_push(self.get_signature(), key)``.
    """
    # Reduce each child in turn, then merge
    per_child = []
    for child in self.children:
        per_child.append(child.reduce(store_results=False))
    merged = ResultSet(*per_child)

    # Push the node signature onto every key (results updated in place)
    for res in merged:
        res["key"] = key_push(self.get_signature(), res["key"])
    return merged
def transform(self, **Xy):
    """Store the incoming data as a result and pass it downstream.

    The whole input is wrapped in a Result keyed by the node signature
    and saved with ``save_results``. A ``StoreMem`` is created first when
    the node has no store yet.

    Parameters
    ----------
    **Xy: dict
        the keyword dictionary of data-flow

    Return
    ------
    The input dictionary when ``train_test_split`` returns the same
    object for train and test, otherwise the train part only.
    """
    train_part, test_part = train_test_split(Xy)
    stored = Result(key=self.get_signature(), **Xy)
    # Make sure there is somewhere to save the result
    if not self.store:
        self.store = StoreMem()
    self.save_results(ResultSet(stored))
    return Xy if train_part is test_part else train_part
def reduce(self, result_set):
    """Compute accuracy scores for each result of ``result_set``.

    Each result is split with ``train_test_split``. When train and test
    are the same object a single score is stored under "acc/y";
    otherwise the test and train scores are stored under "acc/y/test"
    and "acc/y/train".

    Parameters
    ----------
    result_set: iterable of results
        each result gives access to "key", "y/true" and "y/pred"

    Return
    ------
    A ResultSet with one Result of scores per input result, keyed like
    the input result.
    """
    # For remote execution of the code, imports are done within methods
    from epac.utils import train_test_split
    from epac.map_reduce.results import ResultSet

    scored = []
    for res in result_set:
        train_part, test_part = train_test_split(res)
        if train_part is not test_part:
            # Distinct train and test parts: score both, test first
            scores = {
                "acc/y/test": accuracy_score(test_part['y/true'],
                                             test_part['y/pred']),
                "acc/y/train": accuracy_score(train_part['y/true'],
                                              train_part['y/pred']),
            }
        else:
            # No train/test distinction: a single score
            scores = {
                "acc/y": accuracy_score(res['y/true'], res['y/pred']),
            }
        scored.append(Result(key=res['key'], **scores))
    return ResultSet(*scored)
def top_down(self, **Xy):
    """Top-down data processing method

    Transform the data with this node, then pass the transformed data
    down to the children. A node without children wraps its output in a
    Result keyed by the node signature and saves it, as a ResultSet,
    under the name "result_set".

    Most of time, this method should be re-defined.

    Parameters
    ----------
    **Xy: dict
        the keyword dictionary of data-flow

    Return
    ------
    For a node without children, the dictionary of transformed data.
    For a node with children, the output of the child when there is
    exactly one, otherwise the list of the children outputs.
    """
    if conf.TRACE_TOPDOWN:
        # print() with a single argument behaves the same on Python 2
        # and Python 3, unlike the print statement
        print(self.get_key())
    if debug.DEBUG:
        # Keep the current node and its input reachable for debugging
        debug.current = self
        debug.Xy = Xy
    if not self.parent:
        # Node without parent: perform some initialization
        self.initialization(**Xy)
    Xy = self.transform(**Xy)
    if self.children:
        # Call children top_down down to leaves
        ret = [child.top_down(**Xy)
               for child in self.get_children_top_down()]
        Xy = ret[0] if len(ret) == 1 else ret
    else:
        result = Result(key=self.get_signature(), **Xy)
        self.save_state(ResultSet(result), name="result_set")
    return Xy
def top_down(self, **Xy):
    """Top-down data processing method

    Transform the data with this node, then, unless ``stop_top_down`` is
    set, pass the transformed data down to the children. A node without
    children saves its output as a ResultSet with ``save_results``.

    Most of time, this method should be re-defined.

    Parameters
    ----------
    **Xy: dict
        the keyword dictionary of data-flow

    Return
    ------
    The dictionary of transformed data when the descent stops at this
    node (``stop_top_down`` set, or no children). Otherwise the output of
    the child when there is exactly one, else the list of the children
    outputs.

    Example
    -------
    >>> from epac import Methods
    >>> from sklearn.svm import SVC
    >>> from sklearn import datasets
    >>> X, y = datasets.make_classification(n_samples=12,
    ...                                     n_features=10,
    ...                                     n_informative=2,
    ...                                     random_state=1)
    >>> methods = Methods(*[SVC(C=1), SVC(C=2)])
    >>> methods.top_down(X=X, y=y)
    [{'y/true': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1]), 'y/pred': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1])}, {'y/true': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1]), 'y/pred': array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1])}]
    """
    if conf.TRACE_TOPDOWN:
        print(self.get_key())
    if debug.DEBUG:
        # Keep the current node and its input reachable for debugging
        debug.current = self
        debug.Xy = Xy
    if not self.parent:
        # Node without parent: perform some initialization
        self.initialization(**Xy)

    Xy = self.transform(**Xy)

    # The descent is explicitly stopped at this node
    if self.stop_top_down:
        return Xy

    # No children: store the output of this node
    if not self.children:
        leaf_result = Result(key=self.get_signature(), **Xy)
        self.save_results(ResultSet(leaf_result))
        return Xy

    # Call children top_down down to leaves
    outputs = []
    for child in self.get_children_top_down():
        outputs.append(child.top_down(**Xy))
    if len(outputs) == 1:
        return outputs[0]
    return outputs
def reduce(self, store_results=True):
    """Reduce the first child and add this node's signature to the keys.

    Only ``self.children[0]`` is reduced; other children, if any, are
    not visited.

    Parameters
    ----------
    store_results: boolean
        Not used by this implementation; the child is always called with
        ``store_results=False``.

    Return
    ------
    A ResultSet built from the first child's results, in which every key
    has been replaced by ``key_push(self.get_signature(), key)``.
    """
    first_child = self.children[0]
    child_results = first_child.reduce(store_results=False)
    results = ResultSet(child_results)
    # Push the node signature onto every key (results updated in place)
    for res in results:
        res["key"] = key_push(self.get_signature(), res["key"])
    return results