def test_mysvc_reducer(self):
    ## 1) Build dataset
    ## ===================================================================
    X, y = datasets.make_classification(n_samples=12,
                                        n_features=10,
                                        n_informative=2,
                                        random_state=1)

    ## 2) Run with Methods
    ## ===================================================================
    my_svc1 = MySVC(C=1.0)
    my_svc2 = MySVC(C=2.0)

    two_svc_single = Methods(my_svc1, my_svc2)
    two_svc_local = Methods(my_svc1, my_svc2)
    two_svc_swf = Methods(my_svc1, my_svc2)

    two_svc_single.reducer = MyReducer()
    two_svc_local.reducer = MyReducer()
    two_svc_swf.reducer = MyReducer()

    for leaf in two_svc_single.walk_leaves():
        print(leaf.get_key())
    for leaf in two_svc_local.walk_leaves():
        print(leaf.get_key())
    for leaf in two_svc_swf.walk_leaves():
        print(leaf.get_key())

    # Top-down process to call transform
    two_svc_single.run(X=X, y=y)
    # Bottom-up process to compute scores
    res_single = two_svc_single.reduce()
    ### You should obtain results like:
    ### ==================================================================
    ### [{'MySVC(C=1.0)': array([ 1.,  1.])},
    ###  {'MySVC(C=2.0)': array([ 1.,  1.])}]

    ### 3) Run using local multi-processes
    ### ==================================================================
    from epac.map_reduce.engine import LocalEngine
    local_engine = LocalEngine(two_svc_local, num_processes=2)
    two_svc_local = local_engine.run(**dict(X=X, y=y))
    res_local = two_svc_local.reduce()

    ### 4) Run using soma-workflow
    ### ==================================================================
    from epac.map_reduce.engine import SomaWorkflowEngine
    sfw_engine = SomaWorkflowEngine(tree_root=two_svc_swf,
                                    num_processes=2)
    two_svc_swf = sfw_engine.run(**dict(X=X, y=y))
    res_swf = two_svc_swf.reduce()

    # All three execution modes must yield identical results; a mismatch
    # means the user-defined classes could not be dumped for the engines.
    if not repr(res_swf) == repr(res_local):
        raise ValueError("Cannot dump class definition")
    if not repr(res_swf) == repr(res_single):
        raise ValueError("Cannot dump class definition")
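# The test above assumes user-defined MySVC and MyReducer classes. Below is a
# minimal sketch of what they might look like, following EPAC's convention
# that a node's transform() returns a dict and a reducer aggregates a leaf's
# result; the base class, import path, and result keys used here are
# assumptions, not the library's canonical definitions.
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_fscore_support
from epac.map_reduce.reducers import Reducer


class MySVC:
    """User-defined node: fit an SVC and return predictions as a dict."""
    def __init__(self, C=1.0):
        self.C = C

    def transform(self, X, y):
        svc = SVC(C=self.C)
        svc.fit(X, y)
        # The returned dict is the node's output, collected at the leaves
        return {"y/pred": svc.predict(X), "y": y}


class MyReducer(Reducer):
    """User-defined reducer: per-class recall for one leaf's result."""
    def reduce(self, result):
        recall = precision_recall_fscore_support(
            result["y"], result["y/pred"], average=None)[1]
        return {result["key"]: recall}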
def test_prev_state_methods(self):
    ## 1) Build dataset
    ## ================================================
    X, y = datasets.make_classification(n_samples=5,
                                        n_features=20,
                                        n_informative=2)
    Xy = {"X": X, "y": y}

    methods = Methods(*[TOY_CLF(v_lambda=v_lambda)
                        for v_lambda in [2, 1]])
    methods.run(**Xy)

    ps_methods = WarmStartMethods(*[TOY_CLF(v_lambda=v_lambda)
                                    for v_lambda in [2, 1]])
    ps_methods.run(**Xy)
    self.assertTrue(compare_two_node(methods, ps_methods))
    self.assertTrue(comp_2wf_reduce_res(methods, ps_methods))
def test_twomethods(self):
    key_y_pred = 'y' + conf.SEP + conf.PREDICTION
    X, y = datasets.make_classification(n_samples=20,
                                        n_features=5,
                                        n_informative=2)
    # = With EPAC
    wf = Methods(LDA(), SVC(kernel="linear"))
    r_epac = wf.run(X=X, y=y)

    # = With SKLEARN
    lda = LDA()
    svm = SVC(kernel="linear")
    lda.fit(X, y)
    svm.fit(X, y)
    r_sklearn = [lda.predict(X), svm.predict(X)]

    # Comparison
    for i_cls in range(2):
        comp = np.all(np.asarray(r_epac[i_cls][key_y_pred]) ==
                      np.asarray(r_sklearn[i_cls]))
        self.assertTrue(comp, u'Diff Methods')

    # Test reduce (list() is needed on Python 3, where dict views
    # do not support indexing)
    reduced = list(wf.reduce().values())
    r_epac_reduce = [reduced[0][key_y_pred], reduced[1][key_y_pred]]
    comp = np.all(np.asarray(r_epac_reduce) == np.asarray(r_sklearn))
    self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
if __name__ == "__main__": ## 1) Build dataset ## ================================================ X, y = datasets.make_classification(n_samples=10, n_features=5, n_informative=2, random_state=1) Xy = {"X": X, "y": y} ## 2) Build Methods ## ================================================ print("Methods ===================================") methods = Methods(*[TOY_CLF(v_lambda=v_lambda) for v_lambda in [2, 1]]) print(methods.run(**Xy)) ## 3) Build WarmStartMethods like Methods ## ================================================ ## WarmStartMethods ## / \ ## TOY_CLF(v_lambda=2) TOY_CLF(v_lambda=1) ## ## 1. WarmStartMethods will look for different argumenets as signature ## For example, here is v_lambda, there are different for each leaf ## 2. And then run TOY_CLF(v_lambda=2).transform ## 3. Except v_lambda, WarmStartMethods copy all the other parameters ## from TOY_CLF(v_lambda=2) to TOY_CLF(v_lambda=1) as initialization ## 4. Finally call TOY_CLF(v_lambda=1).transform print("WarmStartMethods ==========================") ps_methods = WarmStartMethods(
if __name__ == "__main__": ## 1) Build dataset ## ================================================ X, y = datasets.make_classification(n_samples=10, n_features=5, n_informative=2, random_state=1) Xy = {"X": X, "y": y} ## 2) Build Methods ## ================================================ print "Methods ===================================" methods = Methods(*[TOY_CLF(v_lambda=v_lambda) for v_lambda in [2, 1]]) print methods.run(**Xy) ## 3) Build WarmStartMethods like Methods ## ================================================ ## WarmStartMethods ## / \ ## TOY_CLF(v_lambda=2) TOY_CLF(v_lambda=1) ## ## 1. WarmStartMethods will look for different argumenets as signature ## For example, here is v_lambda, there are different for each leaf ## 2. And then run TOY_CLF(v_lambda=2).transform ## 3. Except v_lambda, WarmStartMethods copy all the other parameters ## from TOY_CLF(v_lambda=2) to TOY_CLF(v_lambda=1) as initialization ## 4. Finally call TOY_CLF(v_lambda=1).transform print "WarmStartMethods ==========================" ps_methods = WarmStartMethods(*[TOY_CLF(v_lambda=v_lambda)
# and produces a dictionary as output. The output is passed to the next node.
# The return value of the run is simply the aggregation of the outputs
# (dicts) of the leaf nodes.

# It is possible to convert the run results to CSV format
from epac import export_leaves_csv
export_leaves_csv(pipe, 'my_result_run.csv')

## Parallelization
## ===============
# Multi-classifiers
# -----------------
#       Methods              Methods     (Splitter)
#      /       \
#  SVM(C=1)  SVM(C=10)       Classifiers (Estimator)
from epac import Methods
multi = Methods(SVM(C=1), SVM(C=10))
multi.run(X=X, y=y)
print(multi.reduce())
# Reduce formats the outputs into a "ResultSet", a dict-like structure
# that contains the "keys" of the methods that have been used
# (a small access sketch follows below).

# You can also export the results of the bottom-up operation (reduce) to CSV
from epac import export_resultset_csv
export_resultset_csv(multi.reduce(), 'my_result_reduce.csv')

#                  Methods                          Methods     (Splitter)
#      /              |               \
# SVM(l1, C=1)  SVM(l1, C=10) ..... SVM(l2, C=10)   Classifiers (Estimator)
svms = Methods(*[SVM(loss=loss, C=C)
                 for loss in ("l1", "l2") for C in [1, 10]])
svms.run(X=X, y=y)
print(svms.reduce())

# Parallelize a sequential Pipeline: Anova (k best selection) + SVM.
#       Methods              Methods     (Splitter)
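# A small sketch of poking into a ResultSet, assuming leaf keys follow the
# "LinearSVC(C=1)" pattern used elsewhere in this tutorial; the exact key
# strings are an assumption, so print the ResultSet first to see them.
res = multi.reduce()
print(res)                    # shows every method key with its outputs
print(res["LinearSVC(C=1)"])  # dict-like outputs of a single method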
svms = Methods(SVM(penalty="l1", class_weight='auto', dual=False),
               SVM(penalty="l2", class_weight='auto', dual=False))
cv = CV(svms, n_folds=n_folds)
cv.run(X=X, y=y)
res_cv_svms = cv.reduce()
# print(res_cv_svms)
print(res_cv_svms["LinearSVC(penalty=l1)"]['y/test/score_recall'])
print(res_cv_svms["LinearSVC(penalty=l2)"]['y/test/score_recall'])

# !!! BIASED RESULT !!!
# Re-fit on all the data to see which model is chosen. Warning !!! this is
# biased since all the data have been used. Use it for information only: no
# score can be derived from it. We look at the weight maps.
svms.run(X=X, y=y)
print(svms.children[0])
print(svms.children[0].estimator.coef_)
print(svms.children[1])
print(svms.children[1].estimator.coef_)
print("Weights given by SVMs")
d = dict(var=imaging_variables,
         svm_weights_l1=svms.children[0].estimator.coef_.ravel(),
         svm_weights_l2=svms.children[1].estimator.coef_.ravel())
print(pd.DataFrame(d).to_string())

##############################################################################
# Automatic model selection: "CVBestSearchRefit"
from epac import CVBestSearchRefit, Methods, CV
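# A minimal usage sketch for CVBestSearchRefit, assuming it wraps a Methods
# node and accepts an n_folds argument like CV does (both assumptions): it
# selects the best method by internal cross-validation, then refits it on
# the whole dataset.
best_svm = CVBestSearchRefit(
    Methods(SVM(penalty="l1", class_weight='auto', dual=False),
            SVM(penalty="l2", class_weight='auto', dual=False)),
    n_folds=5)
best_svm.run(X=X, y=y)    # inner CV to pick a method, then refit on all data
print(best_svm.reduce())  # reports the selected method and its scores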