def test_twomethods(self):
    """Check that an EPAC Methods(LDA, SVC) run matches fitting each
    scikit-learn estimator directly, both for run() and for reduce()."""
    key_y_pred = 'y' + conf.SEP + conf.PREDICTION
    X, y = datasets.make_classification(n_samples=20, n_features=5,
                                        n_informative=2)
    # = With EPAC
    wf = Methods(LDA(), SVC(kernel="linear"))
    r_epac = wf.run(X=X, y=y)
    # = With SKLEARN
    lda = LDA()
    svm = SVC(kernel="linear")
    lda.fit(X, y)
    svm.fit(X, y)
    r_sklearn = [lda.predict(X), svm.predict(X)]
    # Comparison: predictions must match classifier by classifier
    for i_cls in range(2):
        comp = np.all(np.asarray(r_epac[i_cls][key_y_pred])
                      == np.asarray(r_sklearn[i_cls]))
        self.assertTrue(comp, u'Diff Methods')
    # test reduce: compute reduce() once and materialize .values() as a
    # list -- on Python 3 dict.values() is a non-indexable view, so the
    # previous wf.reduce().values()[0] form would raise TypeError.
    reduced = list(wf.reduce().values())
    r_epac_reduce = [reduced[0][key_y_pred], reduced[1][key_y_pred]]
    comp = np.all(np.asarray(r_epac_reduce) == np.asarray(r_sklearn))
    self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
def test_mysvc_reducer(self):
    """Run a two-classifier Methods tree with a custom reducer through
    three backends (single process, LocalEngine, SomaWorkflowEngine) and
    check that all three reduce() results agree."""
    ## 1) Build dataset
    ## ===================================================================
    X, y = datasets.make_classification(n_samples=12, n_features=10,
                                        n_informative=2, random_state=1)
    ## 2) run with Methods
    ## ===================================================================
    my_svc1 = MySVC(C=1.0)
    my_svc2 = MySVC(C=2.0)
    two_svc_single = Methods(my_svc1, my_svc2)
    two_svc_local = Methods(my_svc1, my_svc2)
    two_svc_swf = Methods(my_svc1, my_svc2)
    two_svc_single.reducer = MyReducer()
    two_svc_local.reducer = MyReducer()
    two_svc_swf.reducer = MyReducer()
    # print() calls (not Python 2 print statements) keep this test
    # runnable under Python 3 as well.
    for leaf in two_svc_single.walk_leaves():
        print(leaf.get_key())
    for leaf in two_svc_local.walk_leaves():
        print(leaf.get_key())
    for leaf in two_svc_swf.walk_leaves():
        print(leaf.get_key())
    # top-down process to call transform
    two_svc_single.run(X=X, y=y)
    # bottom-up process to compute scores
    res_single = two_svc_single.reduce()
    ### You can get below results:
    ### ==================================================================
    ### [{'MySVC(C=1.0)': array([ 1., 1.])}, {'MySVC(C=2.0)': array([ 1., 1.])}]
    ### 3) Run using local multi-processes
    ### ==================================================================
    from epac.map_reduce.engine import LocalEngine
    local_engine = LocalEngine(two_svc_local, num_processes=2)
    two_svc_local = local_engine.run(**dict(X=X, y=y))
    res_local = two_svc_local.reduce()
    ### 4) Run using soma-workflow
    ### ==================================================================
    from epac.map_reduce.engine import SomaWorkflowEngine
    sfw_engine = SomaWorkflowEngine(tree_root=two_svc_swf,
                                    num_processes=2)
    two_svc_swf = sfw_engine.run(**dict(X=X, y=y))
    res_swf = two_svc_swf.reduce()
    # All three backends must produce the same result set.
    if not repr(res_swf) == repr(res_local):
        raise ValueError("Cannot dump class definition")
    if not repr(res_swf) == repr(res_single):
        raise ValueError("Cannot dump class definition")
def test_mysvc_reducer(self):
    """Exercise a Methods tree of two MySVC classifiers with a custom
    reducer, executed single-process, via LocalEngine and via
    SomaWorkflowEngine, and verify the three reductions are identical."""
    # 1) Build dataset
    # ===================================================================
    X, y = datasets.make_classification(n_samples=12, n_features=10,
                                        n_informative=2, random_state=1)
    # 2) Build three identical trees, one per execution backend
    # ===================================================================
    my_svc1 = MySVC(C=1.0)
    my_svc2 = MySVC(C=2.0)
    two_svc_single = Methods(my_svc1, my_svc2)
    two_svc_local = Methods(my_svc1, my_svc2)
    two_svc_swf = Methods(my_svc1, my_svc2)
    all_trees = (two_svc_single, two_svc_local, two_svc_swf)
    for tree in all_trees:
        tree.reducer = MyReducer()
    # Show the key of every leaf of every tree (same order as before:
    # single, then local, then soma-workflow).
    for tree in all_trees:
        for leaf in tree.walk_leaves():
            print(leaf.get_key())
    # Top-down pass calls transform; bottom-up pass computes the scores.
    two_svc_single.run(X=X, y=y)
    res_single = two_svc_single.reduce()
    # Expected shape of the result, e.g.:
    # [{'MySVC(C=1.0)': array([ 1., 1.])}, {'MySVC(C=2.0)': array([ 1., 1.])}]
    # 3) Same computation using local multi-processing
    # ===================================================================
    from epac.map_reduce.engine import LocalEngine
    local_engine = LocalEngine(two_svc_local, num_processes=2)
    two_svc_local = local_engine.run(X=X, y=y)
    res_local = two_svc_local.reduce()
    # 4) Same computation through soma-workflow
    # ===================================================================
    from epac.map_reduce.engine import SomaWorkflowEngine
    sfw_engine = SomaWorkflowEngine(tree_root=two_svc_swf, num_processes=2)
    two_svc_swf = sfw_engine.run(X=X, y=y)
    res_swf = two_svc_swf.reduce()
    # All three backends must agree.
    if repr(res_swf) != repr(res_local):
        raise ValueError("Cannot dump class definition")
    if repr(res_swf) != repr(res_single):
        raise ValueError("Cannot dump class definition")
def test_twomethods(self):
    """Check that an EPAC Methods(LDA, SVC) run matches fitting each
    scikit-learn estimator directly, both for run() and for reduce()."""
    key_y_pred = 'y' + conf.SEP + conf.PREDICTION
    X, y = datasets.make_classification(n_samples=20, n_features=5,
                                        n_informative=2)
    # = With EPAC
    wf = Methods(LDA(), SVC(kernel="linear"))
    r_epac = wf.run(X=X, y=y)
    # = With SKLEARN
    lda = LDA()
    svm = SVC(kernel="linear")
    lda.fit(X, y)
    svm.fit(X, y)
    r_sklearn = [lda.predict(X), svm.predict(X)]
    # Comparison: predictions must match classifier by classifier
    for i_cls in range(2):
        comp = np.all(np.asarray(r_epac[i_cls][key_y_pred])
                      == np.asarray(r_sklearn[i_cls]))
        self.assertTrue(comp, u'Diff Methods')
    # test reduce: compute reduce() once and materialize .values() as a
    # list -- on Python 3 dict.values() is a non-indexable view, so the
    # previous wf.reduce().values()[0] form would raise TypeError.
    reduced = list(wf.reduce().values())
    r_epac_reduce = [reduced[0][key_y_pred], reduced[1][key_y_pred]]
    comp = np.all(np.asarray(r_epac_reduce) == np.asarray(r_sklearn))
    self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
from sklearn.svm import SVC svc = SVC(C=self.C) svc.fit(X, y) # "transform" should return a dictionary: ie.: a result, keys are abritrary return {"y/pred": svc.predict(X), "y/true": y} best_svc_tranform = Methods(SVMTransform(C=1.0), SVMTransform(C=2.0)) cv = CV(best_svc_tranform, cv_key="y", cv_type="stratified", n_folds=2, reducer=None) cv.run(X=X, y=y) # top-down process to call transform cv.reduce() # buttom-up process # ## 4) Run using local multi-processes ## ================================== from epac.map_reduce.engine import LocalEngine local_engine = LocalEngine(best_svc, num_processes=2) best_svc = local_engine.run(**dict(X=X, y=y)) best_svc_tranform.reduce() ## 5) Run using soma-workflow ## ========================== from epac.map_reduce.engine import SomaWorkflowEngine sfw_engine = SomaWorkflowEngine(tree_root=best_svc, num_processes=2) best_svc = sfw_engine.run(**dict(X=X, y=y)) best_svc.reduce()
from epac import export_leaves_csv
# Dump the per-leaf (top-down) outputs to CSV.
export_leaves_csv(pipe, 'my_result_run.csv')

## Parallelization
## ===============

# Multi-classifiers
# -----------------
#       Methods                    Methods        (Splitter)
#      /       \
# SVM(C=1)   SVM(C=10)             Classifiers    (Estimator)
from epac import Methods
multi = Methods(SVM(C=1), SVM(C=10))
multi.run(X=X, y=y)
print(multi.reduce())
# reduce() folds the outputs into a "ResultSet", a dict-like structure keyed
# by the methods that have been used.
# The bottom-up (reduce) results can be exported to CSV as well.
from epac import export_resultset_csv
export_resultset_csv(multi.reduce(), 'my_result_reduce.csv')

#          Methods                         Methods        (Splitter)
#         /       \
# SVM(l1, C=1) SVM(l1, C=10) ... SVM(l2, C=10)  Classifiers (Estimator)
# Build the loss x C grid of classifiers explicitly, then split over it.
grid = []
for loss in ("l1", "l2"):
    for C in [1, 10]:
        grid.append(SVM(loss=loss, C=C))
svms = Methods(*grid)
svms.run(X=X, y=y)
# The return value of the run is simply the aggregation of the outputs (dict)
# of the leaf nodes

## Parallelization
## ===============

# Multi-classifiers
# -----------------
#       Methods                    Methods        (Splitter)
#      /       \
# SVM(C=1)   SVM(C=10)             Classifiers    (Estimator)
from epac import Methods
multi = Methods(SVM(C=1), SVM(C=10))
multi.run(X=X, y=y)
# print() as a function (not the Python 2 print statement) keeps the example
# runnable under Python 3, consistently with the other snippets in this doc.
print(multi.reduce())
# Reduce formats outputs into a "ResultSet", a dict-like structure keyed by
# the methods that have been used.

#          Methods                         Methods        (Splitter)
#         /       \
# SVM(l1, C=1) SVM(l1, C=10) ... SVM(l2, C=10)  Classifiers (Estimator)
svms = Methods(*[SVM(loss=loss, C=C)
                 for loss in ("l1", "l2") for C in [1, 10]])
svms.run(X=X, y=y)
print(svms.reduce())

# Parallelize sequential Pipeline: Anova(k best selection) + SVM.
#       Methods                    Methods        (Splitter)
#      /   |   \
from epac import export_leaves_csv
# Dump the per-leaf (top-down) outputs to CSV.
export_leaves_csv(pipe, 'my_result_run.csv')

## Parallelization
## ===============

# Multi-classifiers
# -----------------
#       Methods                    Methods        (Splitter)
#      /       \
# SVM(C=1)   SVM(C=10)             Classifiers    (Estimator)
from epac import Methods
multi = Methods(SVM(C=1), SVM(C=10))
multi.run(X=X, y=y)
# print() as a function (not the Python 2 print statement) keeps the example
# runnable under Python 3, consistently with the other snippets in this doc.
print(multi.reduce())
# Reduce formats outputs into a "ResultSet", a dict-like structure keyed by
# the methods that have been used.
# You can also export the results of the bottom-up operation (reduce) to CSV
from epac import export_resultset_csv
export_resultset_csv(multi.reduce(), 'my_result_reduce.csv')

#          Methods                         Methods        (Splitter)
#         /       \
# SVM(l1, C=1) SVM(l1, C=10) ... SVM(l2, C=10)  Classifiers (Estimator)
svms = Methods(*[SVM(loss=loss, C=C)
                 for loss in ("l1", "l2") for C in [1, 10]])