def test_mysvc_reducer(self):
    """Run one Methods tree under three execution engines and check that
    single-process, local multi-process and soma-workflow runs produce
    identical reduce() results.

    Fix: converted Python 2 ``print`` statements to ``print()`` calls so the
    test is valid Python 3 (matching the py3 variant of this test).
    """
    ## 1) Build dataset
    ## ===================================================================
    X, y = datasets.make_classification(n_samples=12,
                                        n_features=10,
                                        n_informative=2,
                                        random_state=1)
    ## 2) run with Methods
    ## ===================================================================
    my_svc1 = MySVC(C=1.0)
    my_svc2 = MySVC(C=2.0)
    # Three structurally identical trees, one per execution strategy.
    two_svc_single = Methods(my_svc1, my_svc2)
    two_svc_local = Methods(my_svc1, my_svc2)
    two_svc_swf = Methods(my_svc1, my_svc2)
    two_svc_single.reducer = MyReducer()
    two_svc_local.reducer = MyReducer()
    two_svc_swf.reducer = MyReducer()
    for leaf in two_svc_single.walk_leaves():
        print(leaf.get_key())
    for leaf in two_svc_local.walk_leaves():
        print(leaf.get_key())
    for leaf in two_svc_swf.walk_leaves():
        print(leaf.get_key())
    # top-down process to call transform
    two_svc_single.run(X=X, y=y)
    # bottom-up process to compute scores
    res_single = two_svc_single.reduce()
    ### You can get below results:
    ### ==================================================================
    ### [{'MySVC(C=1.0)': array([ 1.,  1.])}, {'MySVC(C=2.0)': array([ 1.,  1.])}]

    ### 3) Run using local multi-processes
    ### ==================================================================
    from epac.map_reduce.engine import LocalEngine
    local_engine = LocalEngine(two_svc_local, num_processes=2)
    two_svc_local = local_engine.run(**dict(X=X, y=y))
    res_local = two_svc_local.reduce()

    ### 4) Run using soma-workflow
    ### ==================================================================
    from epac.map_reduce.engine import SomaWorkflowEngine
    sfw_engine = SomaWorkflowEngine(tree_root=two_svc_swf,
                                    num_processes=2)
    two_svc_swf = sfw_engine.run(**dict(X=X, y=y))
    res_swf = two_svc_swf.reduce()

    # All three strategies must agree; compare via repr since the results
    # contain numpy arrays.
    if not repr(res_swf) == repr(res_local):
        raise ValueError("Cannot dump class definition")
    if not repr(res_swf) == repr(res_single):
        raise ValueError("Cannot dump class definition")
def test_mysvc_reducer(self):
    """Check that single-process, local multi-process and soma-workflow
    executions of the same Methods tree yield identical reduce() results."""
    # 1) Small synthetic classification dataset.
    X, y = datasets.make_classification(n_samples=12, n_features=10,
                                        n_informative=2, random_state=1)

    # 2) Three identical trees, one per execution engine.
    svc_a = MySVC(C=1.0)
    svc_b = MySVC(C=2.0)
    trees = [Methods(svc_a, svc_b) for _ in range(3)]
    for tree in trees:
        tree.reducer = MyReducer()
    two_svc_single, two_svc_local, two_svc_swf = trees

    # Dump every leaf key of every tree.
    for tree in trees:
        for leaf in tree.walk_leaves():
            print(leaf.get_key())

    # Single-process: top-down run, then bottom-up reduce.
    two_svc_single.run(X=X, y=y)
    res_single = two_svc_single.reduce()
    # Expected result shape:
    # [{'MySVC(C=1.0)': array([ 1., 1.])}, {'MySVC(C=2.0)': array([ 1., 1.])}]

    # 3) Local multi-process engine.
    from epac.map_reduce.engine import LocalEngine
    local_engine = LocalEngine(two_svc_local, num_processes=2)
    two_svc_local = local_engine.run(X=X, y=y)
    res_local = two_svc_local.reduce()

    # 4) Soma-workflow engine.
    from epac.map_reduce.engine import SomaWorkflowEngine
    sfw_engine = SomaWorkflowEngine(tree_root=two_svc_swf, num_processes=2)
    two_svc_swf = sfw_engine.run(X=X, y=y)
    res_swf = two_svc_swf.reduce()

    # All engines must agree; compare via repr (results hold numpy arrays).
    if repr(res_swf) != repr(res_local):
        raise ValueError("Cannot dump class definition")
    if repr(res_swf) != repr(res_single):
        raise ValueError("Cannot dump class definition")
def test_constructor_avoid_collision_level2(self):
    """Level-2 key collisions between near-identical sub-pipelines must be
    avoided by the Methods constructor."""
    # Two Pipes that differ only in the SVC C parameter.
    tree = Methods(*[Pipe(SelectKBest(k=2), SVC(kernel="linear", C=c))
                     for c in [1, 10]])
    keys = [leaf.get_key() for leaf in tree.walk_leaves()]
    # Duplicate keys would collapse the set below.
    self.assertTrue(len(keys) == len(set(keys)),
                    u'Collision could not be avoided')
def test_constructor_avoid_collision_level2(self):
    """Verify that leaf keys of near-identical level-2 sub-pipelines stay
    distinct."""
    pipes = []
    for c_value in (1, 10):
        pipes.append(Pipe(SelectKBest(k=2), SVC(kernel="linear", C=c_value)))
    tree = Methods(*pipes)
    leaf_keys = [node.get_key() for node in tree.walk_leaves()]
    # Any collision would make the set strictly smaller than the list.
    self.assertTrue(len(leaf_keys) == len(set(leaf_keys)),
                    u'Collision could not be avoided')
def test_constructor_avoid_collision_level1(self):
    """Verify that leaf keys of near-identical level-1 methods stay
    distinct."""
    classifiers = [SVC(kernel="linear", C=c) for c in (1, 10)]
    tree = Methods(*classifiers)
    keys = [leaf.get_key() for leaf in tree.walk_leaves()]
    # A collision would collapse the set of keys.
    self.assertTrue(len(keys) == len(set(keys)),
                    u"Collision could not be avoided")
# Model selection: pick the best of SVM(C=1) / SVM(C=10) by inner CV, then
# refit it on the whole data.
# NOTE(review): X, y, SVM, Methods, CVBestSearchRefit, Pipe, SelectKBest,
# LDA and CV come from earlier in this script — confirm against the full file.
wf = CVBestSearchRefit(Methods(SVM(C=1), SVM(C=10)))
wf.run(X=X, y=y)
print(wf.reduce())

# Feature selection combined with SVM and LDA
#      CVBestSearchRefit
#      Methods           (Splitter)
#      /    \
# KBest(1)  KBest(5)     SelectKBest (Estimator)
#      |
#   Methods              (Splitter)
#   /    \
#  LDA()  SVM()   ...    Classifiers (Estimator)
# One pipeline per k value; each pipeline branches into LDA and SVM.
pipelines = Methods(
    *[Pipe(SelectKBest(k=k), Methods(LDA(), SVM())) for k in [1, 5]])
print([n for n in pipelines.walk_leaves()])
# Select the best (k, classifier) combination by inner CV, then refit.
best_cv = CVBestSearchRefit(pipelines)
best_cv.run(X=X, y=y)
best_cv.reduce()

# Put it in an outer CV (nested cross-validation of the whole search).
cv = CV(best_cv)
cv.run(X=X, y=y)
cv.reduce()

# Perms + Cross-validation of SVM(linear) and SVM(rbf)
# -------------------------------------
#           Perms        Perm (Splitter)
#       /     |       \
#      0      1       2   Samples (Slicer)
#             |
# CV + Grid search of a simple classifier
# Fix: converted Python 2 ``print`` statements to ``print()`` calls so this
# script is valid Python 3 (matching the py3 variant of this example).
# NOTE(review): X, y and the epac classes are defined earlier in this script
# — confirm against the full file.
wf = CVBestSearchRefit(Methods(SVM(C=1), SVM(C=10)))
wf.run(X=X, y=y)
print(wf.reduce())

# Feature selection combined with SVM and LDA
#      CVBestSearchRefit
#      Methods           (Splitter)
#      /    \
# KBest(1)  KBest(5)     SelectKBest (Estimator)
#      |
#   Methods              (Splitter)
#   /    \
#  LDA()  SVM()   ...    Classifiers (Estimator)
pipelines = Methods(*[Pipe(SelectKBest(k=k), Methods(LDA(), SVM()))
                      for k in [1, 5]])
print([n for n in pipelines.walk_leaves()])
best_cv = CVBestSearchRefit(pipelines)
best_cv.run(X=X, y=y)
best_cv.reduce()

# Put it in an outer CV
cv = CV(best_cv)
cv.run(X=X, y=y)
cv.reduce()

# Perms + Cross-validation of SVM(linear) and SVM(rbf)
# -------------------------------------
#           Perms        Perm (Splitter)
#       /     |       \
#      0      1       2   Samples (Slicer)
#             |
# Fix: converted Python 2 ``print`` statements to ``print()`` calls so this
# script is valid Python 3.
# NOTE(review): svms_auto, X, y, n_folds, time, SVM, SelectKBest and
# preprocessing come from earlier in this script — confirm against the full
# file.
print(svms_auto.refited.estimator.coef_)

##############################################################################
# Put everything together
# Pipeline, "Pipe": SelectKBest + StandardScaler + SVM l1 vs l2
from epac import range_log2
from epac import CVBestSearchRefit, Pipe, Methods, CV
# Candidate numbers of selected features: powers of two up to n_features.
k_values = range_log2(X.shape[1], add_n=True)
C_values = [.1, 1, 10, 100]
# One pipeline per k; each pipeline fans out over all (C, penalty) pairs.
anova_svms = Methods(*[Pipe(SelectKBest(k=k),
                            preprocessing.StandardScaler(),
                            Methods(*[SVM(C=C, penalty=penalty,
                                          class_weight='auto', dual=False)
                                      for C in C_values
                                      for penalty in ['l1', 'l2']]))
                       for k in k_values])
# Take a look
print([l for l in anova_svms.walk_leaves()])

## k and C selection based on CV
anova_svms_auto = CVBestSearchRefit(anova_svms)

#anova_svm_all = Methods(anova_svm, anova_svm_cv)
cv = CV(anova_svms_auto, n_folds=n_folds)
time_fit_predict = time.time()
cv.run(X=X, y=y)
print(time.time() - time_fit_predict)
print(cv.reduce())

# Re-fit on all data. Warning: biased !!!
anova_svms_auto.run(X=X, y=y)
print(anova_svms_auto.best_params)