def do_all(options):
    if options.k_max != "auto":
        k_values = range_log2(np.minimum(int(options.k_max),
                                         options.n_features), add_n=True)
    else:
        k_values = range_log2(options.n_features, add_n=True)
    C_values = [1, 10]
    random_state = 0
    # print options
    # sys.exit(0)
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[Pipe(SelectKBest(k=k), SVC(kernel="linear", C=C))
                    for C in C_values for k in k_values])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    print "Time elapsed, tree construction:", time.time() - time_start

    ## 3) Run Workflow
    ## ===============
    time_fit_predict = time.time()
    ## Run on local machine
    sfw_engine = SomaWorkflowEngine(tree_root=wf,
                                    num_processes=options.n_cores)
    ## Run on cluster
    # sfw_engine = SomaWorkflowEngine(
    #     tree_root=wf,
    #     num_processes=options.n_cores,
    #     resource_id="jl237561@gabriel",
    #     login="******")
    # You can use soma_workflow_gui to track your progress
    wf = sfw_engine.run(X=X, y=y)
    print "Time elapsed, fit predict:", time.time() - time_fit_predict
    time_reduce = time.time()

    ## 4) Reduce Workflow
    ## ==================
    print wf.reduce()
    print "Time elapsed, reduce:", time.time() - time_reduce
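# A minimal sketch of how do_all() above might be driven from the command
# line. The argument names follow the `options` attributes used in the
# function; the defaults shown here are hypothetical.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--n_samples", type=int, default=100)
    parser.add_argument("--n_features", type=int, default=500)
    parser.add_argument("--n_informative", type=int, default=5)
    parser.add_argument("--k_max", default="auto")
    parser.add_argument("--n_folds", type=int, default=10)
    parser.add_argument("--n_folds_nested", type=int, default=5)
    parser.add_argument("--n_perms", type=int, default=10)
    parser.add_argument("--n_cores", type=int, default=2)
    parser.add_argument("--trace", action="store_true", default=False)
    do_all(parser.parse_args())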
def do_all(options):
    if options.k_max != "auto":
        k_values = range_log2(np.minimum(int(options.k_max),
                                         options.n_features), add_n=True)
    else:
        k_values = range_log2(options.n_features, add_n=True)
    C_values = [1, 10]
    random_state = 0
    # print options
    # sys.exit(0)
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[Pipe(SelectKBest(k=k), SVC(kernel="linear", C=C))
                    for C in C_values for k in k_values])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    print "Time elapsed, tree construction:", time.time() - time_start

    ## 3) Export Workflow to soma_workflow_gui
    ## =======================================
    time_fit_predict = time.time()
    if os.path.isdir(options.soma_workflow_dir):
        shutil.rmtree(options.soma_workflow_dir)
    sfw_engine = SomaWorkflowEngine(tree_root=wf,
                                    num_processes=options.n_cores)
    sfw_engine.export_to_gui(options.soma_workflow_dir, X=X, y=y)
    print "Time elapsed, fit predict:", time.time() - time_fit_predict

    ## 4) Load Epac tree & Reduce
    ## ==========================
    reduce_filename = os.path.join(options.soma_workflow_dir, "reduce.py")
    f = open(reduce_filename, "w")
    reduce_str = """from epac.map_reduce.engine import SomaWorkflowEngine
wf = SomaWorkflowEngine.load_from_gui("%s")
print wf.reduce()
""" % options.soma_workflow_dir
    f.write(reduce_str)
    f.close()
    print "#First run\n" \
          "soma_workflow_gui\n" \
          "\t(1) Open %s\n" \
          "\t(2) Submit\n" \
          "\t(3) Transfer Input Files\n" \
          "\t...wait...\n" \
          "\t(4) Transfer Output Files\n" \
          "#When done run:\npython %s" % (
              os.path.join(options.soma_workflow_dir,
                           sfw_engine.open_me_by_soma_workflow_gui),
              reduce_filename)
def test_mysvc_reducer(self):
    ## 1) Build dataset
    ## ===================================================================
    X, y = datasets.make_classification(n_samples=12,
                                        n_features=10,
                                        n_informative=2,
                                        random_state=1)

    ## 2) Run with Methods
    ## ===================================================================
    my_svc1 = MySVC(C=1.0)
    my_svc2 = MySVC(C=2.0)
    two_svc_single = Methods(my_svc1, my_svc2)
    two_svc_local = Methods(my_svc1, my_svc2)
    two_svc_swf = Methods(my_svc1, my_svc2)
    two_svc_single.reducer = MyReducer()
    two_svc_local.reducer = MyReducer()
    two_svc_swf.reducer = MyReducer()
    for leaf in two_svc_single.walk_leaves():
        print leaf.get_key()
    for leaf in two_svc_local.walk_leaves():
        print leaf.get_key()
    for leaf in two_svc_swf.walk_leaves():
        print leaf.get_key()
    # top-down process to call transform
    two_svc_single.run(X=X, y=y)
    # bottom-up process to compute scores
    res_single = two_svc_single.reduce()
    ### Expected results:
    ### ==================================================================
    ### [{'MySVC(C=1.0)': array([ 1.,  1.])}, {'MySVC(C=2.0)': array([ 1.,  1.])}]

    ### 3) Run using local multi-processes
    ### ==================================================================
    from epac.map_reduce.engine import LocalEngine
    local_engine = LocalEngine(two_svc_local, num_processes=2)
    two_svc_local = local_engine.run(**dict(X=X, y=y))
    res_local = two_svc_local.reduce()

    ### 4) Run using soma-workflow
    ### ==================================================================
    from epac.map_reduce.engine import SomaWorkflowEngine
    sfw_engine = SomaWorkflowEngine(tree_root=two_svc_swf, num_processes=2)
    two_svc_swf = sfw_engine.run(**dict(X=X, y=y))
    res_swf = two_svc_swf.reduce()
    if not repr(res_swf) == repr(res_local):
        raise ValueError("Cannot dump class definition")
    if not repr(res_swf) == repr(res_single):
        raise ValueError("Cannot dump class definition")
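# The test above relies on MySVC and MyReducer, which are defined elsewhere in
# the EPAC test suite. Below is a minimal, hypothetical sketch of what they
# could look like; the transform/reduce contract mirrors the SVMTransform
# example further down, and the real definitions may differ.
from sklearn.metrics import precision_score


class MySVC:
    """Hypothetical user-defined node: fit an SVC and return predictions."""
    def __init__(self, C=1.0):
        self.C = C

    def transform(self, X, y):
        from sklearn.svm import SVC
        svc = SVC(C=self.C)
        svc.fit(X, y)
        # "transform" returns a dictionary of results; keys are arbitrary
        return {"y/pred": svc.predict(X), "y/true": y}


class MyReducer:
    """Hypothetical reducer: per-class precision, matching array([ 1.,  1.])."""
    def reduce(self, result):
        # the exact structure of the result object passed in by EPAC is assumed here
        return precision_score(result["y/true"], result["y/pred"], average=None)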
           n_perms=n_perms, permute="y", random_state=random_state)

# wf.run(X=X, y=y)
# for leaf in wf.walk_leaves():
#     print leaf.load_results()
# wf.reduce()

# from epac.map_reduce.engine import LocalEngine
# local_engine = LocalEngine(tree_root=wf, num_processes=2)
# wf = local_engine.run(X=X, y=y)
# for leaf in wf.walk_leaves():
#     print leaf.load_results()
# wf.reduce()

from epac.map_reduce.engine import SomaWorkflowEngine
sfw_engine = SomaWorkflowEngine(tree_root=wf,
                                num_processes=3,
                                remove_finished_wf=False,
                                remove_local_tree=False)
wf = sfw_engine.run(X=X, y=y)
# for leaf in wf.walk_leaves():
#     print leaf.load_results()
# for node in wf.walk_true_nodes():
#     print node
#     print node.load_results()
print wf.reduce()
class SVMTransform:
    def __init__(self, C=1.0):
        self.C = C

    def transform(self, X, y):
        from sklearn.svm import SVC
        svc = SVC(C=self.C)
        svc.fit(X, y)
        # "transform" should return a dictionary, i.e. a result; keys are arbitrary
        return {"y/pred": svc.predict(X), "y/true": y}


best_svc_transform = Methods(SVMTransform(C=1.0), SVMTransform(C=2.0))
cv = CV(best_svc_transform, cv_key="y", cv_type="stratified",
        n_folds=2, reducer=None)
cv.run(X=X, y=y)  # top-down process to call transform
cv.reduce()       # bottom-up process

## 4) Run using local multi-processes
## ==================================
from epac.map_reduce.engine import LocalEngine
local_engine = LocalEngine(best_svc, num_processes=2)
best_svc = local_engine.run(**dict(X=X, y=y))
best_svc_transform.reduce()

## 5) Run using soma-workflow
## ==========================
from epac.map_reduce.engine import SomaWorkflowEngine
sfw_engine = SomaWorkflowEngine(tree_root=best_svc, num_processes=2)
best_svc = sfw_engine.run(**dict(X=X, y=y))
best_svc.reduce()
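# With reducer=None, cv.reduce() above returns the raw results collected from
# SVMTransform.transform rather than aggregated scores. A small sanity check on
# one such result dictionary (a sketch; it reuses the X, y from the surrounding
# example and does not depend on EPAC's own result layout):
import numpy as np

res = SVMTransform(C=1.0).transform(X, y)
print "training accuracy:", np.mean(res["y/pred"] == np.asarray(res["y/true"]))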