def test_examples_local_engine(self):
    """Run every example workflow three ways (in-process, LocalEngine,
    SomaWorkflowEngine) and check that all three produce the same tree
    and the same reduced results."""
    list_all_examples = get_wf_example_classes()
    for example in list_all_examples:
        # if example().__class__.__name__ == "WFExample1" or \
        #         example().__class__.__name__ == "WFExample2":
        # example = list_all_examples[0]
        wf = example().get_workflow()
        local_engine_wf = example().get_workflow()
        sfw_engine_wf = example().get_workflow()
        # Reference run: execute the workflow in the current process
        wf.run(X=self.X, y=self.y)
        # Same workflow through the multi-process local engine
        local_engine = LocalEngine(tree_root=local_engine_wf,
                                   num_processes=self.n_cores)
        local_engine_wf = local_engine.run(X=self.X, y=self.y)
        # Same workflow through soma-workflow (credentials left commented)
        sfw_engine = SomaWorkflowEngine(tree_root=sfw_engine_wf,
                                        num_processes=self.n_cores,
                                        # resource_id="ed203246@gabriel",
                                        # login="******",
                                        remove_finished_wf=False,
                                        remove_local_tree=False)
        sfw_engine_wf = sfw_engine.run(X=self.X, y=self.y)
        self.assertTrue(compare_two_node(wf, local_engine_wf))
        self.assertTrue(compare_two_node(wf, sfw_engine_wf))
        self.assertTrue(comp_2wf_reduce_res(wf, local_engine_wf))
        self.assertTrue(comp_2wf_reduce_res(wf, sfw_engine_wf))
def test_examples_local_engine(self):
    list_all_examples = get_wf_example_classes()
    for example in list_all_examples:
        # example = list_all_examples[0]
        wf = example().get_workflow()
        wf.run(X=self.X, y=self.y)
        local_engine = LocalEngine(tree_root=wf,
                                   num_processes=self.n_cores)
        local_engine_wf = local_engine.run(X=self.X, y=self.y)
        sfw_engine = SomaWorkflowEngine(tree_root=wf,
                                        num_processes=self.n_cores)
        sfw_engine_wf = sfw_engine.run(X=self.X, y=self.y)
        self.assertTrue(comp_2wf_reduce_res(wf, local_engine_wf))
        self.assertTrue(comp_2wf_reduce_res(wf, sfw_engine_wf))
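# compare_two_node and comp_2wf_reduce_res come from the EPAC test
# utilities and are not shown in this file. The sketch below is only an
# assumption about the property the reduce-based helper checks: two runs
# of the same workflow should agree once their results are reduced.
def comp_2wf_reduce_res_sketch(wf1, wf2):
    # Compare the reduced ResultSets via their printable form; the real
    # helper presumably compares the result dictionaries entry by entry.
    return str(wf1.reduce()) == str(wf2.reduce())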
""" from sklearn import datasets X, y = datasets.make_classification(n_samples=500, n_features=200000, n_informative=2, random_state=1) Xy = dict(X=X, y=y) ## 2) Building workflow ## ======================================================= print " -> Pt2 : X and y created, building workflow" from sklearn import svm, cross_validation #kfold = cross_validation.KFold(n=len(X), n_folds=3) #svc = svm.SVC(C=1, kernel='linear') #print [svc.fit(X[train], y[train]).score(X[test], y[test]) for train, test in kfold] from epac import CV, Methods cv_svm_local = CV(Methods(*[svm.SVC(kernel="linear"), svm.SVC(kernel="rbf")]), n_folds=3) print " -> Pt3 : Workflow built, defining local engine" cv_svm = None n_proc = 2 # Running on the local machine from epac import LocalEngine local_engine = LocalEngine(cv_svm_local, num_processes=n_proc) print " -> Pt4 : Running" cv_svm = local_engine.run(**Xy) print " -> Success with %i procs!" % n_proc
y_n_groups = 2
print(" -> Pt2 : creating X, Y and column group indices")
X = np.random.randn(n_samples, n_xfeatures)
Y = np.random.randn(n_samples, n_yfeatures)
# Assign each X column to one of x_n_groups groups; np.random.randint's
# upper bound is exclusive, so indices fall in [0, x_n_groups)
x_group_indices = np.random.randint(0, x_n_groups, size=n_xfeatures)
# y_group_indices = np.random.randint(0, y_n_groups, size=n_yfeatures)
y_group_indices = np.zeros(n_yfeatures)
print(" -> Pt3 : data created, building MUOLS workflow")
# 1) Prediction for each X block returns a n_samples x n_yfeatures array
mulm = ColumnSplitter(MUOLS(), x_group_indices, y_group_indices)
# mulm.run(X=X, Y=Y)
local_engine = LocalEngine(tree_root=mulm, num_processes=2)
mulm = local_engine.run(X=X, Y=Y)
print(" -> Pt4 : run finished, inspecting leaves")
for leaf in mulm.walk_leaves():
    print("=============== leaf.load_results() =================")
    print("key =", leaf.get_key())
    tab = leaf.load_results()
    print(tab["MUOLS"]['Y/pred'])
# MUOLSStats variant (disabled):
## 1) Prediction for each X block returns a n_samples x n_yfeatures array
# mulm_stats = ColumnSplitter(MUOLSStats(), x_group_indices, y_group_indices)
# # mulm_stats.run(X=X, Y=Y)
# local_engine = LocalEngine(tree_root=mulm_stats, num_processes=2)
# mulm_stats = local_engine.run(X=X, Y=Y)
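# ColumnSplitter routes each column group to its own child node. The
# snippet below is a standalone illustration (plain NumPy, independent of
# EPAC) of how integer group indices partition the columns of X; the
# actual splitting logic inside ColumnSplitter may differ.
import numpy as np

n_features = 10
n_groups = 5
group_indices = np.random.randint(0, n_groups, size=n_features)
for g in range(n_groups):
    # Boolean mask selecting the columns assigned to group g
    mask = (group_indices == g)
    print("group %d -> %d columns" % (g, mask.sum()))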
y = convert2memmap(y)
Xy = dict(X=X, y=y)

## 2) Build the two workflows, one per engine
## =======================================================
from sklearn.svm import SVC
from epac import CV, Methods
cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                            SVC(kernel="rbf")]),
                  n_folds=3)
cv_svm_swf = CV(Methods(*[SVC(kernel="linear"),
                          SVC(kernel="rbf")]),
                n_folds=3)

## 3) Run two workflows using local engine and soma-workflow
## =========================================================
from epac import LocalEngine
local_engine = LocalEngine(cv_svm_local, num_processes=2)
cv_svm = local_engine.run(X=X, y=y)
print(cv_svm.reduce())

from epac import SomaWorkflowEngine
swf_engine = SomaWorkflowEngine(cv_svm_swf,
                                num_processes=2,
                                # resource_id="jl237561@gabriel",
                                # login="******",
                                remove_finished_wf=False)
cv_svm = swf_engine.run(**Xy)
print(cv_svm.reduce())
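# convert2memmap is taken here to dump an in-memory array to disk and hand
# back a numpy.memmap view, so worker processes can share the data without
# copying it. Its real implementation is not shown in this script; the
# helper below is a hypothetical reconstruction under that assumption.
import os
import tempfile
import numpy as np

def convert2memmap_sketch(a):
    # Write the array to a temporary file, then reopen it memory-mapped
    path = os.path.join(tempfile.mkdtemp(), "data.mmap")
    mm = np.memmap(path, dtype=a.dtype, mode="w+", shape=a.shape)
    mm[:] = a[:]
    mm.flush()
    return np.memmap(path, dtype=a.dtype, mode="r+", shape=a.shape)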
def test_memmapping(self):
    ## 1) Building dataset
    ## ============================================================
    if self.memmap:
        # If n_proc is 1, always generate the matrix;
        # otherwise, load it if it exists, or create it if it doesn't
        writing_mode = (self.n_proc == 1)
        X = create_mmat(self.n_samples, self.n_features,
                        dir=self.directory,
                        writing_mode=writing_mode)
        y = create_array(self.n_samples, [0, 1],
                         dir=self.directory,
                         writing_mode=writing_mode)
        Xy = dict(X=X, y=y)
    else:
        X, y = datasets.make_classification(n_samples=self.n_samples,
                                            n_features=self.n_features,
                                            n_informative=2,
                                            random_state=1)
        Xy = dict(X=X, y=y)
    ## 2) Building workflow
    ## =======================================================
    from sklearn.svm import SVC
    from epac import CV, Methods
    cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                SVC(kernel="rbf")]),
                      n_folds=3)
    cv_svm = None
    if self.is_swf:
        # Running on the cluster
        from epac import SomaWorkflowEngine
        mmap_mode = None
        if self.memmap:
            mmap_mode = "r+"
        swf_engine = SomaWorkflowEngine(cv_svm_local,
                                        num_processes=self.n_proc,
                                        resource_id="jl237561@gabriel",
                                        login="******",
                                        # remove_finished_wf=False,
                                        # remove_local_tree=False,
                                        mmap_mode=mmap_mode,
                                        queue="Global_long")
        cv_svm = swf_engine.run(**Xy)
        # Printing information about the jobs
        time.sleep(2)
        print('')
        sum_memory = 0
        max_time_cost = 0
        for job_info in swf_engine.engine_info:
            print("mem_cost = {0}, vmem_cost = {1}, time_cost = {2}".format(
                job_info.mem_cost, job_info.vmem_cost, job_info.time_cost))
            sum_memory += job_info.mem_cost
            if max_time_cost < job_info.time_cost:
                max_time_cost = job_info.time_cost
        print("sum_memory =", sum_memory)
        print("max_time_cost =", max_time_cost)
    else:
        # Running on the local machine
        from epac import LocalEngine
        local_engine = LocalEngine(cv_svm_local,
                                   num_processes=self.n_proc)
        cv_svm = local_engine.run(**Xy)
    cv_svm_reduce = cv_svm.reduce()
    print("\n -> Reducing results")
    print(cv_svm_reduce)
    # Creating the directory to save results, if it doesn't exist
    dirname = 'tmp_save_tree/'
    if self.directory is None:
        directory = '/tmp'
    else:
        directory = self.directory
    if not os.path.isdir(directory):
        os.mkdir(directory)
    dirpath = os.path.join(directory, dirname)
    if not os.path.isdir(dirpath):
        os.mkdir(dirpath)
    if self.n_proc == 1:
        ## 4.1) Saving results on the disk for one process
        ## ===================================================
        store = StoreFs(dirpath=dirpath, clear=True)
        cv_svm.save_tree(store=store)
        # Pickle needs a binary-mode file handle
        with open(os.path.join(directory, "tmp_save_results"), 'wb') as f:
            print(f.name)
            pickle.dump(cv_svm_reduce, f)
    else:
        ## 4.2) Loading the results for one process
        ## ===================================================
        try:
            store = StoreFs(dirpath=dirpath, clear=False)
            cv_svm_one_proc = store.load()
            with open(os.path.join(directory, "tmp_save_results"),
                      'rb') as f:
                cv_svm_reduce_one_proc = pickle.load(f)
            ## 5.2) Comparing results to the results for one process
            ## ===================================================
            print("\nComparing %i proc with one proc" % self.n_proc)
            self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
            self.assertTrue(isequal(cv_svm_reduce,
                                    cv_svm_reduce_one_proc))
        except KeyError:
            print("Warning: ")
            print("No previous tree detected, no possible "
                  "comparison of results")
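# isequal comes from the EPAC test utilities and is used above to compare
# two reduced ResultSets. Its implementation is not shown here; the sketch
# below is an assumption about the kind of comparison involved: recurse
# through nested containers and compare numeric arrays within a
# floating-point tolerance rather than bit-for-bit.
import numpy as np

def isequal_sketch(a, b, atol=1e-8):
    if isinstance(a, dict) and isinstance(b, dict):
        return (set(a.keys()) == set(b.keys())
                and all(isequal_sketch(a[k], b[k], atol) for k in a))
    if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)):
        return (len(a) == len(b)
                and all(isequal_sketch(u, v, atol) for u, v in zip(a, b)))
    if isinstance(a, np.ndarray) or isinstance(b, np.ndarray):
        return np.allclose(a, b, atol=atol)
    return a == b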