def test_examples_local_engine(self):
    """Run every example workflow three ways -- sequentially in-process,
    through LocalEngine, and through SomaWorkflowEngine -- and check that
    all three executions yield equivalent trees and reduce results.
    """
    for example_cls in get_wf_example_classes():
        # Build one fresh workflow instance per execution path so each
        # engine starts from an untouched tree.
        reference_wf = example_cls().get_workflow()
        tree_for_local = example_cls().get_workflow()
        tree_for_swf = example_cls().get_workflow()

        # Sequential in-process execution is the reference result.
        reference_wf.run(X=self.X, y=self.y)

        # Multi-process execution on the local machine.
        local_engine = LocalEngine(tree_root=tree_for_local,
                                   num_processes=self.n_cores)
        tree_for_local = local_engine.run(X=self.X, y=self.y)

        # Execution through soma-workflow; intermediate files are kept
        # (remove_* flags False) so the run can be inspected afterwards.
        swf_engine = SomaWorkflowEngine(tree_root=tree_for_swf,
                                        num_processes=self.n_cores,
                                        remove_finished_wf=False,
                                        remove_local_tree=False)
        tree_for_swf = swf_engine.run(X=self.X, y=self.y)

        # Both engine runs must match the sequential reference, node by
        # node and after reduction.
        self.assertTrue(compare_two_node(reference_wf, tree_for_local))
        self.assertTrue(compare_two_node(reference_wf, tree_for_swf))
        self.assertTrue(comp_2wf_reduce_res(reference_wf, tree_for_local))
        self.assertTrue(comp_2wf_reduce_res(reference_wf, tree_for_swf))
def test_examples_local_engine(self):
    """Execute each example workflow sequentially, then re-run the same
    tree through LocalEngine and SomaWorkflowEngine, and check that the
    reduce results of both engine runs agree with the sequential run.
    """
    for example_cls in get_wf_example_classes():
        workflow = example_cls().get_workflow()

        # Reference: sequential in-process execution.
        workflow.run(X=self.X, y=self.y)

        # Multi-process run on the local machine, rooted at the same tree.
        engine_local = LocalEngine(tree_root=workflow,
                                   num_processes=self.n_cores)
        wf_from_local = engine_local.run(X=self.X, y=self.y)

        # Run through soma-workflow, also rooted at the same tree.
        engine_swf = SomaWorkflowEngine(tree_root=workflow,
                                        num_processes=self.n_cores)
        wf_from_swf = engine_swf.run(X=self.X, y=self.y)

        # Reduce results must agree with the sequential reference.
        self.assertTrue(comp_2wf_reduce_res(workflow, wf_from_local))
        self.assertTrue(comp_2wf_reduce_res(workflow, wf_from_swf))
""" from sklearn import datasets X, y = datasets.make_classification(n_samples=500, n_features=200000, n_informative=2, random_state=1) Xy = dict(X=X, y=y) ## 2) Building workflow ## ======================================================= print " -> Pt2 : X and y created, building workflow" from sklearn import svm, cross_validation #kfold = cross_validation.KFold(n=len(X), n_folds=3) #svc = svm.SVC(C=1, kernel='linear') #print [svc.fit(X[train], y[train]).score(X[test], y[test]) for train, test in kfold] from epac import CV, Methods cv_svm_local = CV(Methods(*[svm.SVC(kernel="linear"), svm.SVC(kernel="rbf")]), n_folds=3) print " -> Pt3 : Workflow built, defining local engine" cv_svm = None n_proc = 2 # Running on the local machine from epac import LocalEngine local_engine = LocalEngine(cv_svm_local, num_processes=n_proc) print " -> Pt4 : Running" cv_svm = local_engine.run(**Xy) print " -> Success with %i procs!" % n_proc
def test_memmapping(self):
    """Run a CV(SVC linear/rbf) workflow on either a memory-mapped or an
    in-memory dataset, via SomaWorkflowEngine (cluster) or LocalEngine,
    then save the results (single-process run) or compare them against a
    previously saved single-process run (multi-process run).

    Reads instance configuration: ``self.memmap``, ``self.n_proc``,
    ``self.n_samples``, ``self.n_features``, ``self.directory``,
    ``self.is_swf``.
    """
    ## 1) Building dataset
    ## ============================================================
    if self.memmap:
        # If the proc is 1, always generate the matrix
        # Otherwise, load it if it exists, or create it if it doesn't
        writing_mode = (self.n_proc == 1)
        # NOTE(review): create_mmat/create_array are project helpers --
        # presumably they return memory-mapped arrays backed by files in
        # self.directory; verify against their definitions.
        X = create_mmat(self.n_samples, self.n_features,
                        dir=self.directory,
                        writing_mode=writing_mode)
        y = create_array(self.n_samples, [0, 1],
                         dir=self.directory,
                         writing_mode=writing_mode)
        Xy = dict(X=X, y=y)
    else:
        # Plain in-memory synthetic classification data; fixed
        # random_state keeps the dataset identical across runs so
        # results can be compared between processes.
        X, y = datasets.make_classification(n_samples=self.n_samples,
                                            n_features=self.n_features,
                                            n_informative=2,
                                            random_state=1)
        Xy = dict(X=X, y=y)
    ## 2) Building workflow
    ## =======================================================
    from sklearn.svm import SVC
    from epac import CV, Methods
    # 3-fold CV over two SVC variants (linear and rbf kernels).
    cv_svm_local = CV(Methods(*[SVC(
        kernel="linear"), SVC(kernel="rbf")]),
        n_folds=3)
    cv_svm = None
    if self.is_swf:
        # Running on the cluster
        from epac import SomaWorkflowEngine
        # Memmapped input is opened read-write ("r+") by the workers.
        mmap_mode = None
        if self.memmap:
            mmap_mode = "r+"
        swf_engine = SomaWorkflowEngine(
            cv_svm_local,
            num_processes=self.n_proc,
            resource_id="jl237561@gabriel",
            login="******",
            # remove_finished_wf=False,
            # remove_local_tree=False,
            mmap_mode=mmap_mode,
            queue="Global_long")
        cv_svm = swf_engine.run(**Xy)
        # Printing information about the jobs
        # NOTE(review): the sleep presumably lets job accounting settle
        # before engine_info is read -- confirm it is actually needed.
        time.sleep(2)
        print('')
        sum_memory = 0
        max_time_cost = 0
        # Aggregate per-job memory and track the slowest job.
        for job_info in swf_engine.engine_info:
            print(
                "mem_cost = {0}, vmem_cost = {1}, time_cost = {2}".format(
                    job_info.mem_cost,
                    job_info.vmem_cost,
                    job_info.time_cost))
            sum_memory += job_info.mem_cost
            if max_time_cost < job_info.time_cost:
                max_time_cost = job_info.time_cost
        print("sum_memory = ", sum_memory)
        print("max_time_cost = ", max_time_cost)
    else:
        # Running on the local machine
        from epac import LocalEngine
        local_engine = LocalEngine(cv_svm_local,
                                   num_processes=self.n_proc)
        cv_svm = local_engine.run(**Xy)
    cv_svm_reduce = cv_svm.reduce()
    print("\n -> Reducing results")
    print(cv_svm_reduce)
    # Creating the directory to save results, if it doesn't exist
    dirname = 'tmp_save_tree/'
    if self.directory is None:
        directory = '/tmp'
    else:
        directory = self.directory
    if not os.path.isdir(directory):
        os.mkdir(directory)
    dirpath = os.path.join(directory, dirname)
    if not os.path.isdir(dirpath):
        os.mkdir(dirpath)
    if self.n_proc == 1:
        ## 4.1) Saving results on the disk for one process
        ## ===================================================
        # The single-process run is the reference: persist both the
        # full tree (StoreFs) and the pickled reduce result.
        store = StoreFs(dirpath=dirpath, clear=True)
        cv_svm.save_tree(store=store)
        # NOTE(review): text-mode 'w+' with pickle.dump suggests this
        # was written for Python 2; under Python 3 pickle requires a
        # binary file -- confirm which interpreter runs these tests.
        with open(os.path.join(directory, "tmp_save_results"), 'w+') \
                as filename:
            print(filename.name)
            pickle.dump(cv_svm_reduce, filename)
    else:
        ## 4.2) Loading the results for one process
        ## ===================================================
        try:
            store = StoreFs(dirpath=dirpath, clear=False)
            cv_svm_one_proc = store.load()
            with open(os.path.join(directory, "tmp_save_results"),
                      'r+') \
                    as filename:
                cv_svm_reduce_one_proc = pickle.load(filename)
            ## 5.2) Comparing results to the results for one process
            ## ===================================================
            print("\nComparing %i proc with one proc" % self.n_proc)
            self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
            self.assertTrue(isequal(cv_svm_reduce,
                                    cv_svm_reduce_one_proc))
        except KeyError:
            # No saved single-process reference found: warn instead of
            # failing, since the comparison is simply not possible.
            print("Warning: ")
            print("No previous tree detected, no possible "\
                  "comparison of results")