def load_from_gui(soma_workflow_dirpath):
    """Load the result tree saved under a soma-workflow working directory.

    Parameters
    ----------
    soma_workflow_dirpath: string
        Working directory; the tree is read from the sub-path
        ``SomaWorkflowEngine.tree_root_relative_path`` inside it.

    Returns
    -------
    Whatever ``StoreFs.load()`` returns for that directory (the tree root).
    """
    tree_dirpath = os.path.join(soma_workflow_dirpath,
                                SomaWorkflowEngine.tree_root_relative_path)
    return StoreFs(dirpath=tree_dirpath).load()
def load_from_gui(soma_workflow_dirpath):
    """Load the result tree saved under a working directory.

    Parameters
    ----------
    soma_workflow_dirpath: string
        Working directory; the tree is read from the sub-path
        ``LocalEngine.tree_root_relative_path`` inside it.

    Returns
    -------
    Whatever ``StoreFs.load()`` returns for that directory (the tree root).
    """
    tree_dirpath = os.path.join(soma_workflow_dirpath,
                                LocalEngine.tree_root_relative_path)
    return StoreFs(dirpath=tree_dirpath).load()
def test_peristence_perm_cv_parmethods_pipe_vs_sklearn(self):
    """Compare a saved/reloaded CVBestSearchRefitParallel with GridSearchCV.

    The EPAC workflow is saved to a file-system store, reloaded, run, saved
    again with its results and reloaded once more before being reduced.
    Predictions and best parameters must match those of scikit-learn's
    ``GridSearchCV`` fitted on the same data.
    """
    import tempfile

    key_y_pred = 'y' + conf.SEP + conf.PREDICTION
    X, y = datasets.make_classification(n_samples=12, n_features=10,
                                        n_informative=2)
    n_folds_nested = 2
    C_values = [.1, 0.5, 1, 2, 5]
    kernels = ["linear", "rbf"]

    # With EPAC
    methods = Methods(*[SVC(C=C, kernel=kernel)
                        for C in C_values for kernel in kernels])
    wf = CVBestSearchRefitParallel(methods, n_folds=n_folds_nested)

    # Save workflow
    # -------------
    # mkdtemp() creates the directory atomically; the deprecated mktemp()
    # only returned a name that another process could claim first.
    store = StoreFs(tempfile.mkdtemp())
    wf.save_tree(store=store)
    wf = store.load()
    wf.run(X=X, y=y)

    ## Save results
    wf.save_tree(store=store)
    wf = store.load()
    r_epac = wf.reduce().values()[0]

    # - Without EPAC
    r_sklearn = dict()
    clf = SVC(kernel="linear")
    parameters = {'C': C_values, 'kernel': kernels}
    cv_nested = StratifiedKFold(y=y, n_folds=n_folds_nested)
    gscv = grid_search.GridSearchCV(clf, parameters, cv=cv_nested)
    gscv.fit(X, y)
    r_sklearn[key_y_pred] = gscv.predict(X)
    r_sklearn[conf.BEST_PARAMS] = gscv.best_params_
    r_sklearn[conf.BEST_PARAMS]['name'] = 'SVC'

    # - Comparisons
    comp = np.all(r_epac[key_y_pred] == r_sklearn[key_y_pred])
    self.assertTrue(comp, u'Diff CVBestSearchRefitParallel: prediction')
    comp = np.all([r_epac[conf.BEST_PARAMS][0][p] ==
                   r_sklearn[conf.BEST_PARAMS][p]
                   for p in r_sklearn[conf.BEST_PARAMS]])
    self.assertTrue(comp, u'Diff CVBestSearchRefitParallel: best parameters')
def test_peristence_perm_cv_parmethods_pipe_vs_sklearn(self):
    """Compare a saved/reloaded CVBestSearchRefitParallel with GridSearchCV.

    The EPAC workflow is saved to a file-system store, reloaded, run, saved
    again with its results and reloaded once more before being reduced.
    Predictions and best parameters must match those of scikit-learn's
    ``GridSearchCV`` fitted on the same data.
    """
    import tempfile

    key_y_pred = 'y' + conf.SEP + conf.PREDICTION
    X, y = datasets.make_classification(n_samples=12,
                                        n_features=10,
                                        n_informative=2)
    n_folds_nested = 2
    C_values = [.1, 0.5, 1, 2, 5]
    kernels = ["linear", "rbf"]

    # With EPAC
    methods = Methods(*[SVC(C=C, kernel=kernel)
                        for C in C_values for kernel in kernels])
    wf = CVBestSearchRefitParallel(methods, n_folds=n_folds_nested)

    # Save workflow
    # -------------
    # Use mkdtemp() rather than the deprecated, race-prone mktemp(): the
    # directory is created securely before StoreFs touches it.
    store = StoreFs(tempfile.mkdtemp())
    wf.save_tree(store=store)
    wf = store.load()
    wf.run(X=X, y=y)

    ## Save results
    wf.save_tree(store=store)
    wf = store.load()
    r_epac = wf.reduce().values()[0]

    # - Without EPAC
    r_sklearn = dict()
    clf = SVC(kernel="linear")
    parameters = {'C': C_values, 'kernel': kernels}
    cv_nested = StratifiedKFold(y=y, n_folds=n_folds_nested)
    gscv = grid_search.GridSearchCV(clf, parameters, cv=cv_nested)
    gscv.fit(X, y)
    r_sklearn[key_y_pred] = gscv.predict(X)
    r_sklearn[conf.BEST_PARAMS] = gscv.best_params_
    r_sklearn[conf.BEST_PARAMS]['name'] = 'SVC'

    # - Comparisons
    comp = np.all(r_epac[key_y_pred] == r_sklearn[key_y_pred])
    self.assertTrue(comp, u'Diff CVBestSearchRefitParallel: prediction')
    comp = np.all([r_epac[conf.BEST_PARAMS][0][p] ==
                   r_sklearn[conf.BEST_PARAMS][p]
                   for p in r_sklearn[conf.BEST_PARAMS]])
    self.assertTrue(comp, u'Diff CVBestSearchRefitParallel: best parameters')
def test_peristence_load_and_fit_predict(self):
    """Check that a tree yields the same results before and after storage.

    Three reductions are compared key by key: the in-memory tree, the tree
    reloaded without results and run again, and the tree reloaded after
    being saved with its results.
    """
    import tempfile

    X, y = datasets.make_classification(n_samples=20, n_features=10,
                                        n_informative=2)
    outer_folds = 2
    inner_folds = 3

    # One "SelectKBest -> {SVC(C=1), SVC(C=2)}" pipe per value of k.
    candidate_pipes = []
    for k in [1, 2]:
        selector = SelectKBest(k=k)
        classifiers = Methods(*[SVC(kernel="linear", C=C) for C in [1, 2]])
        candidate_pipes.append(Pipe(selector, classifiers))
    best_search = CVBestSearchRefit(Methods(*candidate_pipes),
                                    n_folds=inner_folds)
    tree_mem = CV(best_search, n_folds=outer_folds,
                  reducer=ClassificationReport(keep=False))

    # Save the (still empty) tree, then run it in memory.
    store = StoreFs(dirpath=tempfile.mkdtemp(), clear=True)
    tree_mem.save_tree(store=store)
    tree_mem.run(X=X, y=y)
    res_mem = tree_mem.reduce().values()[0]

    # Reload the tree without results and run it again.
    tree_fs_noresults = store.load()
    tree_fs_noresults.run(X=X, y=y)
    res_fs_noresults = tree_fs_noresults.reduce().values()[0]

    # Save with results and reload.
    tree_fs_noresults.save_tree(store=store)
    tree_fs_withresults = store.load()
    res_fs_withresults = tree_fs_withresults.reduce().values()[0]

    # Compare the three result sets key by key.
    per_key_equal = []
    for key in res_mem:
        same = np.all(np.asarray(res_mem[key]) ==
                      np.asarray(res_fs_noresults[key]))
        if same:
            same = np.all(np.asarray(res_fs_noresults[key]) ==
                          np.asarray(res_fs_withresults[key]))
        per_key_equal.append(same)
    self.assertTrue(np.all(per_key_equal))
def test_peristence_load_and_fit_predict(self):
    """Check that a tree yields the same results before and after storage.

    Three reductions are compared key by key: the in-memory tree, the tree
    reloaded without results and run again, and the tree reloaded after
    being saved with its results.
    """
    import tempfile

    X, y = datasets.make_classification(n_samples=20,
                                        n_features=10,
                                        n_informative=2)
    outer_folds = 2
    inner_folds = 3

    # One "SelectKBest -> {SVC(C=1), SVC(C=2)}" pipe per value of k.
    candidate_pipes = []
    for k in [1, 2]:
        selector = SelectKBest(k=k)
        classifiers = Methods(*[SVC(kernel="linear", C=C) for C in [1, 2]])
        candidate_pipes.append(Pipe(selector, classifiers))
    best_search = CVBestSearchRefitParallel(Methods(*candidate_pipes),
                                            n_folds=inner_folds)
    tree_mem = CV(best_search, n_folds=outer_folds,
                  reducer=ClassificationReport(keep=False))

    # Save the (still empty) tree, then run it in memory.
    store = StoreFs(dirpath=tempfile.mkdtemp(), clear=True)
    tree_mem.save_tree(store=store)
    tree_mem.run(X=X, y=y)
    res_mem = tree_mem.reduce().values()[0]

    # Reload the tree without results and run it again.
    tree_fs_noresults = store.load()
    tree_fs_noresults.run(X=X, y=y)
    res_fs_noresults = tree_fs_noresults.reduce().values()[0]

    # Save with results and reload.
    tree_fs_noresults.save_tree(store=store)
    tree_fs_withresults = store.load()
    res_fs_withresults = tree_fs_withresults.reduce().values()[0]

    # Compare the three result sets key by key.
    per_key_equal = []
    for key in res_mem:
        same = np.all(np.asarray(res_mem[key]) ==
                      np.asarray(res_fs_noresults[key]))
        if same:
            same = np.all(np.asarray(res_fs_noresults[key]) ==
                          np.asarray(res_fs_withresults[key]))
        per_key_equal.append(same)
    self.assertTrue(np.all(per_key_equal))
def export_to_gui(self, soma_workflow_dirpath, **Xy):
    '''Export the workflow to a directory for later use with the GUI.

    The dataset, the tree and the per-job key files are written under
    ``soma_workflow_dirpath`` and the soma-workflow description is
    serialized to ``SomaWorkflowEngine.open_me_by_soma_workflow_gui``
    inside that directory. Nothing is submitted for execution here.

    Parameters
    ----------
    soma_workflow_dirpath: string
        Output directory; created if it does not exist.
    Xy: dict
        Dataset passed as keyword arguments and saved with ``save_dataset``.

    Raises
    ------
    NoSomaWFError
        If ``soma_workflow.client`` cannot be imported.

    Example
    -------
    see the directory of "examples/run_somaworkflow_gui.py" in epac
    '''
    try:
        # Job is imported only as part of the availability check.
        from soma_workflow.client import Job, Workflow
        from soma_workflow.client import Helper, FileTransfer
    except ImportError:
        errmsg = ("No soma-workflow is found. "
                  "Please verify your soma-workflow "
                  "on your computer (e.g. PYTHONPATH) \n")
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    if not os.path.exists(soma_workflow_dirpath):
        os.makedirs(soma_workflow_dirpath)
    tmp_work_dir_path = soma_workflow_dirpath
    cur_work_dir = os.getcwd()
    try:
        # The dataset is saved with a relative path, hence the chdir.
        os.chdir(tmp_work_dir_path)
        ft_working_directory = FileTransfer(is_input=True,
                                            client_path=tmp_work_dir_path,
                                            name="working directory")
        ## Save the database and tree to working directory
        ## ===============================================
        save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
        store = StoreFs(dirpath=os.path.join(
            tmp_work_dir_path,
            SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)
        ## Subtree job allocation on disk
        ## ==============================
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(self.tree_root,
                                           num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = self._save_job_list(tmp_work_dir_path,
                                            nodesinput_list)
        ## Build soma-workflow
        ## ===================
        jobs = self._create_jobs(keysfile_list,
                                 is_run_local=False,
                                 ft_working_directory=ft_working_directory)
        soma_workflow = Workflow(jobs=jobs)
        if soma_workflow_dirpath:
            out_soma_workflow_file = os.path.join(
                soma_workflow_dirpath,
                SomaWorkflowEngine.open_me_by_soma_workflow_gui)
            Helper.serialize(out_soma_workflow_file, soma_workflow)
    finally:
        # Always give the caller its working directory back, even on error.
        os.chdir(cur_work_dir)
def run(self, **Xy):
    """Run soma-workflow without gui

    The dataset and the tree are saved in a fresh temporary directory, one
    job list per process is written, the workflow is submitted through a
    ``WorkflowController`` and waited for, then the result tree is loaded
    back from the store.

    Parameters
    ----------
    Xy: dict
        Dataset passed as keyword arguments and saved with ``save_dataset``.

    Returns
    -------
    The tree root reloaded from the store (also kept as ``self.tree_root``).

    Raises
    ------
    NoSomaWFError
        If ``soma_workflow.client`` cannot be imported.

    Example
    -------
    >>> from sklearn import datasets
    >>> from epac.map_reduce.engine import SomaWorkflowEngine
    >>> from epac.tests.wfexamples2test import WFExample2
    >>> ## Build dataset
    >>> ## =============
    >>> X, y = datasets.make_classification(n_samples=10,
    ...                                     n_features=20,
    ...                                     n_informative=5,
    ...                                     random_state=1)
    >>> Xy = {'X':X, 'y':y}
    >>> ## Build epac tree
    >>> ## ===============
    >>> tree_root_node = WFExample2().get_workflow()
    >>> ## Build SomaWorkflowEngine and run function for each node
    >>> ## =======================================================
    >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
    ...                                 function_name="transform",
    ...                                 num_processes=3,
    ...                                 remove_finished_wf=False)
    >>> tree_root_node = sfw_engine.run(**Xy)
    light mode
    >>> ## Run reduce process
    >>> ## ==================
    >>> tree_root_node.reduce()
    ResultSet(
    [{'key': SelectKBest/SVC(C=1), 'y/test/score_f1': [ 0.6 0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0. 0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0. 0.5], 'y/test/score_precision/pval': [ 0.5 0. ], 'y/test/score_precision': [ 0.6 0.6], 'y/test/score_recall': [ 0.6 0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6},
     {'key': SelectKBest/SVC(C=3), 'y/test/score_f1': [ 0.6 0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0. 0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0. 0.5], 'y/test/score_precision/pval': [ 0.5 0. ], 'y/test/score_precision': [ 0.6 0.6], 'y/test/score_recall': [ 0.6 0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6}])
    """
    try:
        # Job is imported only as part of the availability check.
        from soma_workflow.client import Job, Workflow
        from soma_workflow.client import Helper, FileTransfer
        from soma_workflow.client import WorkflowController
    except ImportError:
        errmsg = ("No soma-workflow is found. "
                  "Please verify your soma-workflow "
                  "on your computer (e.g. PYTHONPATH) \n")
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    tmp_work_dir_path = tempfile.mkdtemp()
    cur_work_dir = os.getcwd()
    try:
        # The dataset is saved with a relative path, hence the chdir.
        os.chdir(tmp_work_dir_path)
        # Without a resource id the workflow runs on this machine.
        is_run_local = not self.resource_id
        if is_run_local:
            self.resource_id = socket.gethostname()
        if not is_run_local:
            ft_working_directory = FileTransfer(
                is_input=True,
                client_path=tmp_work_dir_path,
                name="working directory")
        else:
            ft_working_directory = tmp_work_dir_path
        ## Save the database and tree to working directory
        ## ===============================================
        save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
        store = StoreFs(dirpath=os.path.join(
            tmp_work_dir_path,
            SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)
        ## Subtree job allocation on disk
        ## ==============================
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(self.tree_root,
                                           num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = save_job_list(tmp_work_dir_path, nodesinput_list)
        ## Build soma-workflow
        ## ===================
        jobs = self._create_jobs(keysfile_list,
                                 is_run_local,
                                 ft_working_directory)
        soma_workflow = Workflow(jobs=jobs)
        controller = WorkflowController(self.resource_id,
                                        self.login,
                                        self.pw)
        ## run soma-workflow
        ## =================
        wf_id = controller.submit_workflow(workflow=soma_workflow,
                                           name="epac workflow",
                                           queue=self.queue)
        Helper.transfer_input_files(wf_id, controller)
        Helper.wait_workflow(wf_id, controller)
        Helper.transfer_output_files(wf_id, controller)
        self.engine_info = self.get_engine_info(controller, wf_id)
        if self.remove_finished_wf:
            controller.delete_workflow(wf_id)
        ## read result tree
        ## ================
        self.tree_root = store.load()
    finally:
        # Restore the caller's cwd and honour remove_local_tree even when
        # the workflow fails part-way through.
        os.chdir(cur_work_dir)
        if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree:
            shutil.rmtree(tmp_work_dir_path)
    return self.tree_root
def export_to_gui(self, soma_workflow_dirpath, **Xy):
    '''Export the workflow to a directory for later use with the GUI.

    The dataset (``np.savez``), the tree and the per-job key files are
    written under ``soma_workflow_dirpath`` and the soma-workflow
    description is serialized to
    ``SomaWorkflowEngine.open_me_by_soma_workflow_gui`` inside that
    directory. Nothing is submitted for execution here.

    Parameters
    ----------
    soma_workflow_dirpath: string
        Output directory; created if it does not exist.
    Xy: dict
        Dataset arrays passed as keyword arguments.

    Raises
    ------
    NoSomaWFError
        If ``soma.workflow.client`` cannot be imported.

    Example
    -------
    see the directory of "examples/run_somaworkflow_gui.py" in epac
    '''
    try:
        from soma.workflow.client import Job, Workflow
        from soma.workflow.client import Helper, FileTransfer
    except ImportError:
        errmsg = ("No soma-workflow is found. "
                  "Please verify your soma-workflow "
                  "on your computer (e.g. PYTHONPATH) \n")
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    if not os.path.exists(soma_workflow_dirpath):
        os.makedirs(soma_workflow_dirpath)
    tmp_work_dir_path = soma_workflow_dirpath
    cur_work_dir = os.getcwd()
    try:
        os.chdir(tmp_work_dir_path)
        ft_working_directory = FileTransfer(is_input=True,
                                            client_path=tmp_work_dir_path,
                                            name="working directory")
        ## Save the database and tree to working directory
        ## ===============================================
        np.savez(os.path.join(tmp_work_dir_path,
                              SomaWorkflowEngine.dataset_relative_path),
                 **Xy)
        store = StoreFs(dirpath=os.path.join(
            tmp_work_dir_path,
            SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)
        ## Subtree job allocation on disk
        ## ==============================
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(self.tree_root,
                                           num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = self._save_job_list(tmp_work_dir_path,
                                            nodesinput_list)
        ## Build soma-workflow
        ## ===================
        # One epac_mapper job per key file; paths are relative to the
        # transferred working directory.
        jobs = [Job(command=[u"epac_mapper",
                             u'--datasets',
                             '"%s"' %
                             (SomaWorkflowEngine.dataset_relative_path),
                             u'--keysfile',
                             '"%s"' % (nodesfile)],
                    referenced_input_files=[ft_working_directory],
                    referenced_output_files=[ft_working_directory],
                    name="epac_job_key=%s" % (nodesfile),
                    working_directory=ft_working_directory)
                for nodesfile in keysfile_list]
        soma_workflow = Workflow(jobs=jobs)
        if soma_workflow_dirpath:
            out_soma_workflow_file = os.path.join(
                soma_workflow_dirpath,
                SomaWorkflowEngine.open_me_by_soma_workflow_gui)
            Helper.serialize(out_soma_workflow_file, soma_workflow)
    finally:
        # Always give the caller its working directory back, even on error.
        os.chdir(cur_work_dir)
def run(self, **Xy):
    '''Run soma-workflow without gui

    The dataset and the tree are saved in a fresh temporary directory, one
    ``epac_mapper`` job is created per key file, the workflow is submitted
    through a ``WorkflowController`` and waited for, then the result tree
    is loaded back from the store. The temporary directory is removed
    afterwards.

    Parameters
    ----------
    Xy: dict
        Dataset arrays passed as keyword arguments (saved with ``np.savez``).

    Returns
    -------
    The tree root reloaded from the store (also kept as ``self.tree_root``).

    Raises
    ------
    NoSomaWFError
        If ``soma.workflow.client`` cannot be imported.

    Example
    -------
    >>> from sklearn import datasets
    >>> from epac.map_reduce.engine import SomaWorkflowEngine
    >>> from epac.tests.wfexamples2test import WFExample2
    >>> ## Build dataset
    >>> ## =============
    >>> X, y = datasets.make_classification(n_samples=10,
    ...                                     n_features=20,
    ...                                     n_informative=5,
    ...                                     random_state=1)
    >>> Xy = {'X':X, 'y':y}
    >>> ## Build epac tree
    >>> ## ===============
    >>> tree_root_node = WFExample2().get_workflow()
    >>> ## Build SomaWorkflowEngine and run function for each node
    >>> ## =======================================================
    >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
    ...                                 function_name="transform",
    ...                                 num_processes=3)
    >>> tree_root_node = sfw_engine.run(**Xy)
    >>> ## Run reduce process
    >>> ## ==================
    >>> tree_root_node.reduce()
    ResultSet(
    [{'key': SelectKBest/SVC(C=1), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0. 0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0. 0.], 'y/test/score_precision/pval': [ 0. 0.], 'y/test/score_precision': [ 0.8 0.8], 'y/test/score_recall': [ 0.8 0.8], 'y/test/score_f1': [ 0.8 0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]},
     {'key': SelectKBest/SVC(C=3), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0. 0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0. 0.], 'y/test/score_precision/pval': [ 0. 0.], 'y/test/score_precision': [ 0.8 0.8], 'y/test/score_recall': [ 0.8 0.8], 'y/test/score_f1': [ 0.8 0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]}])
    '''
    try:
        from soma.workflow.client import Job, Workflow
        from soma.workflow.client import Helper, FileTransfer
        from soma.workflow.client import WorkflowController
    except ImportError:
        errmsg = ("No soma-workflow is found. "
                  "Please verify your soma-workflow "
                  "on your computer (e.g. PYTHONPATH) \n")
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    tmp_work_dir_path = tempfile.mkdtemp()
    cur_work_dir = os.getcwd()
    try:
        os.chdir(tmp_work_dir_path)
        ft_working_directory = FileTransfer(is_input=True,
                                            client_path=tmp_work_dir_path,
                                            name="working directory")
        ## Save the database and tree to working directory
        ## ===============================================
        np.savez(os.path.join(tmp_work_dir_path,
                              SomaWorkflowEngine.dataset_relative_path),
                 **Xy)
        store = StoreFs(dirpath=os.path.join(
            tmp_work_dir_path,
            SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)
        ## Subtree job allocation on disk
        ## ==============================
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(self.tree_root,
                                           num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = self._save_job_list(tmp_work_dir_path,
                                            nodesinput_list)
        ## Build soma-workflow
        ## ===================
        # One epac_mapper job per key file; paths are relative to the
        # transferred working directory.
        jobs = [Job(command=[u"epac_mapper",
                             u'--datasets',
                             '"%s"' %
                             (SomaWorkflowEngine.dataset_relative_path),
                             u'--keysfile',
                             '"%s"' % (nodesfile)],
                    referenced_input_files=[ft_working_directory],
                    referenced_output_files=[ft_working_directory],
                    name="epac_job_key=%s" % (nodesfile),
                    working_directory=ft_working_directory)
                for nodesfile in keysfile_list]
        soma_workflow = Workflow(jobs=jobs)
        # Fall back to this machine when no resource id was configured.
        if not self.resource_id:
            self.resource_id = socket.gethostname()
        controller = WorkflowController(self.resource_id,
                                        self.login,
                                        self.pw)
        ## run soma-workflow
        ## =================
        wf_id = controller.submit_workflow(workflow=soma_workflow,
                                           name="epac workflow")
        Helper.transfer_input_files(wf_id, controller)
        Helper.wait_workflow(wf_id, controller)
        Helper.transfer_output_files(wf_id, controller)
        controller.delete_workflow(wf_id)
        ## read result tree
        ## ================
        self.tree_root = store.load()
    finally:
        # Restore the caller's cwd and drop the temporary directory even
        # when the workflow fails part-way through.
        os.chdir(cur_work_dir)
        if os.path.isdir(tmp_work_dir_path):
            shutil.rmtree(tmp_work_dir_path)
    return self.tree_root
def run(self, **Xy):
    '''Run soma-workflow without gui

    The dataset and the tree are saved in a fresh temporary directory, one
    ``epac_mapper`` job is created per key file, the workflow is submitted
    through a ``WorkflowController`` and waited for, then the result tree
    is loaded back from the store. The temporary directory is removed
    afterwards.

    Parameters
    ----------
    Xy: dict
        Dataset arrays passed as keyword arguments (saved with ``np.savez``).

    Returns
    -------
    The tree root reloaded from the store (also kept as ``self.tree_root``).

    Raises
    ------
    NoSomaWFError
        If ``soma.workflow.client`` cannot be imported.

    Example
    -------
    >>> from sklearn import datasets
    >>> from epac.map_reduce.engine import SomaWorkflowEngine
    >>> from epac.tests.wfexamples2test import WFExample2
    >>> ## Build dataset
    >>> ## =============
    >>> X, y = datasets.make_classification(n_samples=10,
    ...                                     n_features=20,
    ...                                     n_informative=5,
    ...                                     random_state=1)
    >>> Xy = {'X':X, 'y':y}
    >>> ## Build epac tree
    >>> ## ===============
    >>> tree_root_node = WFExample2().get_workflow()
    >>> ## Build SomaWorkflowEngine and run function for each node
    >>> ## =======================================================
    >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
    ...                                 function_name="transform",
    ...                                 num_processes=3)
    >>> tree_root_node = sfw_engine.run(**Xy)
    >>> ## Run reduce process
    >>> ## ==================
    >>> tree_root_node.reduce()
    ResultSet(
    [{'key': SelectKBest/SVC(C=1), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0. 0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0. 0.], 'y/test/score_precision/pval': [ 0. 0.], 'y/test/score_precision': [ 0.8 0.8], 'y/test/score_recall': [ 0.8 0.8], 'y/test/score_f1': [ 0.8 0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]},
     {'key': SelectKBest/SVC(C=3), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0. 0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0. 0.], 'y/test/score_precision/pval': [ 0. 0.], 'y/test/score_precision': [ 0.8 0.8], 'y/test/score_recall': [ 0.8 0.8], 'y/test/score_f1': [ 0.8 0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]}])
    '''
    try:
        from soma.workflow.client import Job, Workflow
        from soma.workflow.client import Helper, FileTransfer
        from soma.workflow.client import WorkflowController
    except ImportError:
        errmsg = ("No soma-workflow is found. "
                  "Please verify your soma-workflow "
                  "on your computer (e.g. PYTHONPATH) \n")
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    tmp_work_dir_path = tempfile.mkdtemp()
    cur_work_dir = os.getcwd()
    try:
        os.chdir(tmp_work_dir_path)
        ft_working_directory = FileTransfer(is_input=True,
                                            client_path=tmp_work_dir_path,
                                            name="working directory")
        ## Save the database and tree to working directory
        ## ===============================================
        dataset_path = os.path.join(
            tmp_work_dir_path,
            SomaWorkflowEngine.dataset_relative_path)
        np.savez(dataset_path, **Xy)
        store = StoreFs(dirpath=os.path.join(
            tmp_work_dir_path,
            SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)
        ## Subtree job allocation on disk
        ## ==============================
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(self.tree_root,
                                           num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = self._save_job_list(tmp_work_dir_path,
                                            nodesinput_list)
        ## Build soma-workflow
        ## ===================
        # One epac_mapper job per key file; paths are relative to the
        # transferred working directory.
        jobs = [Job(command=[u"epac_mapper",
                             u'--datasets',
                             '"%s"' %
                             (SomaWorkflowEngine.dataset_relative_path),
                             u'--keysfile',
                             '"%s"' % (nodesfile)],
                    referenced_input_files=[ft_working_directory],
                    referenced_output_files=[ft_working_directory],
                    name="epac_job_key=%s" % (nodesfile),
                    working_directory=ft_working_directory)
                for nodesfile in keysfile_list]
        soma_workflow = Workflow(jobs=jobs)
        # Fall back to this machine when no resource id was configured.
        if not self.resource_id:
            self.resource_id = socket.gethostname()
        controller = WorkflowController(self.resource_id,
                                        self.login,
                                        self.pw)
        ## run soma-workflow
        ## =================
        wf_id = controller.submit_workflow(workflow=soma_workflow,
                                           name="epac workflow")
        Helper.transfer_input_files(wf_id, controller)
        Helper.wait_workflow(wf_id, controller)
        Helper.transfer_output_files(wf_id, controller)
        controller.delete_workflow(wf_id)
        ## read result tree
        ## ================
        self.tree_root = store.load()
    finally:
        # Restore the caller's cwd and drop the temporary directory even
        # when the workflow fails part-way through.
        os.chdir(cur_work_dir)
        if os.path.isdir(tmp_work_dir_path):
            shutil.rmtree(tmp_work_dir_path)
    return self.tree_root
def test_memmapping(self):
    """Run a CV of two SVCs and compare n-process results with one process.

    With ``self.n_proc == 1`` the resulting tree and its reduction are
    saved under ``self.directory`` (``/tmp`` when it is None). With any
    other process count the saved one-process results are loaded and
    compared with the current run.
    """
    ## 1) Building dataset
    ## ============================================================
    if self.memmap:
        # If the proc is 1, always generate the matrix
        # Otherwise, load it if it exists, or create it if it doesn't
        writing_mode = (self.n_proc == 1)
        X = create_mmat(self.n_samples, self.n_features,
                        dir=self.directory,
                        writing_mode=writing_mode)
        y = create_array(self.n_samples, [0, 1],
                         dir=self.directory,
                         writing_mode=writing_mode)
        Xy = dict(X=X, y=y)
    else:
        X, y = datasets.make_classification(n_samples=self.n_samples,
                                            n_features=self.n_features,
                                            n_informative=2,
                                            random_state=1)
        Xy = dict(X=X, y=y)
    ## 2) Building workflow
    ## =======================================================
    from sklearn.svm import SVC
    from epac import CV, Methods
    cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                SVC(kernel="rbf")]),
                      n_folds=3)
    cv_svm = None
    if self.is_swf:
        # Running on the cluster
        from epac import SomaWorkflowEngine
        mmap_mode = None
        if self.memmap:
            mmap_mode = "r+"
        swf_engine = SomaWorkflowEngine(
            cv_svm_local,
            num_processes=self.n_proc,
            resource_id="jl237561@gabriel",
            login="******",
            # remove_finished_wf=False,
            # remove_local_tree=False,
            mmap_mode=mmap_mode,
            queue="Global_long")
        cv_svm = swf_engine.run(**Xy)
        # Printing information about the jobs
        time.sleep(2)
        print('')
        sum_memory = 0
        max_time_cost = 0
        for job_info in swf_engine.engine_info:
            print(
                "mem_cost = {0}, vmem_cost = {1}, time_cost = {2}".format(
                    job_info.mem_cost,
                    job_info.vmem_cost,
                    job_info.time_cost))
            sum_memory += job_info.mem_cost
            if max_time_cost < job_info.time_cost:
                max_time_cost = job_info.time_cost
        print("sum_memory = ", sum_memory)
        print("max_time_cost = ", max_time_cost)
    else:
        # Running on the local machine
        from epac import LocalEngine
        local_engine = LocalEngine(cv_svm_local,
                                   num_processes=self.n_proc)
        cv_svm = local_engine.run(**Xy)
    cv_svm_reduce = cv_svm.reduce()
    print("\n -> Reducing results")
    print(cv_svm_reduce)
    # Creating the directory to save results, if it doesn't exist
    dirname = 'tmp_save_tree/'
    if self.directory is None:
        directory = '/tmp'
    else:
        directory = self.directory
    if not os.path.isdir(directory):
        os.mkdir(directory)
    dirpath = os.path.join(directory, dirname)
    if not os.path.isdir(dirpath):
        os.mkdir(dirpath)
    results_path = os.path.join(directory, "tmp_save_results")
    if self.n_proc == 1:
        ## 4.1) Saving results on the disk for one process
        ## ===================================================
        store = StoreFs(dirpath=dirpath, clear=True)
        cv_svm.save_tree(store=store)
        # pickle needs a binary file object; text mode fails on Python 3.
        with open(results_path, 'wb') as results_file:
            print(results_file.name)
            pickle.dump(cv_svm_reduce, results_file)
    else:
        ## 4.2) Loading the results for one process
        ## ===================================================
        try:
            store = StoreFs(dirpath=dirpath, clear=False)
            cv_svm_one_proc = store.load()
            with open(results_path, 'rb') as results_file:
                cv_svm_reduce_one_proc = pickle.load(results_file)
            ## 5.2) Comparing results to the results for one process
            ## ===================================================
            print("\nComparing %i proc with one proc" % self.n_proc)
            self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
            self.assertTrue(isequal(cv_svm_reduce, cv_svm_reduce_one_proc))
        except KeyError:
            print("Warning: ")
            print("No previous tree detected, no possible "
                  "comparison of results")
def test_memmapping(self):
    """Run a CV of two SVCs and compare n-process results with one process.

    With ``self.n_proc == 1`` the resulting tree and its reduction are
    saved under ``self.directory`` (``/tmp`` when it is None). With any
    other process count the saved one-process results are loaded and
    compared with the current run.

    Every ``print`` call takes a single pre-formatted string so the output
    is the same under Python 2 and Python 3.
    """
    ## 1) Building dataset
    ## ============================================================
    if self.memmap:
        # If the proc is 1, always generate the matrix
        # Otherwise, load it if it exists, or create it if it doesn't
        writing_mode = (self.n_proc == 1)
        X = create_mmat(self.n_samples, self.n_features,
                        dir=self.directory,
                        writing_mode=writing_mode)
        y = create_array(self.n_samples, [0, 1],
                         dir=self.directory,
                         writing_mode=writing_mode)
        Xy = dict(X=X, y=y)
    else:
        X, y = datasets.make_classification(n_samples=self.n_samples,
                                            n_features=self.n_features,
                                            n_informative=2,
                                            random_state=1)
        Xy = dict(X=X, y=y)
    ## 2) Building workflow
    ## =======================================================
    from sklearn.svm import SVC
    from epac import CV, Methods
    cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                SVC(kernel="rbf")]),
                      n_folds=3)
    cv_svm = None
    if self.is_swf:
        # Running on the cluster
        from epac import SomaWorkflowEngine
        mmap_mode = None
        if self.memmap:
            mmap_mode = "r+"
        swf_engine = SomaWorkflowEngine(cv_svm_local,
                                        num_processes=self.n_proc,
                                        resource_id="jl237561@gabriel",
                                        login="******",
                                        # remove_finished_wf=False,
                                        # remove_local_tree=False,
                                        mmap_mode=mmap_mode,
                                        queue="Global_long")
        cv_svm = swf_engine.run(**Xy)
        # Printing information about the jobs
        time.sleep(2)
        print('')
        sum_memory = 0
        max_time_cost = 0
        for job_info in swf_engine.engine_info:
            print("mem_cost= %s , vmem_cost= %s , time_cost= %s" %
                  (job_info.mem_cost,
                   job_info.vmem_cost,
                   job_info.time_cost))
            sum_memory += job_info.mem_cost
            if max_time_cost < job_info.time_cost:
                max_time_cost = job_info.time_cost
        print("sum_memory = %s" % (sum_memory,))
        print("max_time_cost = %s" % (max_time_cost,))
    else:
        # Running on the local machine
        from epac import LocalEngine
        local_engine = LocalEngine(cv_svm_local,
                                   num_processes=self.n_proc)
        cv_svm = local_engine.run(**Xy)
    cv_svm_reduce = cv_svm.reduce()
    print("\n -> Reducing results")
    print(cv_svm_reduce)
    # Creating the directory to save results, if it doesn't exist
    dirname = 'tmp_save_tree/'
    if self.directory is None:
        directory = '/tmp'
    else:
        directory = self.directory
    if not os.path.isdir(directory):
        os.mkdir(directory)
    dirpath = os.path.join(directory, dirname)
    if not os.path.isdir(dirpath):
        os.mkdir(dirpath)
    results_path = os.path.join(directory, "tmp_save_results")
    if self.n_proc == 1:
        ## 4.1) Saving results on the disk for one process
        ## ===================================================
        store = StoreFs(dirpath=dirpath, clear=True)
        cv_svm.save_tree(store=store)
        # pickle needs a binary file object; text mode fails on Python 3.
        with open(results_path, 'wb') as results_file:
            print(results_file.name)
            pickle.dump(cv_svm_reduce, results_file)
    else:
        ## 4.2) Loading the results for one process
        ## ===================================================
        try:
            store = StoreFs(dirpath=dirpath, clear=False)
            cv_svm_one_proc = store.load()
            with open(results_path, 'rb') as results_file:
                cv_svm_reduce_one_proc = pickle.load(results_file)
            ## 5.2) Comparing results to the results for one process
            ## ===================================================
            print("\nComparing %i proc with one proc" % self.n_proc)
            self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
            self.assertTrue(isequal(cv_svm_reduce, cv_svm_reduce_one_proc))
        except KeyError:
            print("Warning: ")
            print("No previous tree detected, no possible "
                  "comparison of results")