コード例 #1
0
ファイル: engine.py プロジェクト: neurospin/pylearn-epac
 def load_from_gui(soma_workflow_dirpath):
     """Load the result tree saved under a soma-workflow working directory.

     The tree location is ``SomaWorkflowEngine.tree_root_relative_path``
     joined onto ``soma_workflow_dirpath``. The object returned by
     ``StoreFs.load()`` for that location is handed back unchanged.
     """
     tree_dirpath = os.path.join(
         soma_workflow_dirpath,
         SomaWorkflowEngine.tree_root_relative_path)
     return StoreFs(dirpath=tree_dirpath).load()
コード例 #2
0
 def load_from_gui(soma_workflow_dirpath):
     '''Load the result tree saved under a working directory.

     The tree location is ``LocalEngine.tree_root_relative_path`` joined
     onto ``soma_workflow_dirpath``. The object returned by
     ``StoreFs.load()`` for that location is handed back unchanged.
     '''
     tree_dirpath = os.path.join(soma_workflow_dirpath,
                                 LocalEngine.tree_root_relative_path)
     return StoreFs(dirpath=tree_dirpath).load()
コード例 #3
0
    def test_peristence_perm_cv_parmethods_pipe_vs_sklearn(self):
        """Compare a persisted CVBestSearchRefitParallel with GridSearchCV.

        The workflow is saved, reloaded, run, saved again together with its
        results and reloaded once more. The predictions and best parameters
        read from that last tree must match those of scikit-learn's
        ``GridSearchCV`` fitted on the same data.
        """
        key_y_pred = 'y' + conf.SEP + conf.PREDICTION
        X, y = datasets.make_classification(n_samples=12,
                                            n_features=10,
                                            n_informative=2)
        n_folds_nested = 2
        C_values = [.1, 0.5, 1, 2, 5]
        kernels = ["linear", "rbf"]
        # With EPAC
        methods = Methods(
            *[SVC(C=C, kernel=kernel) for C in C_values for kernel in kernels])
        wf = CVBestSearchRefitParallel(methods, n_folds=n_folds_nested)
        # Save workflow
        # -------------
        import shutil
        import tempfile
        # tempfile.mktemp() is deprecated: it only returns a name, leaving a
        # window in which another process can claim the path. mkdtemp()
        # creates a private directory, removed again once the test is over.
        store_dirpath = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, store_dirpath, ignore_errors=True)
        store = StoreFs(dirpath=store_dirpath, clear=True)
        wf.save_tree(store=store)
        wf = store.load()
        wf.run(X=X, y=y)
        ## Save results
        wf.save_tree(store=store)
        wf = store.load()
        r_epac = wf.reduce().values()[0]

        # - Without EPAC
        r_sklearn = dict()
        clf = SVC(kernel="linear")
        parameters = {'C': C_values, 'kernel': kernels}
        cv_nested = StratifiedKFold(y=y, n_folds=n_folds_nested)
        gscv = grid_search.GridSearchCV(clf, parameters, cv=cv_nested)
        gscv.fit(X, y)
        r_sklearn[key_y_pred] = gscv.predict(X)
        r_sklearn[conf.BEST_PARAMS] = gscv.best_params_
        r_sklearn[conf.BEST_PARAMS]['name'] = 'SVC'

        # - Comparisons
        comp = np.all(r_epac[key_y_pred] == r_sklearn[key_y_pred])
        self.assertTrue(comp, u'Diff CVBestSearchRefitParallel: prediction')
        comp = np.all([
            r_epac[conf.BEST_PARAMS][0][p] == r_sklearn[conf.BEST_PARAMS][p]
            for p in r_sklearn[conf.BEST_PARAMS]
        ])
        self.assertTrue(comp,
                        u'Diff CVBestSearchRefitParallel: best parameters')
コード例 #4
0
    def test_peristence_perm_cv_parmethods_pipe_vs_sklearn(self):
        """Compare a persisted CVBestSearchRefitParallel with GridSearchCV.

        The workflow is saved, reloaded, run, saved again together with its
        results and reloaded once more. The predictions and best parameters
        read from that last tree must match those of scikit-learn's
        ``GridSearchCV`` fitted on the same data.
        """
        key_y_pred = 'y' + conf.SEP + conf.PREDICTION
        X, y = datasets.make_classification(n_samples=12, n_features=10,
                                            n_informative=2)
        n_folds_nested = 2
        C_values = [.1, 0.5, 1, 2, 5]
        kernels = ["linear", "rbf"]
        # With EPAC
        methods = Methods(*[SVC(C=C, kernel=kernel)
                          for C in C_values for kernel in kernels])
        wf = CVBestSearchRefitParallel(methods, n_folds=n_folds_nested)
        # Save workflow
        # -------------
        import shutil
        import tempfile
        # tempfile.mktemp() is deprecated: it only returns a name, leaving a
        # window in which another process can claim the path. mkdtemp()
        # creates a private directory, removed again once the test is over.
        store_dirpath = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, store_dirpath, ignore_errors=True)
        store = StoreFs(dirpath=store_dirpath, clear=True)
        wf.save_tree(store=store)
        wf = store.load()
        wf.run(X=X, y=y)
        ## Save results
        wf.save_tree(store=store)
        wf = store.load()
        r_epac = wf.reduce().values()[0]

        # - Without EPAC
        r_sklearn = dict()
        clf = SVC(kernel="linear")
        parameters = {'C': C_values, 'kernel': kernels}
        cv_nested = StratifiedKFold(y=y, n_folds=n_folds_nested)
        gscv = grid_search.GridSearchCV(clf, parameters, cv=cv_nested)
        gscv.fit(X, y)
        r_sklearn[key_y_pred] = gscv.predict(X)
        r_sklearn[conf.BEST_PARAMS] = gscv.best_params_
        r_sklearn[conf.BEST_PARAMS]['name'] = 'SVC'

        # - Comparisons
        comp = np.all(r_epac[key_y_pred] == r_sklearn[key_y_pred])
        self.assertTrue(comp, u'Diff CVBestSearchRefitParallel: prediction')
        comp = np.all([r_epac[conf.BEST_PARAMS][0][p] ==
                       r_sklearn[conf.BEST_PARAMS][p]
                       for p in r_sklearn[conf.BEST_PARAMS]])
        self.assertTrue(comp,
                        u'Diff CVBestSearchRefitParallel: best parameters')
コード例 #5
0
    def test_peristence_load_and_fit_predict(self):
        """Check that results survive a save/load round trip of the tree.

        Three result dictionaries are compared key by key: the one computed
        by the in-memory tree, the one computed by a tree reloaded before
        any run, and the one read from a tree reloaded after its results
        were saved.
        """
        import tempfile

        X, y = datasets.make_classification(
            n_samples=20, n_features=10, n_informative=2)
        n_folds = 2
        n_folds_nested = 3
        k_values = [1, 2]
        C_values = [1, 2]

        # One SelectKBest -> {SVC(C=...)} pipe per value of k
        pipes = []
        for k in k_values:
            selector = SelectKBest(k=k)
            classifiers = Methods(
                *[SVC(kernel="linear", C=C) for C in C_values])
            pipes.append(Pipe(selector, classifiers))
        pipelines = Methods(*pipes)

        pipeline = CVBestSearchRefit(pipelines, n_folds=n_folds_nested)
        report = ClassificationReport(keep=False)
        tree_mem = CV(pipeline, n_folds=n_folds, reducer=report)

        # Save the tree before it holds any result, then run it in memory
        store = StoreFs(dirpath=tempfile.mkdtemp(), clear=True)
        tree_mem.save_tree(store=store)
        tree_mem.run(X=X, y=y)
        res_mem = tree_mem.reduce().values()[0]

        # Reload the result-less tree and run it
        tree_fs_noresults = store.load()
        tree_fs_noresults.run(X=X, y=y)
        res_fs_noresults = tree_fs_noresults.reduce().values()[0]

        # Save again, now with results, and read them back without running
        tree_fs_noresults.save_tree(store=store)
        tree_fs_withresults = store.load()
        res_fs_withresults = tree_fs_withresults.reduce().values()[0]

        def same(left, right):
            return np.all(np.asarray(left) == np.asarray(right))

        # Compare
        checks = []
        for k in res_mem:
            checks.append(
                same(res_mem[k], res_fs_noresults[k])
                and same(res_fs_noresults[k], res_fs_withresults[k]))
        self.assertTrue(np.all(checks))
コード例 #6
0
    def test_peristence_load_and_fit_predict(self):
        """Check that results survive a save/load round trip of the tree.

        The nested search uses ``CVBestSearchRefitParallel``. Results from
        the in-memory tree, from a tree reloaded before any run, and from a
        tree reloaded after its results were saved must agree for every key.
        """
        import tempfile

        X, y = datasets.make_classification(
            n_samples=20, n_features=10, n_informative=2)
        n_folds = 2
        n_folds_nested = 3
        k_values = [1, 2]
        C_values = [1, 2]

        # One SelectKBest -> {SVC(C=...)} pipe per value of k
        pipes = []
        for k in k_values:
            selector = SelectKBest(k=k)
            classifiers = Methods(
                *[SVC(kernel="linear", C=C) for C in C_values])
            pipes.append(Pipe(selector, classifiers))
        pipelines = Methods(*pipes)

        pipeline = CVBestSearchRefitParallel(pipelines, n_folds=n_folds_nested)
        report = ClassificationReport(keep=False)
        tree_mem = CV(pipeline, n_folds=n_folds, reducer=report)

        # Save Tree (no results yet), then run the in-memory copy
        store = StoreFs(dirpath=tempfile.mkdtemp(), clear=True)
        tree_mem.save_tree(store=store)
        tree_mem.run(X=X, y=y)
        res_mem = tree_mem.reduce().values()[0]

        # Reload Tree and run it
        tree_fs_noresults = store.load()
        tree_fs_noresults.run(X=X, y=y)
        res_fs_noresults = tree_fs_noresults.reduce().values()[0]

        # Save with results, then read them back without running
        tree_fs_noresults.save_tree(store=store)
        tree_fs_withresults = store.load()
        res_fs_withresults = tree_fs_withresults.reduce().values()[0]

        # Compare
        per_key = []
        for k in res_mem:
            in_memory = np.asarray(res_mem[k])
            rerun = np.asarray(res_fs_noresults[k])
            per_key.append(
                np.all(in_memory == rerun)
                and np.all(np.asarray(res_fs_noresults[k]) ==
                           np.asarray(res_fs_withresults[k])))
        self.assertTrue(np.all(per_key))
コード例 #7
0
ファイル: engine.py プロジェクト: ilgrad/pylearn-epac
 def export_to_gui(self, soma_workflow_dirpath, **Xy):
     '''Export the workflow to files that the soma-workflow GUI can open.

     The dataset, the tree and the per-job key files are written into
     ``soma_workflow_dirpath`` (created when missing). The soma-workflow
     description is then serialized in that directory under the name
     ``SomaWorkflowEngine.open_me_by_soma_workflow_gui``. The process
     working directory is switched to ``soma_workflow_dirpath`` while the
     files are written and is always restored afterwards.

     Parameters
     ----------
     soma_workflow_dirpath: string
         Directory receiving the exported files.
     **Xy: keyword arguments
         Dataset, forwarded to ``save_dataset``.

     Raises
     ------
     NoSomaWFError
         When ``soma_workflow.client`` cannot be imported.

     Example
     -------
     see the directory of "examples/run_somaworkflow_gui.py" in epac
     '''
     try:
         from soma_workflow.client import Job, Workflow
         from soma_workflow.client import Helper, FileTransfer
     except ImportError:
         # The message pieces used to be concatenated without a separating
         # space ("soma-worklowon your computer").
         errmsg = "No soma-workflow is found. "\
             "Please verify your soma-workflow "\
             "on your computer (e.g. PYTHONPATH) \n"
         sys.stderr.write(errmsg)
         sys.stdout.write(errmsg)
         raise NoSomaWFError
     if not os.path.exists(soma_workflow_dirpath):
         os.makedirs(soma_workflow_dirpath)
     tmp_work_dir_path = soma_workflow_dirpath
     cur_work_dir = os.getcwd()
     os.chdir(tmp_work_dir_path)
     try:
         ft_working_directory = FileTransfer(is_input=True,
                                             client_path=tmp_work_dir_path,
                                             name="working directory")
         ## Save the database and tree to working directory
         ## ===============================================
         # NOTE(review): db_size is not used further in this method; the
         # call is kept in case estimate_dataset_size validates its input.
         db_size = estimate_dataset_size(**Xy)
         db_size = int(db_size / (1024 * 1024))  # convert it into mega byte
         # The dataset path is relative, hence the chdir above.
         save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
         store = StoreFs(dirpath=os.path.join(
             tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path))
         self.tree_root.save_tree(store=store)
         ## Subtree job allocation on disk
         ## ==============================
         node_input = NodesInput(self.tree_root.get_key())
         split_node_input = SplitNodesInput(self.tree_root,
                                            num_processes=self.num_processes)
         nodesinput_list = split_node_input.split(node_input)
         keysfile_list = self._save_job_list(tmp_work_dir_path,
                                             nodesinput_list)
         ## Build soma-workflow
         ## ===================
         jobs = self._create_jobs(keysfile_list,
                                  is_run_local=False,
                                  ft_working_directory=ft_working_directory)
         soma_workflow = Workflow(jobs=jobs)
         if soma_workflow_dirpath:
             out_soma_workflow_file = os.path.join(
                 soma_workflow_dirpath,
                 SomaWorkflowEngine.open_me_by_soma_workflow_gui)
             Helper.serialize(out_soma_workflow_file, soma_workflow)
     finally:
         # Restore the caller's working directory even when the export fails.
         os.chdir(cur_work_dir)
コード例 #8
0
ファイル: engine.py プロジェクト: neurospin/pylearn-epac
    def run(self, **Xy):
        """Run soma-workflow without gui

        The dataset and the tree are written to a fresh temporary directory,
        one job per key file is submitted through a ``WorkflowController``,
        and the resulting tree is loaded back from the same directory once
        the workflow has finished. When ``self.resource_id`` is empty the
        local host name is used and the jobs are created in local mode.

        The caller's working directory is always restored. The temporary
        directory is removed, on success or failure, when
        ``self.remove_local_tree`` is set.

        Parameters
        ----------
        **Xy: keyword arguments
            Dataset, forwarded to ``save_dataset``.

        Returns
        -------
        The tree root loaded from the store after the workflow has run
        (also stored in ``self.tree_root``).

        Raises
        ------
        NoSomaWFError
            When ``soma_workflow.client`` cannot be imported.

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="transform",
        ...                                 num_processes=3,
        ...                                 remove_finished_wf=False)
        >>> tree_root_node = sfw_engine.run(**Xy)
        light mode
        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_f1': [ 0.6  0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0.   0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0.   0.5], 'y/test/score_precision/pval': [ 0.5  0. ], 'y/test/score_precision': [ 0.6  0.6], 'y/test/score_recall': [ 0.6  0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_f1': [ 0.6  0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0.   0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0.   0.5], 'y/test/score_precision/pval': [ 0.5  0. ], 'y/test/score_precision': [ 0.6  0.6], 'y/test/score_recall': [ 0.6  0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6}])

        """
        try:
            from soma_workflow.client import Job, Workflow
            from soma_workflow.client import Helper, FileTransfer
            from soma_workflow.client import WorkflowController
        except ImportError:
            # The message pieces used to be concatenated without a separating
            # space ("soma-worklowon your computer").
            errmsg = (
                "No soma-workflow is found. "
                "Please verify your soma-workflow "
                "on your computer (e.g. PYTHONPATH) \n"
            )
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError
        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        os.chdir(tmp_work_dir_path)
        try:
            is_run_local = False
            if not self.resource_id:
                self.resource_id = socket.gethostname()
                is_run_local = True
            if not is_run_local:
                ft_working_directory = FileTransfer(
                    is_input=True,
                    client_path=tmp_work_dir_path,
                    name="working directory")
            else:
                # Local runs use the directory path itself, no file transfer.
                ft_working_directory = tmp_work_dir_path

            ## Save the database and tree to working directory
            ## ===============================================
            # The dataset path is relative, hence the chdir above.
            save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
            store = StoreFs(dirpath=os.path.join(
                tmp_work_dir_path,
                SomaWorkflowEngine.tree_root_relative_path))
            self.tree_root.save_tree(store=store)

            ## Subtree job allocation on disk
            ## ==============================
            node_input = NodesInput(self.tree_root.get_key())
            split_node_input = SplitNodesInput(
                self.tree_root, num_processes=self.num_processes)
            nodesinput_list = split_node_input.split(node_input)
            keysfile_list = save_job_list(tmp_work_dir_path, nodesinput_list)

            ## Build soma-workflow
            ## ===================
            jobs = self._create_jobs(keysfile_list, is_run_local,
                                     ft_working_directory)
            soma_workflow = Workflow(jobs=jobs)

            controller = WorkflowController(self.resource_id, self.login,
                                            self.pw)
            ## run soma-workflow
            ## =================
            wf_id = controller.submit_workflow(workflow=soma_workflow,
                                               name="epac workflow",
                                               queue=self.queue)
            Helper.transfer_input_files(wf_id, controller)
            Helper.wait_workflow(wf_id, controller)
            Helper.transfer_output_files(wf_id, controller)

            self.engine_info = self.get_engine_info(controller, wf_id)

            if self.remove_finished_wf:
                controller.delete_workflow(wf_id)
            ## read result tree
            ## ================
            self.tree_root = store.load()
        finally:
            # Leave the temporary directory before (optionally) deleting it,
            # and do so even when the workflow failed.
            os.chdir(cur_work_dir)
            if self.remove_local_tree and os.path.isdir(tmp_work_dir_path):
                shutil.rmtree(tmp_work_dir_path)
        return self.tree_root
コード例 #9
0
 def export_to_gui(self, soma_workflow_dirpath, **Xy):
     '''Export the workflow to files that the soma-workflow GUI can open.

     The dataset (via ``np.savez``), the tree and the per-job key files are
     written into ``soma_workflow_dirpath`` (created when missing). One
     ``epac_mapper`` job is declared per key file and the resulting
     workflow is serialized in that directory under the name
     ``SomaWorkflowEngine.open_me_by_soma_workflow_gui``. The process
     working directory is switched to ``soma_workflow_dirpath`` meanwhile
     and is always restored afterwards.

     Parameters
     ----------
     soma_workflow_dirpath: string
         Directory receiving the exported files.
     **Xy: keyword arguments
         Dataset arrays, forwarded to ``np.savez``.

     Raises
     ------
     NoSomaWFError
         When ``soma.workflow.client`` cannot be imported.

     Example
     -------
     see the directory of "examples/run_somaworkflow_gui.py" in epac
     '''
     try:
         from soma.workflow.client import Job, Workflow
         from soma.workflow.client import Helper, FileTransfer
     except ImportError:
         # The message pieces used to be concatenated without a separating
         # space ("soma-worklowon your computer").
         errmsg = "No soma-workflow is found. "\
             "Please verify your soma-workflow "\
             "on your computer (e.g. PYTHONPATH) \n"
         sys.stderr.write(errmsg)
         sys.stdout.write(errmsg)
         raise NoSomaWFError
     if not os.path.exists(soma_workflow_dirpath):
         os.makedirs(soma_workflow_dirpath)
     tmp_work_dir_path = soma_workflow_dirpath
     cur_work_dir = os.getcwd()
     os.chdir(tmp_work_dir_path)
     try:
         ft_working_directory = FileTransfer(is_input=True,
                                             client_path=tmp_work_dir_path,
                                             name="working directory")
         ## Save the database and tree to working directory
         ## ===============================================
         np.savez(
             os.path.join(tmp_work_dir_path,
                          SomaWorkflowEngine.dataset_relative_path), **Xy)
         store = StoreFs(dirpath=os.path.join(
             tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path))
         self.tree_root.save_tree(store=store)
         ## Subtree job allocation on disk
         ## ==============================
         node_input = NodesInput(self.tree_root.get_key())
         split_node_input = SplitNodesInput(self.tree_root,
                                            num_processes=self.num_processes)
         nodesinput_list = split_node_input.split(node_input)
         keysfile_list = self._save_job_list(tmp_work_dir_path,
                                             nodesinput_list)
         ## Build soma-workflow
         ## ===================
         # One epac_mapper job per key file, all sharing the working
         # directory as both input and output transfer.
         jobs = [
             Job(command=[
                 u"epac_mapper", u'--datasets',
                 '"%s"' % (SomaWorkflowEngine.dataset_relative_path),
                 u'--keysfile',
                 '"%s"' % (nodesfile)
             ],
                 referenced_input_files=[ft_working_directory],
                 referenced_output_files=[ft_working_directory],
                 name="epac_job_key=%s" % (nodesfile),
                 working_directory=ft_working_directory)
             for nodesfile in keysfile_list
         ]
         soma_workflow = Workflow(jobs=jobs)
         if soma_workflow_dirpath:
             out_soma_workflow_file = os.path.join(
                 soma_workflow_dirpath,
                 SomaWorkflowEngine.open_me_by_soma_workflow_gui)
             Helper.serialize(out_soma_workflow_file, soma_workflow)
     finally:
         # Restore the caller's working directory even when the export fails.
         os.chdir(cur_work_dir)
コード例 #10
0
    def run(self, **Xy):
        '''Run soma-workflow without gui

        The dataset (via ``np.savez``) and the tree are written to a fresh
        temporary directory, one ``epac_mapper`` job per key file is
        submitted through a ``WorkflowController``, and the resulting tree
        is loaded back from the same directory once the workflow has
        finished. When ``self.resource_id`` is empty the local host name is
        used instead.

        The caller's working directory is always restored and the temporary
        directory is always removed, whether the run succeeds or fails.

        Parameters
        ----------
        **Xy: keyword arguments
            Dataset arrays, forwarded to ``np.savez``.

        Returns
        -------
        The tree root loaded from the store after the workflow has run
        (also stored in ``self.tree_root``).

        Raises
        ------
        NoSomaWFError
            When ``soma.workflow.client`` cannot be imported.

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="transform",
        ...                                 num_processes=3)
        >>> tree_root_node = sfw_engine.run(**Xy)

        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]}])
        '''
        try:
            from soma.workflow.client import Job, Workflow
            from soma.workflow.client import Helper, FileTransfer
            from soma.workflow.client import WorkflowController
        except ImportError:
            # The message pieces used to be concatenated without a separating
            # space ("soma-worklowon your computer").
            errmsg = "No soma-workflow is found. "\
                "Please verify your soma-workflow "\
                "on your computer (e.g. PYTHONPATH) \n"
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError
        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        os.chdir(tmp_work_dir_path)
        try:
            ft_working_directory = FileTransfer(
                is_input=True,
                client_path=tmp_work_dir_path,
                name="working directory")
            ## Save the database and tree to working directory
            ## ===============================================
            np.savez(
                os.path.join(tmp_work_dir_path,
                             SomaWorkflowEngine.dataset_relative_path), **Xy)
            store = StoreFs(dirpath=os.path.join(
                tmp_work_dir_path,
                SomaWorkflowEngine.tree_root_relative_path))
            self.tree_root.save_tree(store=store)

            ## Subtree job allocation on disk
            ## ==============================
            node_input = NodesInput(self.tree_root.get_key())
            split_node_input = SplitNodesInput(
                self.tree_root, num_processes=self.num_processes)
            nodesinput_list = split_node_input.split(node_input)
            keysfile_list = self._save_job_list(tmp_work_dir_path,
                                                nodesinput_list)
            ## Build soma-workflow
            ## ===================
            # One epac_mapper job per key file, all sharing the working
            # directory as both input and output transfer.
            jobs = [
                Job(command=[
                    u"epac_mapper", u'--datasets',
                    '"%s"' % (SomaWorkflowEngine.dataset_relative_path),
                    u'--keysfile',
                    '"%s"' % (nodesfile)
                ],
                    referenced_input_files=[ft_working_directory],
                    referenced_output_files=[ft_working_directory],
                    name="epac_job_key=%s" % (nodesfile),
                    working_directory=ft_working_directory)
                for nodesfile in keysfile_list
            ]
            soma_workflow = Workflow(jobs=jobs)
            if not self.resource_id:
                self.resource_id = socket.gethostname()
            controller = WorkflowController(self.resource_id, self.login,
                                            self.pw)
            ## run soma-workflow
            ## =================
            wf_id = controller.submit_workflow(workflow=soma_workflow,
                                               name="epac workflow")
            Helper.transfer_input_files(wf_id, controller)
            Helper.wait_workflow(wf_id, controller)
            Helper.transfer_output_files(wf_id, controller)
            controller.delete_workflow(wf_id)
            ## read result tree
            ## ================
            self.tree_root = store.load()
        finally:
            # Leave the temporary directory before deleting it, and do both
            # even when the workflow failed.
            os.chdir(cur_work_dir)
            if os.path.isdir(tmp_work_dir_path):
                shutil.rmtree(tmp_work_dir_path)
        return self.tree_root
コード例 #11
0
ファイル: engine.py プロジェクト: duchesnay/pylearn-epac
    def run(self, **Xy):
        '''Run soma-workflow without gui

        The dataset (via ``np.savez``) and the tree are written to a fresh
        temporary directory, one ``epac_mapper`` job per key file is
        submitted through a ``WorkflowController``, and the resulting tree
        is loaded back from the same directory once the workflow has
        finished. When ``self.resource_id`` is empty the local host name is
        used instead.

        The caller's working directory is always restored and the temporary
        directory is always removed, whether the run succeeds or fails.

        Parameters
        ----------
        **Xy: keyword arguments
            Dataset arrays, forwarded to ``np.savez``.

        Returns
        -------
        The tree root loaded from the store after the workflow has run
        (also stored in ``self.tree_root``).

        Raises
        ------
        NoSomaWFError
            When ``soma.workflow.client`` cannot be imported.

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="transform",
        ...                                 num_processes=3)
        >>> tree_root_node = sfw_engine.run(**Xy)

        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]}])
        '''
        try:
            from soma.workflow.client import Job, Workflow
            from soma.workflow.client import Helper, FileTransfer
            from soma.workflow.client import WorkflowController
        except ImportError:
            # The message pieces used to be concatenated without a separating
            # space ("soma-worklowon your computer").
            errmsg = "No soma-workflow is found. "\
                "Please verify your soma-workflow "\
                "on your computer (e.g. PYTHONPATH) \n"
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError
        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        os.chdir(tmp_work_dir_path)
        try:
            ft_working_directory = FileTransfer(
                is_input=True,
                client_path=tmp_work_dir_path,
                name="working directory")
            ## Save the database and tree to working directory
            ## ===============================================
            np.savez(os.path.join(tmp_work_dir_path,
                     SomaWorkflowEngine.dataset_relative_path), **Xy)
            store = StoreFs(dirpath=os.path.join(
                tmp_work_dir_path,
                SomaWorkflowEngine.tree_root_relative_path))
            self.tree_root.save_tree(store=store)

            ## Subtree job allocation on disk
            ## ==============================
            node_input = NodesInput(self.tree_root.get_key())
            split_node_input = SplitNodesInput(
                self.tree_root, num_processes=self.num_processes)
            nodesinput_list = split_node_input.split(node_input)
            keysfile_list = self._save_job_list(tmp_work_dir_path,
                                                nodesinput_list)
            ## Build soma-workflow
            ## ===================
            # One epac_mapper job per key file, all sharing the working
            # directory as both input and output transfer.
            jobs = [Job(command=[u"epac_mapper",
                                 u'--datasets', '"%s"' %
                                 (SomaWorkflowEngine.dataset_relative_path),
                                 u'--keysfile', '"%s"' %
                                 (nodesfile)],
                        referenced_input_files=[ft_working_directory],
                        referenced_output_files=[ft_working_directory],
                        name="epac_job_key=%s" % (nodesfile),
                        working_directory=ft_working_directory)
                    for nodesfile in keysfile_list]
            soma_workflow = Workflow(jobs=jobs)
            if not self.resource_id:
                self.resource_id = socket.gethostname()
            controller = WorkflowController(self.resource_id,
                                            self.login,
                                            self.pw)
            ## run soma-workflow
            ## =================
            wf_id = controller.submit_workflow(workflow=soma_workflow,
                                               name="epac workflow")
            Helper.transfer_input_files(wf_id, controller)
            Helper.wait_workflow(wf_id, controller)
            Helper.transfer_output_files(wf_id, controller)
            controller.delete_workflow(wf_id)
            ## read result tree
            ## ================
            self.tree_root = store.load()
        finally:
            # Leave the temporary directory before deleting it, and do both
            # even when the workflow failed.
            os.chdir(cur_work_dir)
            if os.path.isdir(tmp_work_dir_path):
                shutil.rmtree(tmp_work_dir_path)
        return self.tree_root
コード例 #12
0
    def test_memmapping(self):
        """Run a CV of two SVCs and compare multi-process with one-process runs.

        The dataset is either memory-mapped (``self.memmap``) or generated
        with ``make_classification``. The workflow runs through
        ``SomaWorkflowEngine`` when ``self.is_swf`` is set, otherwise through
        ``LocalEngine``.

        With ``self.n_proc == 1`` the tree and its reduced results are saved
        under ``<directory>/tmp_save_tree/`` and
        ``<directory>/tmp_save_results``. With any other process count those
        files are loaded and compared with the current run. A ``KeyError``
        during loading or comparison is reported as "no previous tree"
        instead of failing the test.
        """
        ## 1) Building dataset
        ## ============================================================
        if self.memmap:
            # If the proc is 1, always generate the matrix
            # Otherwise, load it if it exists, or create it if it doesn't
            writing_mode = (self.n_proc == 1)
            X = create_mmat(self.n_samples,
                            self.n_features,
                            dir=self.directory,
                            writing_mode=writing_mode)
            y = create_array(self.n_samples, [0, 1],
                             dir=self.directory,
                             writing_mode=writing_mode)
            Xy = dict(X=X, y=y)
        else:
            X, y = datasets.make_classification(n_samples=self.n_samples,
                                                n_features=self.n_features,
                                                n_informative=2,
                                                random_state=1)
            Xy = dict(X=X, y=y)
        ## 2) Building workflow
        ## =======================================================
        from sklearn.svm import SVC
        from epac import CV, Methods
        cv_svm_local = CV(Methods(*[SVC(
            kernel="linear"), SVC(kernel="rbf")]),
                          n_folds=3)

        cv_svm = None
        if self.is_swf:
            # Running on the cluster
            from epac import SomaWorkflowEngine
            mmap_mode = None
            if self.memmap:
                mmap_mode = "r+"
            swf_engine = SomaWorkflowEngine(
                cv_svm_local,
                num_processes=self.n_proc,
                resource_id="jl237561@gabriel",
                login="******",
                # remove_finished_wf=False,
                # remove_local_tree=False,
                mmap_mode=mmap_mode,
                queue="Global_long")

            cv_svm = swf_engine.run(**Xy)

            # Printing information about the jobs
            time.sleep(2)
            print('')
            sum_memory = 0
            max_time_cost = 0
            for job_info in swf_engine.engine_info:
                print(
                    "mem_cost = {0}, vmem_cost = {1}, time_cost = {2}".format(
                        job_info.mem_cost, job_info.vmem_cost,
                        job_info.time_cost))
                sum_memory += job_info.mem_cost
                if max_time_cost < job_info.time_cost:
                    max_time_cost = job_info.time_cost
            print("sum_memory = ", sum_memory)
            print("max_time_cost = ", max_time_cost)
        else:
            # Running on the local machine
            from epac import LocalEngine
            local_engine = LocalEngine(cv_svm_local, num_processes=self.n_proc)
            cv_svm = local_engine.run(**Xy)

        cv_svm_reduce = cv_svm.reduce()
        print("\n -> Reducing results")
        print(cv_svm_reduce)

        # Creating the directory to save results, if it doesn't exist
        dirname = 'tmp_save_tree/'
        if self.directory is None:
            directory = '/tmp'
        else:
            directory = self.directory
        if not os.path.isdir(directory):
            os.mkdir(directory)
        dirpath = os.path.join(directory, dirname)
        if not os.path.isdir(dirpath):
            os.mkdir(dirpath)
        results_path = os.path.join(directory, "tmp_save_results")

        if self.n_proc == 1:
            ## 4.1) Saving results on the disk for one process
            ## ===================================================
            store = StoreFs(dirpath=dirpath, clear=True)
            cv_svm.save_tree(store=store)

            # pickle needs a binary file object: text mode ('w+') breaks
            # pickle.dump on Python 3.
            with open(results_path, 'wb') as results_file:
                print(results_file.name)
                pickle.dump(cv_svm_reduce, results_file)

        else:
            ## 4.2) Loading the results for one process
            ## ===================================================
            try:
                store = StoreFs(dirpath=dirpath, clear=False)
                cv_svm_one_proc = store.load()

                # Binary mode, matching how the file is written above.
                with open(results_path, 'rb') as results_file:
                    cv_svm_reduce_one_proc = pickle.load(results_file)

                ## 5.2) Comparing results to the results for one process
                ## ===================================================
                print("\nComparing %i proc with one proc" % self.n_proc)
                self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
                self.assertTrue(isequal(cv_svm_reduce, cv_svm_reduce_one_proc))
            except KeyError:
                print("Warning: ")
                print("No previous tree detected, no possible "\
                    "comparison of results")
コード例 #13
0
    def test_memmapping(self):
        """Run a 3-fold CV of two SVCs and compare against a one-process run.

        The method reads ``self.memmap``, ``self.n_proc``, ``self.is_swf``,
        ``self.n_samples``, ``self.n_features`` and ``self.directory``.

        * With ``self.n_proc == 1`` the resulting tree is saved through a
          ``StoreFs`` under ``<directory>/tmp_save_tree/`` and the reduced
          results are pickled to ``<directory>/tmp_save_results``.
        * With any other ``self.n_proc`` the previously saved tree and
          reduced results are loaded and asserted equal to the ones just
          computed. If loading raises ``KeyError`` a warning is printed and
          no comparison is made.

        ``<directory>`` is ``self.directory``, or ``/tmp`` when it is None.
        """
        ## 1) Building dataset
        ## ============================================================
        if self.memmap:
            # If the proc is 1, always generate the matrix
            # Otherwise, load it if it exists, or create it if it doesn't
            writing_mode = (self.n_proc == 1)
            X = create_mmat(self.n_samples, self.n_features,
                            dir=self.directory,
                            writing_mode=writing_mode)
            y = create_array(self.n_samples, [0, 1], dir=self.directory,
                             writing_mode=writing_mode)
        else:
            X, y = datasets.make_classification(n_samples=self.n_samples,
                                                n_features=self.n_features,
                                                n_informative=2,
                                                random_state=1)
        Xy = dict(X=X, y=y)

        ## 2) Building workflow
        ## =======================================================
        from sklearn.svm import SVC
        from epac import CV, Methods
        cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                    SVC(kernel="rbf")]), n_folds=3)

        ## 3) Running the workflow
        ## =======================================================
        if self.is_swf:
            # Running on the cluster
            from epac import SomaWorkflowEngine
            mmap_mode = "r+" if self.memmap else None
            swf_engine = SomaWorkflowEngine(cv_svm_local,
                                            num_processes=self.n_proc,
                                            resource_id="jl237561@gabriel",
                                            login="******",
                                            # remove_finished_wf=False,
                                            # remove_local_tree=False,
                                            mmap_mode=mmap_mode,
                                            queue="Global_long")

            cv_svm = swf_engine.run(**Xy)

            # Printing information about the jobs
            time.sleep(2)
            print('')
            sum_memory = 0
            max_time_cost = 0
            for job_info in swf_engine.engine_info:
                # Single pre-formatted string so the output is the same
                # whether print is a statement (py2) or a function (py3).
                print("mem_cost= {0} , vmem_cost= {1} , time_cost= {2}".format(
                    job_info.mem_cost, job_info.vmem_cost,
                    job_info.time_cost))
                sum_memory += job_info.mem_cost
                if max_time_cost < job_info.time_cost:
                    max_time_cost = job_info.time_cost
            print("sum_memory = {0}".format(sum_memory))
            print("max_time_cost = {0}".format(max_time_cost))
        else:
            # Running on the local machine
            from epac import LocalEngine
            local_engine = LocalEngine(cv_svm_local, num_processes=self.n_proc)
            cv_svm = local_engine.run(**Xy)

        cv_svm_reduce = cv_svm.reduce()
        print("\n -> Reducing results")
        print(cv_svm_reduce)

        # Creating the directory to save results, if it doesn't exist
        dirname = 'tmp_save_tree/'
        if self.directory is None:
            directory = '/tmp'
        else:
            directory = self.directory
        if not os.path.isdir(directory):
            os.mkdir(directory)
        dirpath = os.path.join(directory, dirname)
        if not os.path.isdir(dirpath):
            os.mkdir(dirpath)
        results_path = os.path.join(directory, "tmp_save_results")

        if self.n_proc == 1:
            ## 4.1) Saving results on the disk for one process
            ## ===================================================
            store = StoreFs(dirpath=dirpath, clear=True)
            cv_svm.save_tree(store=store)

            # Pickle streams are bytes: the file must be opened in binary
            # mode (mandatory on Python 3, required for portability on 2).
            with open(results_path, 'wb') as results_file:
                print(results_file.name)
                pickle.dump(cv_svm_reduce, results_file)

        else:
            ## 4.2) Loading the results for one process
            ## ===================================================
            try:
                store = StoreFs(dirpath=dirpath, clear=False)
                cv_svm_one_proc = store.load()

                # The file is produced by this same test in a one-process
                # run; it is not untrusted input.
                with open(results_path, 'rb') as results_file:
                    cv_svm_reduce_one_proc = pickle.load(results_file)
            except KeyError:
                print("Warning: ")
                print("No previous tree detected, no possible "
                      "comparison of results")
            else:
                ## 5.2) Comparing results to the results for one process
                ## ===================================================
                # Kept outside the try body so that a KeyError raised while
                # comparing is reported as a failure, not as a missing tree.
                print("\nComparing %i proc with one proc" % self.n_proc)
                self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
                self.assertTrue(isequal(cv_svm_reduce, cv_svm_reduce_one_proc))