def test_examples_local_engine(self):
    """Run every bundled example workflow three ways and compare results.

    Each example is executed sequentially (the reference), through
    LocalEngine (multi-process on this machine) and through
    SomaWorkflowEngine; the resulting trees and their reduce results
    must all be identical.
    """
    for example_class in get_wf_example_classes():
        # One fresh copy of the workflow per execution mode.
        reference_wf = example_class().get_workflow()
        local_wf = example_class().get_workflow()
        soma_wf = example_class().get_workflow()
        # Sequential, in-process run is the reference.
        reference_wf.run(X=self.X, y=self.y)
        # Multi-process run on the local machine.
        local_engine = LocalEngine(tree_root=local_wf,
                                   num_processes=self.n_cores)
        local_wf = local_engine.run(X=self.X, y=self.y)
        # Soma-workflow run; intermediate files are kept for inspection.
        soma_engine = SomaWorkflowEngine(tree_root=soma_wf,
                                         num_processes=self.n_cores,
                                         remove_finished_wf=False,
                                         remove_local_tree=False)
        soma_wf = soma_engine.run(X=self.X, y=self.y)
        # Both engines must reproduce the reference tree and its reduce.
        self.assertTrue(compare_two_node(reference_wf, local_wf))
        self.assertTrue(compare_two_node(reference_wf, soma_wf))
        self.assertTrue(comp_2wf_reduce_res(reference_wf, local_wf))
        self.assertTrue(comp_2wf_reduce_res(reference_wf, soma_wf))
# Example #2
 def test_examples_local_engine(self):
     """Every example workflow must give the same tree and reduce results
     whether it is run directly, via LocalEngine, or via
     SomaWorkflowEngine."""
     for cls in get_wf_example_classes():
         # Independent workflow copies, one per execution mode.
         direct = cls().get_workflow()
         parallel = cls().get_workflow()
         soma = cls().get_workflow()
         direct.run(X=self.X, y=self.y)
         engine = LocalEngine(tree_root=parallel,
                              num_processes=self.n_cores)
         parallel = engine.run(X=self.X, y=self.y)
         soma_engine = SomaWorkflowEngine(
             tree_root=soma,
             num_processes=self.n_cores,
             remove_finished_wf=False,
             remove_local_tree=False)
         soma = soma_engine.run(X=self.X, y=self.y)
         # Trees first, then reduced results, against the direct run.
         for candidate in (parallel, soma):
             self.assertTrue(compare_two_node(direct, candidate))
         for candidate in (parallel, soma):
             self.assertTrue(comp_2wf_reduce_res(direct, candidate))
    def test_prev_state_methods(self):
        """WarmStartMethods must be equivalent to plain Methods over the
        same grid of TOY_CLF classifiers."""
        # Tiny random classification problem keeps the test fast.
        X, y = datasets.make_classification(n_samples=5,
                                            n_features=20,
                                            n_informative=2)
        data = {"X": X, "y": y}
        lambdas = [2, 1]
        baseline = Methods(*[TOY_CLF(v_lambda=lam) for lam in lambdas])
        baseline.run(**data)

        warm = WarmStartMethods(*[TOY_CLF(v_lambda=lam) for lam in lambdas])
        warm.run(**data)
        # Same tree structure and same reduced results expected.
        self.assertTrue(compare_two_node(baseline, warm))
        self.assertTrue(comp_2wf_reduce_res(baseline, warm))
    def test_prev_state_methods(self):
        """Check that the warm-start variant (WarmStartMethods) reproduces
        the tree and reduce results of ordinary Methods."""
        ## 1) Build dataset
        ## ================================================
        X, y = datasets.make_classification(n_samples=5,
                                            n_features=20,
                                            n_informative=2)
        Xy = {"X": X, "y": y}
        cold = Methods(TOY_CLF(v_lambda=2), TOY_CLF(v_lambda=1))
        cold.run(**Xy)

        ## 2) Same grid with warm starting
        ## ================================================
        warm = WarmStartMethods(TOY_CLF(v_lambda=2), TOY_CLF(v_lambda=1))
        warm.run(**Xy)
        self.assertTrue(compare_two_node(cold, warm))
        self.assertTrue(comp_2wf_reduce_res(cold, warm))
    def test_cv_best_search_refit_parallel(self):
        """Nested model selection (CVBestSearchRefitParallel) under an
        outer CV must give identical results when executed through
        SomaWorkflowEngine and when run in-process."""
        n_folds = 2
        n_folds_nested = 3
        k_values = [1, 2]
        C_values = [1, 2]
        n_samples = 500
        n_features = 10000
        n_cores = 2
        X, y = datasets.make_classification(n_samples=n_samples,
                                            n_features=n_features,
                                            n_informative=5)

        def build_workflow():
            # SelectKBest -> linear-SVM grid, wrapped in a nested-CV model
            # selection, evaluated by an outer CV. A fresh instance is
            # built per call so the two runs share no estimator objects.
            grids = Methods(*[Pipe(SelectKBest(k=k),
                                   Methods(*[SVC(kernel="linear", C=C)
                                             for C in C_values]))
                              for k in k_values])
            nested = CVBestSearchRefitParallel(grids,
                                               n_folds=n_folds_nested)
            return CV(nested, n_folds=n_folds)

        # Parallel execution through soma-workflow.
        swf_engine = SomaWorkflowEngine(tree_root=build_workflow(),
                                        num_processes=n_cores,
                                        remove_finished_wf=False,
                                        remove_local_tree=False)
        sfw_engine_wf = swf_engine.run(X=X, y=y)

        # In-process execution on an identical workflow as the reference.
        reference_wf = build_workflow()
        reference_wf.run(X=X, y=y)

        self.assertTrue(compare_two_node(sfw_engine_wf, reference_wf))
        self.assertTrue(comp_2wf_reduce_res(sfw_engine_wf, reference_wf))
    def test_cv_best_search_refit_parallel(self):
        """Compare a CVBestSearchRefitParallel workflow run through
        soma-workflow against the same workflow run directly: the trees
        and their reduce results must match."""
        n_folds = 2
        n_folds_nested = 3
        k_values = [1, 2]
        C_values = [1, 2]
        n_samples = 500
        n_features = 10000
        n_cores = 2
        X, y = datasets.make_classification(n_samples=n_samples,
                                            n_features=n_features,
                                            n_informative=5)

        # Workflow executed through the soma-workflow engine.
        pipe_methods = Methods(*[Pipe(SelectKBest(k=k),
                                      Methods(*[SVC(kernel="linear", C=C)
                                                for C in C_values]))
                                 for k in k_values])
        nested_cv = CVBestSearchRefitParallel(pipe_methods,
                                              n_folds=n_folds_nested)
        outer_cv = CV(nested_cv, n_folds=n_folds)

        engine = SomaWorkflowEngine(tree_root=outer_cv,
                                    num_processes=n_cores,
                                    remove_finished_wf=False,
                                    remove_local_tree=False)
        engine_result = engine.run(X=X, y=y)

        # Identical workflow built from scratch and run in-process.
        pipe_methods_ref = Methods(*[Pipe(SelectKBest(k=k),
                                          Methods(*[SVC(kernel="linear", C=C)
                                                    for C in C_values]))
                                     for k in k_values])
        nested_cv_ref = CVBestSearchRefitParallel(pipe_methods_ref,
                                                  n_folds=n_folds_nested)
        reference = CV(nested_cv_ref, n_folds=n_folds)
        reference.run(X=X, y=y)

        self.assertTrue(compare_two_node(engine_result, reference))
        self.assertTrue(comp_2wf_reduce_res(engine_result, reference))
# Example #7
    def test_memmapping(self):
        """End-to-end run of a CV(SVC) workflow on (optionally) memory-mapped
        data.

        Builds the dataset (memory-mapped via ``create_mmat``/``create_array``
        when ``self.memmap`` is set), runs the workflow either through
        SomaWorkflowEngine (``self.is_swf``) or LocalEngine, reduces the
        results, and then:

        * with one process: saves the tree and the pickled reduce result so
          later multi-process runs have a reference;
        * with several processes: reloads that reference and asserts the
          trees and reduce results are equal.
        """
        ## 1) Building dataset
        ## ============================================================
        if self.memmap:
            # If the proc is 1, always generate the matrix
            # Otherwise, load it if it exists, or create it if it doesn't
            writing_mode = (self.n_proc == 1)
            X = create_mmat(self.n_samples,
                            self.n_features,
                            dir=self.directory,
                            writing_mode=writing_mode)
            y = create_array(self.n_samples, [0, 1],
                             dir=self.directory,
                             writing_mode=writing_mode)
            Xy = dict(X=X, y=y)
        else:
            X, y = datasets.make_classification(n_samples=self.n_samples,
                                                n_features=self.n_features,
                                                n_informative=2,
                                                random_state=1)
            Xy = dict(X=X, y=y)
        ## 2) Building workflow
        ## =======================================================
        from sklearn.svm import SVC
        from epac import CV, Methods
        cv_svm_local = CV(Methods(*[SVC(
            kernel="linear"), SVC(kernel="rbf")]),
                          n_folds=3)

        cv_svm = None
        if self.is_swf:
            # Running on the cluster
            from epac import SomaWorkflowEngine
            mmap_mode = None
            if self.memmap:
                # Workers open the memmap read-write.
                mmap_mode = "r+"
            swf_engine = SomaWorkflowEngine(
                cv_svm_local,
                num_processes=self.n_proc,
                resource_id="jl237561@gabriel",
                login="******",
                # remove_finished_wf=False,
                # remove_local_tree=False,
                mmap_mode=mmap_mode,
                queue="Global_long")

            cv_svm = swf_engine.run(**Xy)

            # Printing information about the jobs
            time.sleep(2)
            print('')
            sum_memory = 0
            max_time_cost = 0
            for job_info in swf_engine.engine_info:
                print(
                    "mem_cost = {0}, vmem_cost = {1}, time_cost = {2}".format(
                        job_info.mem_cost, job_info.vmem_cost,
                        job_info.time_cost))
                sum_memory += job_info.mem_cost
                if max_time_cost < job_info.time_cost:
                    max_time_cost = job_info.time_cost
            print("sum_memory = ", sum_memory)
            print("max_time_cost = ", max_time_cost)
        else:
            # Running on the local machine
            from epac import LocalEngine
            local_engine = LocalEngine(cv_svm_local, num_processes=self.n_proc)
            cv_svm = local_engine.run(**Xy)

        cv_svm_reduce = cv_svm.reduce()
        print("\n -> Reducing results")
        print(cv_svm_reduce)

        # Creating the directory to save results, if it doesn't exist
        dirname = 'tmp_save_tree/'
        if self.directory is None:
            directory = '/tmp'
        else:
            directory = self.directory
        if not os.path.isdir(directory):
            os.mkdir(directory)
        dirpath = os.path.join(directory, dirname)
        if not os.path.isdir(dirpath):
            os.mkdir(dirpath)

        if self.n_proc == 1:
            ## 4.1) Saving results on the disk for one process
            ## ===================================================
            store = StoreFs(dirpath=dirpath, clear=True)
            cv_svm.save_tree(store=store)

            # BUG FIX: pickle requires a binary file in Python 3; the
            # previous text mode ('w+') raised TypeError on dump.
            with open(os.path.join(directory, "tmp_save_results"), 'wb') \
                    as result_file:
                print(result_file.name)
                pickle.dump(cv_svm_reduce, result_file)

        else:
            ## 4.2) Loading the results for one process
            ## ===================================================
            try:
                store = StoreFs(dirpath=dirpath, clear=False)
                cv_svm_one_proc = store.load()

                # Binary mode to match the pickle.dump above.
                with open(os.path.join(directory, "tmp_save_results"), 'rb') \
                        as result_file:
                    cv_svm_reduce_one_proc = pickle.load(result_file)

                ## 5.2) Comparing results to the results for one process
                ## ===================================================
                print("\nComparing %i proc with one proc" % self.n_proc)
                self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
                self.assertTrue(isequal(cv_svm_reduce, cv_svm_reduce_one_proc))
            except KeyError:
                print("Warning: ")
                print("No previous tree detected, no possible "\
                    "comparison of results")
# Example #8
    def test_memmapping(self):
        """End-to-end run of a CV(SVC) workflow on (optionally) memory-mapped
        data.

        Mirrors the other ``test_memmapping`` variant: builds the dataset,
        runs the workflow through SomaWorkflowEngine or LocalEngine,
        reduces, then saves (one process) or compares against the saved
        reference (several processes).

        Fixed here: the Python 2 ``print`` statements were converted to
        Python 3 ``print()`` calls for consistency with the rest of the
        file, and the pickle files are opened in binary mode as Python 3
        requires.
        """
        ## 1) Building dataset
        ## ============================================================
        if self.memmap:
            # If the proc is 1, always generate the matrix
            # Otherwise, load it if it exists, or create it if it doesn't
            writing_mode = (self.n_proc == 1)
            X = create_mmat(self.n_samples, self.n_features,
                            dir=self.directory,
                            writing_mode=writing_mode)
            y = create_array(self.n_samples, [0, 1], dir=self.directory,
                             writing_mode=writing_mode)
            Xy = dict(X=X, y=y)
        else:
            X, y = datasets.make_classification(n_samples=self.n_samples,
                                                n_features=self.n_features,
                                                n_informative=2,
                                                random_state=1)
            Xy = dict(X=X, y=y)
        ## 2) Building workflow
        ## =======================================================
        from sklearn.svm import SVC
        from epac import CV, Methods
        cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                    SVC(kernel="rbf")]), n_folds=3)

        cv_svm = None
        if self.is_swf:
            # Running on the cluster
            from epac import SomaWorkflowEngine
            mmap_mode = None
            if self.memmap:
                # Workers open the memmap read-write.
                mmap_mode = "r+"
            swf_engine = SomaWorkflowEngine(cv_svm_local,
                                            num_processes=self.n_proc,
                                            resource_id="jl237561@gabriel",
                                            login="******",
                                            # remove_finished_wf=False,
                                            # remove_local_tree=False,
                                            mmap_mode=mmap_mode,
                                            queue="Global_long")

            cv_svm = swf_engine.run(**Xy)

            # Printing information about the jobs
            time.sleep(2)
            print('')
            sum_memory = 0
            max_time_cost = 0
            for job_info in swf_engine.engine_info:
                print("mem_cost=", job_info.mem_cost,
                      ", vmem_cost=", job_info.vmem_cost,
                      ", time_cost=", job_info.time_cost)
                sum_memory += job_info.mem_cost
                if max_time_cost < job_info.time_cost:
                    max_time_cost = job_info.time_cost
            print("sum_memory =", sum_memory)
            print("max_time_cost =", max_time_cost)
        else:
            # Running on the local machine
            from epac import LocalEngine
            local_engine = LocalEngine(cv_svm_local, num_processes=self.n_proc)
            cv_svm = local_engine.run(**Xy)

        cv_svm_reduce = cv_svm.reduce()
        print("\n -> Reducing results")
        print(cv_svm_reduce)

        # Creating the directory to save results, if it doesn't exist
        dirname = 'tmp_save_tree/'
        if self.directory is None:
            directory = '/tmp'
        else:
            directory = self.directory
        if not os.path.isdir(directory):
            os.mkdir(directory)
        dirpath = os.path.join(directory, dirname)
        if not os.path.isdir(dirpath):
            os.mkdir(dirpath)

        if self.n_proc == 1:
            ## 4.1) Saving results on the disk for one process
            ## ===================================================
            store = StoreFs(dirpath=dirpath, clear=True)
            cv_svm.save_tree(store=store)

            # BUG FIX: pickle requires a binary file in Python 3; the
            # previous text mode ('w+') raised TypeError on dump.
            with open(os.path.join(directory, "tmp_save_results"), 'wb') \
                    as result_file:
                print(result_file.name)
                pickle.dump(cv_svm_reduce, result_file)

        else:
            ## 4.2) Loading the results for one process
            ## ===================================================
            try:
                store = StoreFs(dirpath=dirpath, clear=False)
                cv_svm_one_proc = store.load()

                # Binary mode to match the pickle.dump above.
                with open(os.path.join(directory, "tmp_save_results"), 'rb') \
                        as result_file:
                    cv_svm_reduce_one_proc = pickle.load(result_file)

                ## 5.2) Comparing results to the results for one process
                ## ===================================================
                print("\nComparing %i proc with one proc" % self.n_proc)
                self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
                self.assertTrue(isequal(cv_svm_reduce, cv_svm_reduce_one_proc))
            except KeyError:
                print("Warning: ")
                print("No previous tree detected, no possible "
                      "comparison of results")