Ejemplo n.º 1
0
 def test_engine_info(self):
     """Run a two-SVM cross-validation through soma-workflow and check job costs.

     Builds a 20-sample, 100-feature classification problem, wraps a linear
     and an RBF ``SVC`` in ``CV(Methods(...), n_folds=3)``, runs the tree
     with ``SomaWorkflowEngine`` and asserts that every entry of
     ``swf_engine.engine_info`` reports a strictly positive ``time_cost``.

     NOTE(review): the engine is configured with a hard-coded
     ``resource_id`` and ``queue``; presumably the test can only pass with
     access to that resource -- confirm before running it elsewhere.
     """
     n_samples = 20
     n_features = 100
     n_proc = 2
     X, y = datasets.make_classification(n_samples=n_samples,
                                         n_features=n_features,
                                         n_informative=2,
                                         random_state=1)
     Xy = dict(X=X, y=y)
     cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                 SVC(kernel="rbf")]),
                       n_folds=3)
     swf_engine = SomaWorkflowEngine(cv_svm_local,
                                     num_processes=n_proc,
                                     resource_id="jl237561@gabriel",
                                     login="******",
                                     remove_finished_wf=False,
                                     remove_local_tree=False,
                                     queue="Global_long")
     swf_engine.run(**Xy)
     # Each print() receives one pre-formatted string, so the output is the
     # same under Python 2 (print statement) and Python 3 (print function).
     print("engine_info ================")
     for job_info in swf_engine.engine_info:
         print("  job_info=================")
         print("  mem_cost=  %s" % (job_info.mem_cost,))
         print("  vmem_cost=  %s" % (job_info.vmem_cost,))
         print("  time_cost=  %s" % (job_info.time_cost,))
         self.assertTrue(job_info.time_cost > 0)
Ejemplo n.º 2
0
    def test_examples_local_engine(self):
        """Check that both engines reproduce a direct in-process run.

        For every class returned by ``get_wf_example_classes()``, three
        workflows are built from fresh example instances: one is run directly,
        one through ``LocalEngine`` and one through ``SomaWorkflowEngine``.
        The engine results must match the direct run according to
        ``compare_two_node`` and ``comp_2wf_reduce_res``.
        """
        for example_cls in get_wf_example_classes():
            # One workflow per execution path, each from its own instance.
            reference_wf, local_tree, swf_tree = [
                example_cls().get_workflow() for _ in range(3)]
            reference_wf.run(X=self.X, y=self.y)

            local_result = LocalEngine(
                tree_root=local_tree,
                num_processes=self.n_cores).run(X=self.X, y=self.y)
            swf_result = SomaWorkflowEngine(
                tree_root=swf_tree,
                num_processes=self.n_cores,
                remove_finished_wf=False,
                remove_local_tree=False).run(X=self.X, y=self.y)

            # Structural comparison first, then the reduced results.
            for same in (compare_two_node, comp_2wf_reduce_res):
                for engine_result in (local_result, swf_result):
                    self.assertTrue(same(reference_wf, engine_result))
Ejemplo n.º 3
0
 def test_engine_info(self):
     """Run a CV of two SVMs with soma-workflow and inspect the job costs.

     Every entry of ``engine_info`` is printed (memory, virtual memory and
     time cost) and must report a ``time_cost`` greater than zero.
     """
     X, y = datasets.make_classification(n_samples=20,
                                         n_features=100,
                                         n_informative=2,
                                         random_state=1)
     svm_variants = [SVC(kernel="linear"), SVC(kernel="rbf")]
     workflow = CV(Methods(*svm_variants), n_folds=3)
     engine = SomaWorkflowEngine(workflow,
                                 num_processes=2,
                                 resource_id="jl237561@gabriel",
                                 login="******",
                                 remove_finished_wf=False,
                                 remove_local_tree=False,
                                 queue="Global_long")
     engine.run(X=X, y=y)

     print("engine_info ================")
     for info in engine.engine_info:
         print("  job_info=================")
         print("  mem_cost= ", info.mem_cost)
         print("  vmem_cost= ", info.vmem_cost)
         print("  time_cost= ", info.time_cost)
         self.assertTrue(info.time_cost > 0)
Ejemplo n.º 4
0
 def test_examples_local_engine(self):
     """Compare LocalEngine and SomaWorkflowEngine runs with a direct run.

     Each example class from ``get_wf_example_classes()`` provides three
     separately built workflows: a reference executed with ``run`` and one
     tree for each engine. Node structure and reduced results of both engine
     outputs are asserted to match the reference.
     """
     for example_cls in get_wf_example_classes():
         expected = example_cls().get_workflow()
         local_tree = example_cls().get_workflow()
         swf_tree = example_cls().get_workflow()
         expected.run(X=self.X, y=self.y)

         local_engine = LocalEngine(tree_root=local_tree,
                                    num_processes=self.n_cores)
         local_result = local_engine.run(X=self.X, y=self.y)

         # Keep the finished workflow and the local tree after the run.
         swf_options = dict(tree_root=swf_tree,
                            num_processes=self.n_cores,
                            remove_finished_wf=False,
                            remove_local_tree=False)
         swf_result = SomaWorkflowEngine(**swf_options).run(X=self.X,
                                                            y=self.y)

         self.assertTrue(compare_two_node(expected, local_result))
         self.assertTrue(compare_two_node(expected, swf_result))
         self.assertTrue(comp_2wf_reduce_res(expected, local_result))
         self.assertTrue(comp_2wf_reduce_res(expected, swf_result))
Ejemplo n.º 5
0
 def test_examples_local_engine(self):
     """Check reduced results of both engines against a direct run.

     For each example class the workflow is run directly, then handed as
     ``tree_root`` to ``LocalEngine`` and to ``SomaWorkflowEngine``; the
     reduced results of each engine run are compared with the workflow
     through ``comp_2wf_reduce_res``.
     """
     for example_cls in get_wf_example_classes():
         wf = example_cls().get_workflow()
         wf.run(X=self.X, y=self.y)

         # NOTE(review): the already-run ``wf`` object is reused as the
         # tree_root of both engines; whether the engines copy it is not
         # visible from here -- confirm the comparison is meaningful.
         local_result = LocalEngine(
             tree_root=wf,
             num_processes=self.n_cores).run(X=self.X, y=self.y)
         swf_result = SomaWorkflowEngine(
             tree_root=wf,
             num_processes=self.n_cores).run(X=self.X, y=self.y)

         for engine_result in (local_result, swf_result):
             self.assertTrue(comp_2wf_reduce_res(wf, engine_result))
    def test_cv_best_search_refit_parallel(self):
        """Compare a soma-workflow run of a nested CV search with a direct run.

        Two identical trees ``CV(CVBestSearchRefitParallel(Methods(...)))``
        are built over a grid of ``SelectKBest`` k values and linear ``SVC``
        C values. One is executed by ``SomaWorkflowEngine``, the other with
        ``run``; node structure and reduced results must match.
        """
        outer_folds = 2
        inner_folds = 3
        k_grid = [1, 2]
        c_grid = [1, 2]
        X, y = datasets.make_classification(n_samples=500,
                                            n_features=10000,
                                            n_informative=5)

        # Tree executed through soma-workflow.
        engine_branches = []
        for k in k_grid:
            selector = SelectKBest(k=k)
            classifiers = Methods(*[SVC(kernel="linear", C=c)
                                    for c in c_grid])
            engine_branches.append(Pipe(selector, classifiers))
        engine_search = CVBestSearchRefitParallel(Methods(*engine_branches),
                                                  n_folds=inner_folds)
        engine_tree = CV(engine_search, n_folds=outer_folds)
        sfw_engine_wf = SomaWorkflowEngine(
            tree_root=engine_tree,
            num_processes=2,
            remove_finished_wf=False,
            remove_local_tree=False).run(X=X, y=y)

        # Same tree, rebuilt from scratch and executed in-process.
        direct_branches = []
        for k in k_grid:
            selector = SelectKBest(k=k)
            classifiers = Methods(*[SVC(kernel="linear", C=c)
                                    for c in c_grid])
            direct_branches.append(Pipe(selector, classifiers))
        direct_search = CVBestSearchRefitParallel(Methods(*direct_branches),
                                                  n_folds=inner_folds)
        wf2 = CV(direct_search, n_folds=outer_folds)
        wf2.run(X=X, y=y)

        self.assertTrue(compare_two_node(sfw_engine_wf, wf2))
        self.assertTrue(comp_2wf_reduce_res(sfw_engine_wf, wf2))
    def test_cv_best_search_refit_parallel(self):
        """Check that a soma-workflow run matches an in-process run.

        The same nested-CV tree (``SelectKBest`` followed by linear ``SVC``
        variants, wrapped in ``CVBestSearchRefitParallel`` and ``CV``) is
        built twice: once for ``SomaWorkflowEngine`` and once for a plain
        ``run``. Both results are compared node by node and after reduction.
        """
        n_folds = 2
        n_folds_nested = 3
        k_values = [1, 2]
        C_values = [1, 2]
        n_cores = 2
        X, y = datasets.make_classification(n_samples=500,
                                            n_features=10000,
                                            n_informative=5)

        def build_workflow():
            """Return a freshly constructed nested-CV workflow."""
            branches = [
                Pipe(SelectKBest(k=k),
                     Methods(*[SVC(kernel="linear", C=C) for C in C_values]))
                for k in k_values
            ]
            search = CVBestSearchRefitParallel(Methods(*branches),
                                               n_folds=n_folds_nested)
            return CV(search, n_folds=n_folds)

        # Workflow executed by the soma-workflow engine.
        sfw_engine = SomaWorkflowEngine(tree_root=build_workflow(),
                                        num_processes=n_cores,
                                        remove_finished_wf=False,
                                        remove_local_tree=False)
        sfw_engine_wf = sfw_engine.run(X=X, y=y)

        # Independent copy executed directly, used as the reference.
        wf2 = build_workflow()
        wf2.run(X=X, y=y)

        self.assertTrue(compare_two_node(sfw_engine_wf, wf2))
        self.assertTrue(comp_2wf_reduce_res(sfw_engine_wf, wf2))
Ejemplo n.º 8
0
# Keyword arguments shared by the engine runs below.
Xy = dict(X=X, y=y)

## 2) Build two workflows respectively
## =======================================================
# Each engine gets its own tree: a 3-fold CV over a linear and an RBF SVM.

from sklearn.svm import SVC
from epac import CV, Methods
cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                            SVC(kernel="rbf")]),
                  n_folds=3)
cv_svm_swf = CV(Methods(*[SVC(kernel="linear"),
                          SVC(kernel="rbf")]),
                n_folds=3)

## 3) Run two workflows using local engine and soma-workflow
## =========================================================

from epac import LocalEngine
local_engine = LocalEngine(cv_svm_local, num_processes=2)
cv_svm = local_engine.run(X=X, y=y)
# print(...) with a single argument behaves the same on Python 2 and 3.
print(cv_svm.reduce())

from epac import SomaWorkflowEngine
swf_engine = SomaWorkflowEngine(cv_svm_swf,
                                num_processes=2,
                                # Optional remote-resource settings, disabled:
                                #resource_id="jl237561@gabriel",
                                #login="******",
                                remove_finished_wf=False)
cv_svm = swf_engine.run(**Xy)
print(cv_svm.reduce())
Ejemplo n.º 9
0
y = convert2memmap(y)

# Keyword arguments handed to both engines.
Xy = {"X": X, "y": y}

## 2) Build one workflow per engine
## =======================================================
# Both trees are a 3-fold CV over a linear-kernel and an RBF-kernel SVM,
# each built from its own estimator instances.

from sklearn.svm import SVC
from epac import CV, Methods
cv_svm_local = CV(Methods(SVC(kernel="linear"), SVC(kernel="rbf")),
                  n_folds=3)
cv_svm_swf = CV(Methods(SVC(kernel="linear"), SVC(kernel="rbf")),
                n_folds=3)

## 3) Execute with the local engine, then with soma-workflow
## =========================================================

from epac import LocalEngine
local_engine = LocalEngine(cv_svm_local, num_processes=2)
cv_svm = local_engine.run(**Xy)
print(cv_svm.reduce())

from epac import SomaWorkflowEngine
swf_engine = SomaWorkflowEngine(cv_svm_swf,
                                num_processes=2,
                                remove_finished_wf=False)
cv_svm = swf_engine.run(X=X, y=y)
print(cv_svm.reduce())
Ejemplo n.º 10
0
    def test_memmapping(self):
        """Run a CV of two SVMs and compare it with a saved one-process run.

        The dataset is either memory-mapped (``create_mmat`` /
        ``create_array``) or generated with ``make_classification``,
        depending on ``self.memmap``. The workflow runs on
        ``SomaWorkflowEngine`` when ``self.is_swf`` is true, otherwise on
        ``LocalEngine``.

        With ``self.n_proc == 1`` the tree is saved through ``StoreFs`` and
        the reduced results are pickled to ``<directory>/tmp_save_results``.
        With any other process count those saved artifacts are loaded and
        asserted equal to the current results; a ``KeyError`` while loading
        is reported as a warning instead of failing the test.
        """
        ## 1) Building dataset
        ## ============================================================
        if self.memmap:
            # If the proc is 1, always generate the matrix
            # Otherwise, load it if it exists, or create it if it doesn't
            writing_mode = (self.n_proc == 1)
            X = create_mmat(self.n_samples,
                            self.n_features,
                            dir=self.directory,
                            writing_mode=writing_mode)
            y = create_array(self.n_samples, [0, 1],
                             dir=self.directory,
                             writing_mode=writing_mode)
        else:
            X, y = datasets.make_classification(n_samples=self.n_samples,
                                                n_features=self.n_features,
                                                n_informative=2,
                                                random_state=1)
        Xy = dict(X=X, y=y)
        ## 2) Building workflow
        ## =======================================================
        from sklearn.svm import SVC
        from epac import CV, Methods
        cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                    SVC(kernel="rbf")]),
                          n_folds=3)

        cv_svm = None
        if self.is_swf:
            # Running on the cluster
            from epac import SomaWorkflowEngine
            mmap_mode = None
            if self.memmap:
                mmap_mode = "r+"
            swf_engine = SomaWorkflowEngine(
                cv_svm_local,
                num_processes=self.n_proc,
                resource_id="jl237561@gabriel",
                login="******",
                # remove_finished_wf=False,
                # remove_local_tree=False,
                mmap_mode=mmap_mode,
                queue="Global_long")

            cv_svm = swf_engine.run(**Xy)

            # Printing information about the jobs
            time.sleep(2)
            print('')
            sum_memory = 0
            max_time_cost = 0
            for job_info in swf_engine.engine_info:
                print(
                    "mem_cost = {0}, vmem_cost = {1}, time_cost = {2}".format(
                        job_info.mem_cost, job_info.vmem_cost,
                        job_info.time_cost))
                sum_memory += job_info.mem_cost
                if max_time_cost < job_info.time_cost:
                    max_time_cost = job_info.time_cost
            print("sum_memory = ", sum_memory)
            print("max_time_cost = ", max_time_cost)
        else:
            # Running on the local machine
            from epac import LocalEngine
            local_engine = LocalEngine(cv_svm_local, num_processes=self.n_proc)
            cv_svm = local_engine.run(**Xy)

        cv_svm_reduce = cv_svm.reduce()
        print("\n -> Reducing results")
        print(cv_svm_reduce)

        # Creating the directory to save results, if it doesn't exist
        dirname = 'tmp_save_tree/'
        if self.directory is None:
            directory = '/tmp'
        else:
            directory = self.directory
        if not os.path.isdir(directory):
            os.mkdir(directory)
        dirpath = os.path.join(directory, dirname)
        if not os.path.isdir(dirpath):
            os.mkdir(dirpath)
        results_path = os.path.join(directory, "tmp_save_results")

        if self.n_proc == 1:
            ## 4.1) Saving results on the disk for one process
            ## ===================================================
            store = StoreFs(dirpath=dirpath, clear=True)
            cv_svm.save_tree(store=store)

            # pickle writes bytes, so the file must be opened in binary mode
            # (text mode raises TypeError on Python 3).
            with open(results_path, 'wb') as results_file:
                print(results_file.name)
                pickle.dump(cv_svm_reduce, results_file)

        else:
            ## 4.2) Loading the results for one process
            ## ===================================================
            try:
                store = StoreFs(dirpath=dirpath, clear=False)
                cv_svm_one_proc = store.load()

                # NOTE(review): unpickling executes arbitrary code; the file
                # may live under the shared '/tmp' default -- only load
                # results written by a trusted earlier run.
                with open(results_path, 'rb') as results_file:
                    cv_svm_reduce_one_proc = pickle.load(results_file)

                ## 5.2) Comparing results to the results for one process
                ## ===================================================
                print("\nComparing %i proc with one proc" % self.n_proc)
                self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
                self.assertTrue(isequal(cv_svm_reduce, cv_svm_reduce_one_proc))
            except KeyError:
                print("Warning: ")
                print("No previous tree detected, no possible "
                      "comparison of results")
Ejemplo n.º 11
0
cv.reduce()

# Permutations wrapping a cross-validation of SVM(linear) and SVM(rbf)
# -------------------------------------
#           Perms        Perm (Splitter)
#      /     |       \
#     0      1       2   Samples (Slicer)
#            |
#           CV           CV (Splitter)
#       /   |   \
#      0    1    2       Folds (Slicer)
#           |
#        Methods         Methods (Splitter)
#    /           \
# SVM(linear)  SVM(rbf)  Classifiers (Estimator)

from sklearn.svm import SVC
from epac import Perms, CV, Methods
# Build the tree bottom-up, then run and reduce it in-process.
perms_cv_svm = Perms(CV(Methods(SVC(kernel="linear"), SVC(kernel="rbf"))))
perms_cv_svm.run(X=X, y=y)
perms_cv_svm.reduce()

# Same tree executed by soma-workflow with two processes
from epac import SomaWorkflowEngine
sfw_engine = SomaWorkflowEngine(tree_root=perms_cv_svm, num_processes=2)
perms_cv_svm = sfw_engine.run(X=X, y=y)
perms_cv_svm.reduce()
Ejemplo n.º 12
0
cv.reduce()

# Perms + Cross-validation of SVM(linear) and SVM(rbf)
# -------------------------------------
#           Perms        Perm (Splitter)
#      /     |       \
#     0      1       2   Samples (Slicer)
#            |
#           CV           CV (Splitter)
#       /   |   \
#      0    1    2       Folds (Slicer)
#           |
#        Methods         Methods (Splitter)
#    /           \
# SVM(linear)  SVM(rbf)  Classifiers (Estimator)

from sklearn.svm import SVC
from epac import Perms, CV, Methods
# One SVC per kernel, split by Methods, cross-validated, then permuted.
perms_cv_svm = Perms(CV(Methods(
    *(SVC(kernel=kernel_name) for kernel_name in ("linear", "rbf")))))
perms_cv_svm.run(X=X, y=y)
perms_cv_svm.reduce()


# Hand the tree to soma-workflow and run it on two processes
from epac import SomaWorkflowEngine
sfw_engine = SomaWorkflowEngine(num_processes=2, tree_root=perms_cv_svm)
perms_cv_svm = sfw_engine.run(X=X, y=y)
perms_cv_svm.reduce()
Ejemplo n.º 13
0
    def test_memmapping(self):
        """Run a CV of two SVMs and compare it with a saved one-process run.

        The dataset is either memory-mapped (``create_mmat`` /
        ``create_array``) or generated with ``make_classification``,
        depending on ``self.memmap``. The workflow runs on
        ``SomaWorkflowEngine`` when ``self.is_swf`` is true, otherwise on
        ``LocalEngine``.

        With ``self.n_proc == 1`` the tree is saved through ``StoreFs`` and
        the reduced results are pickled to ``<directory>/tmp_save_results``.
        With any other process count those saved artifacts are loaded and
        asserted equal to the current results; a ``KeyError`` while loading
        is reported as a warning instead of failing the test.

        All output goes through ``print(...)`` with a single pre-formatted
        string so the method parses and prints identically on Python 2 and 3.
        """
        ## 1) Building dataset
        ## ============================================================
        if self.memmap:
            # If the proc is 1, always generate the matrix
            # Otherwise, load it if it exists, or create it if it doesn't
            writing_mode = (self.n_proc == 1)
            X = create_mmat(self.n_samples, self.n_features,
                            dir=self.directory,
                            writing_mode=writing_mode)
            y = create_array(self.n_samples, [0, 1], dir=self.directory,
                             writing_mode=writing_mode)
        else:
            X, y = datasets.make_classification(n_samples=self.n_samples,
                                                n_features=self.n_features,
                                                n_informative=2,
                                                random_state=1)
        Xy = dict(X=X, y=y)
        ## 2) Building workflow
        ## =======================================================
        from sklearn.svm import SVC
        from epac import CV, Methods
        cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                    SVC(kernel="rbf")]), n_folds=3)

        cv_svm = None
        if self.is_swf:
            # Running on the cluster
            from epac import SomaWorkflowEngine
            mmap_mode = None
            if self.memmap:
                mmap_mode = "r+"
            swf_engine = SomaWorkflowEngine(cv_svm_local,
                                            num_processes=self.n_proc,
                                            resource_id="jl237561@gabriel",
                                            login="******",
                                            # remove_finished_wf=False,
                                            # remove_local_tree=False,
                                            mmap_mode=mmap_mode,
                                            queue="Global_long")

            cv_svm = swf_engine.run(**Xy)

            # Printing information about the jobs
            time.sleep(2)
            print('')
            sum_memory = 0
            max_time_cost = 0
            for job_info in swf_engine.engine_info:
                print("mem_cost= %s , vmem_cost= %s , time_cost= %s"
                      % (job_info.mem_cost, job_info.vmem_cost,
                         job_info.time_cost))
                sum_memory += job_info.mem_cost
                if max_time_cost < job_info.time_cost:
                    max_time_cost = job_info.time_cost
            print("sum_memory = %s" % (sum_memory,))
            print("max_time_cost = %s" % (max_time_cost,))
        else:
            # Running on the local machine
            from epac import LocalEngine
            local_engine = LocalEngine(cv_svm_local, num_processes=self.n_proc)
            cv_svm = local_engine.run(**Xy)

        cv_svm_reduce = cv_svm.reduce()
        print("\n -> Reducing results")
        print(cv_svm_reduce)

        # Creating the directory to save results, if it doesn't exist
        dirname = 'tmp_save_tree/'
        if self.directory is None:
            directory = '/tmp'
        else:
            directory = self.directory
        if not os.path.isdir(directory):
            os.mkdir(directory)
        dirpath = os.path.join(directory, dirname)
        if not os.path.isdir(dirpath):
            os.mkdir(dirpath)
        results_path = os.path.join(directory, "tmp_save_results")

        if self.n_proc == 1:
            ## 4.1) Saving results on the disk for one process
            ## ===================================================
            store = StoreFs(dirpath=dirpath, clear=True)
            cv_svm.save_tree(store=store)

            # pickle data is binary: open the file in binary mode so the
            # dump is valid on every platform and on Python 3.
            with open(results_path, 'wb') as results_file:
                print(results_file.name)
                pickle.dump(cv_svm_reduce, results_file)

        else:
            ## 4.2) Loading the results for one process
            ## ===================================================
            try:
                store = StoreFs(dirpath=dirpath, clear=False)
                cv_svm_one_proc = store.load()

                # NOTE(review): unpickling executes arbitrary code; the file
                # may live under the shared '/tmp' default -- only load
                # results written by a trusted earlier run.
                with open(results_path, 'rb') as results_file:
                    cv_svm_reduce_one_proc = pickle.load(results_file)

                ## 5.2) Comparing results to the results for one process
                ## ===================================================
                print("\nComparing %i proc with one proc" % self.n_proc)
                self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
                self.assertTrue(isequal(cv_svm_reduce, cv_svm_reduce_one_proc))
            except KeyError:
                print("Warning: ")
                print("No previous tree detected, no possible "
                      "comparison of results")