def test_examples_local_engine(self):
    """Run every example workflow three ways and check they agree.

    Each example workflow is executed (1) directly in-process, (2) through
    LocalEngine, and (3) through SomaWorkflowEngine; the resulting trees
    and their reduced results must all match the direct run.

    Cleanup: removed commented-out filtering code that had left the loop
    body accidentally double-indented.
    """
    list_all_examples = get_wf_example_classes()
    for example in list_all_examples:
        # Three independent copies of the same workflow so the three
        # runs cannot interfere with each other's stored results.
        wf = example().get_workflow()
        local_engine_wf = example().get_workflow()
        sfw_engine_wf = example().get_workflow()
        wf.run(X=self.X, y=self.y)
        local_engine = LocalEngine(tree_root=local_engine_wf,
                                   num_processes=self.n_cores)
        local_engine_wf = local_engine.run(X=self.X, y=self.y)
        # Keep intermediate soma-workflow files for post-mortem debugging.
        sfw_engine = SomaWorkflowEngine(tree_root=sfw_engine_wf,
                                        num_processes=self.n_cores,
                                        remove_finished_wf=False,
                                        remove_local_tree=False)
        sfw_engine_wf = sfw_engine.run(X=self.X, y=self.y)
        self.assertTrue(compare_two_node(wf, local_engine_wf))
        self.assertTrue(compare_two_node(wf, sfw_engine_wf))
        self.assertTrue(comp_2wf_reduce_res(wf, local_engine_wf))
        self.assertTrue(comp_2wf_reduce_res(wf, sfw_engine_wf))
Beispiel #2
0
 def test_examples_local_engine(self):
     """Check that LocalEngine and SomaWorkflowEngine both reproduce
     the tree and the reduced results of a direct in-process run of
     every example workflow."""
     for example_cls in get_wf_example_classes():
         # Independent workflow copies: one reference run, one per engine.
         reference_wf = example_cls().get_workflow()
         wf_for_local = example_cls().get_workflow()
         wf_for_swf = example_cls().get_workflow()
         reference_wf.run(X=self.X, y=self.y)
         engine_local = LocalEngine(tree_root=wf_for_local,
                                    num_processes=self.n_cores)
         wf_for_local = engine_local.run(X=self.X, y=self.y)
         # Keep intermediate soma-workflow files around for debugging.
         engine_swf = SomaWorkflowEngine(tree_root=wf_for_swf,
                                         num_processes=self.n_cores,
                                         remove_finished_wf=False,
                                         remove_local_tree=False)
         wf_for_swf = engine_swf.run(X=self.X, y=self.y)
         for rebuilt_wf in (wf_for_local, wf_for_swf):
             self.assertTrue(compare_two_node(reference_wf, rebuilt_wf))
             self.assertTrue(comp_2wf_reduce_res(reference_wf, rebuilt_wf))
 def test_examples_local_engine(self):
     """Run each example workflow in-process, then re-run the same tree
     through LocalEngine and SomaWorkflowEngine and compare the reduced
     results of all three runs."""
     for example_cls in get_wf_example_classes():
         workflow = example_cls().get_workflow()
         workflow.run(X=self.X, y=self.y)
         engine_local = LocalEngine(tree_root=workflow,
                                    num_processes=self.n_cores)
         result_local = engine_local.run(X=self.X, y=self.y)
         engine_swf = SomaWorkflowEngine(tree_root=workflow,
                                         num_processes=self.n_cores)
         result_swf = engine_swf.run(X=self.X, y=self.y)
         self.assertTrue(comp_2wf_reduce_res(workflow, result_local))
         self.assertTrue(comp_2wf_reduce_res(workflow, result_swf))
Beispiel #4
0
"""

# Script: build a synthetic classification problem, assemble an EPAC
# CV workflow over two SVMs and run it with the multi-process LocalEngine.
# NOTE(review): Python 2 script (`print` statements); the deprecated
# sklearn.cross_validation import below dates it to sklearn < 0.20.
from sklearn import datasets

# Wide dataset: 500 samples x 200000 features, only 2 informative ones.
X, y = datasets.make_classification(n_samples=500,
                                    n_features=200000,
                                    n_informative=2,
                                    random_state=1)

Xy = dict(X=X, y=y)
## 2) Building workflow
## =======================================================
print " -> Pt2 : X and y created, building workflow"
from sklearn import svm, cross_validation
#kfold = cross_validation.KFold(n=len(X), n_folds=3)
#svc = svm.SVC(C=1, kernel='linear')
#print [svc.fit(X[train], y[train]).score(X[test], y[test]) for train, test in kfold]
from epac import CV, Methods
# 3-fold cross-validation over two SVM kernels (linear and RBF).
cv_svm_local = CV(Methods(*[svm.SVC(kernel="linear"),
                            svm.SVC(kernel="rbf")]),
                  n_folds=3)
print " -> Pt3 : Workflow built, defining local engine"
cv_svm = None
n_proc = 2
# Running on the local machine
from epac import LocalEngine
local_engine = LocalEngine(cv_svm_local, num_processes=n_proc)
print " -> Pt4 : Running"
cv_svm = local_engine.run(**Xy)
print " -> Success with %i procs!" % n_proc
# Script: build two identical CV(SVC linear/rbf) workflows and run one
# through LocalEngine and the other through SomaWorkflowEngine.
# NOTE(review): X and y are defined earlier in the original script,
# outside this chunk. Python 2 script (`print` statements).
Xy = dict(X=X, y=y)

## 2) Build two workflows respectively
## =======================================================

from sklearn.svm import SVC
from epac import CV, Methods
# Two separate trees so each engine works on its own copy.
cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                            SVC(kernel="rbf")]),
                  n_folds=3)
cv_svm_swf = CV(Methods(*[SVC(kernel="linear"),
                          SVC(kernel="rbf")]),
                n_folds=3)

## 3) Run two workflows using local engine and soma-workflow
## =========================================================

from epac import LocalEngine
local_engine = LocalEngine(cv_svm_local, num_processes=2)
cv_svm = local_engine.run(X=X, y=y)
print cv_svm.reduce()

from epac import SomaWorkflowEngine
# Keep finished soma-workflow files for inspection after the run.
swf_engine = SomaWorkflowEngine(cv_svm_swf,
                                num_processes=2,
                                #resource_id="jl237561@gabriel",
                                #login="******",
                                remove_finished_wf=False)
cv_svm = swf_engine.run(**Xy)
print cv_svm.reduce()
Beispiel #6
0
# Script: column-split mass-univariate OLS example. X columns get random
# group ids, Y columns form one group, and MUOLS is fitted per block via
# ColumnSplitter, parallelised by LocalEngine.
# NOTE(review): n_samples, n_xfeatures, n_yfeatures, x_n_groups and the
# np / random / epac imports are defined earlier in the original script,
# outside this chunk. Python 2 script (`print`, `xrange`).
y_n_groups = 2

print "jinpeng pt2"
X = np.random.randn(n_samples, n_xfeatures)
Y = np.random.randn(n_samples, n_yfeatures)
# Assign every X column a random group id in [0, x_n_groups] (inclusive,
# per random.randint semantics).
x_group_indices = np.array([random.randint(0, x_n_groups)\
    for i in xrange(n_xfeatures)])
#    y_group_indices = np.array([random.randint(0, y_n_groups)\
#        for i in xrange(n_yfeatures)]) 
# All Y columns in group 0 — the random Y grouping above is disabled.
y_group_indices = np.zeros(n_yfeatures)

print "jinpeng pt3"
# 1) Prediction for each X block return a n_samples x n_yfeatures
mulm = ColumnSplitter(MUOLS(), x_group_indices, y_group_indices)
# mulm.run(X=X, Y=Y)
local_engine = LocalEngine(tree_root=mulm, num_processes=2)
mulm = local_engine.run(X=X, Y=Y)

print "jinpeng pt4"
# Dump the prediction table stored at each leaf of the result tree.
for leaf in mulm.walk_leaves():
    print "===============leaf.load_results()================="
    print "key =", leaf.get_key()
    tab = leaf.load_results()
    print tab["MUOLS"]['Y/pred']

#print "jinpeng pt5"
## 1) Prediction for each X block return a n_samples x n_yfeatures
#mulm_stats = ColumnSplitter(MUOLSStats(), x_group_indices, y_group_indices)
##mulm_stats.run(X=X, Y=Y)
#local_engine = LocalEngine(tree_root=mulm_stats, num_processes=2)
#mulm_stats = local_engine.run(X=X, Y=Y)
Beispiel #7
0
# Script: run the same CV(SVC linear/rbf) workflow through LocalEngine
# and SomaWorkflowEngine on converted inputs.
# NOTE(review): X and convert2memmap are defined earlier in the original
# script, outside this chunk; convert2memmap presumably returns a
# numpy memmap view of its argument — confirm against its definition.
y = convert2memmap(y)

Xy = dict(X=X, y=y)

## 2) Build two workflows respectively
## =======================================================

from sklearn.svm import SVC
from epac import CV, Methods
# Two separate trees so each engine works on its own copy.
cv_svm_local = CV(
    Methods(*[SVC(kernel="linear"), SVC(kernel="rbf")]), n_folds=3)
cv_svm_swf = CV(Methods(*[SVC(kernel="linear"), SVC(kernel="rbf")]), n_folds=3)

## 3) Run two workflows using local engine and soma-workflow
## =========================================================

from epac import LocalEngine
local_engine = LocalEngine(cv_svm_local, num_processes=2)
cv_svm = local_engine.run(X=X, y=y)
print(cv_svm.reduce())

from epac import SomaWorkflowEngine
# Keep finished soma-workflow files for inspection after the run.
swf_engine = SomaWorkflowEngine(
    cv_svm_swf,
    num_processes=2,
    #resource_id="jl237561@gabriel",
    #login="******",
    remove_finished_wf=False)
cv_svm = swf_engine.run(**Xy)
print(cv_svm.reduce())
Beispiel #8
0
    def test_memmapping(self):
        """End-to-end check of EPAC engines with optionally memory-mapped data.

        Builds (or loads, when ``self.memmap``) the dataset, runs a
        CV(SVC linear/rbf) workflow either on a Soma-Workflow cluster
        (``self.is_swf``) or locally, then either saves the results as the
        single-process reference (``self.n_proc == 1``) or compares the
        multi-process results against that saved reference.

        BUG FIX: the pickle file was opened in text mode ('w+' / 'r+'),
        which makes ``pickle.dump``/``pickle.load`` raise TypeError on
        Python 3 — it must be opened in binary mode ('wb' / 'rb').
        """
        ## 1) Building dataset
        ## ============================================================
        if self.memmap:
            # If the proc is 1, always generate the matrix
            # Otherwise, load it if it exists, or create it if it doesn't
            writing_mode = (self.n_proc == 1)
            X = create_mmat(self.n_samples,
                            self.n_features,
                            dir=self.directory,
                            writing_mode=writing_mode)
            y = create_array(self.n_samples, [0, 1],
                             dir=self.directory,
                             writing_mode=writing_mode)
            Xy = dict(X=X, y=y)
        else:
            X, y = datasets.make_classification(n_samples=self.n_samples,
                                                n_features=self.n_features,
                                                n_informative=2,
                                                random_state=1)
            Xy = dict(X=X, y=y)
        ## 2) Building workflow
        ## =======================================================
        from sklearn.svm import SVC
        from epac import CV, Methods
        cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                    SVC(kernel="rbf")]),
                          n_folds=3)

        cv_svm = None
        if self.is_swf:
            # Running on the cluster
            from epac import SomaWorkflowEngine
            # "r+" presumably tells workers to reopen the memmap
            # read-write — confirm against SomaWorkflowEngine docs.
            mmap_mode = None
            if self.memmap:
                mmap_mode = "r+"
            swf_engine = SomaWorkflowEngine(
                cv_svm_local,
                num_processes=self.n_proc,
                resource_id="jl237561@gabriel",
                login="******",
                # remove_finished_wf=False,
                # remove_local_tree=False,
                mmap_mode=mmap_mode,
                queue="Global_long")

            cv_svm = swf_engine.run(**Xy)

            # Printing information about the jobs
            time.sleep(2)
            print('')
            sum_memory = 0
            max_time_cost = 0
            for job_info in swf_engine.engine_info:
                print(
                    "mem_cost = {0}, vmem_cost = {1}, time_cost = {2}".format(
                        job_info.mem_cost, job_info.vmem_cost,
                        job_info.time_cost))
                sum_memory += job_info.mem_cost
                max_time_cost = max(max_time_cost, job_info.time_cost)
            print("sum_memory = ", sum_memory)
            print("max_time_cost = ", max_time_cost)
        else:
            # Running on the local machine
            from epac import LocalEngine
            local_engine = LocalEngine(cv_svm_local, num_processes=self.n_proc)
            cv_svm = local_engine.run(**Xy)

        cv_svm_reduce = cv_svm.reduce()
        print("\n -> Reducing results")
        print(cv_svm_reduce)

        # Creating the directory to save results, if it doesn't exist
        dirname = 'tmp_save_tree/'
        if self.directory is None:
            directory = '/tmp'
        else:
            directory = self.directory
        if not os.path.isdir(directory):
            os.mkdir(directory)
        dirpath = os.path.join(directory, dirname)
        if not os.path.isdir(dirpath):
            os.mkdir(dirpath)

        if self.n_proc == 1:
            ## 4.1) Saving results on the disk for one process
            ## ===================================================
            store = StoreFs(dirpath=dirpath, clear=True)
            cv_svm.save_tree(store=store)

            # Binary mode ('wb') is required by pickle on Python 3.
            with open(os.path.join(directory, "tmp_save_results"), 'wb') \
                    as result_file:
                print(result_file.name)
                pickle.dump(cv_svm_reduce, result_file)

        else:
            ## 4.2) Loading the results for one process
            ## ===================================================
            try:
                store = StoreFs(dirpath=dirpath, clear=False)
                cv_svm_one_proc = store.load()

                # Read back in binary mode to match pickle.dump above.
                with open(os.path.join(directory, "tmp_save_results"), 'rb') \
                        as result_file:
                    cv_svm_reduce_one_proc = pickle.load(result_file)

                ## 5.2) Comparing results to the results for one process
                ## ===================================================
                print("\nComparing %i proc with one proc" % self.n_proc)
                self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
                self.assertTrue(isequal(cv_svm_reduce, cv_svm_reduce_one_proc))
            except KeyError:
                print("Warning: ")
                print("No previous tree detected, no possible "\
                    "comparison of results")
    def test_memmapping(self):
        """End-to-end check of EPAC engines with optionally memory-mapped data.

        Python 2 variant (`print` statements): builds (or loads, when
        ``self.memmap``) the dataset, runs a CV(SVC linear/rbf) workflow
        on a Soma-Workflow cluster (``self.is_swf``) or locally, then
        either saves the results as the single-process reference
        (``self.n_proc == 1``) or compares the multi-process results
        against that saved reference.
        """
        ## 1) Building dataset
        ## ============================================================
        if self.memmap:
            # If the proc is 1, always generate the matrix
            # Otherwise, load it if it exists, or create it if it doesn't
            writing_mode = (self.n_proc == 1)
            X = create_mmat(self.n_samples, self.n_features,
                            dir=self.directory,
                            writing_mode=writing_mode)
            y = create_array(self.n_samples, [0, 1], dir=self.directory,
                             writing_mode=writing_mode)
            Xy = dict(X=X, y=y)
        else:
            X, y = datasets.make_classification(n_samples=self.n_samples,
                                                n_features=self.n_features,
                                                n_informative=2,
                                                random_state=1)
            Xy = dict(X=X, y=y)
        ## 2) Building workflow
        ## =======================================================
        from sklearn.svm import SVC
        from epac import CV, Methods
        cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                    SVC(kernel="rbf")]), n_folds=3)

        cv_svm = None
        if self.is_swf:
            # Running on the cluster
            from epac import SomaWorkflowEngine
            # "r+" presumably tells workers to reopen the memmap
            # read-write — confirm against SomaWorkflowEngine docs.
            mmap_mode = None
            if self.memmap:
                mmap_mode = "r+"
            swf_engine = SomaWorkflowEngine(cv_svm_local,
                                            num_processes=self.n_proc,
                                            resource_id="jl237561@gabriel",
                                            login="******",
                                            # remove_finished_wf=False,
                                            # remove_local_tree=False,
                                            mmap_mode=mmap_mode,
                                            queue="Global_long")

            cv_svm = swf_engine.run(**Xy)

            # Printing information about the jobs
            time.sleep(2)
            print ''
            sum_memory = 0
            max_time_cost = 0
            for job_info in swf_engine.engine_info:
                print "mem_cost=", job_info.mem_cost, \
                      ", vmem_cost=", job_info.vmem_cost, \
                      ", time_cost=", job_info.time_cost
                sum_memory += job_info.mem_cost
                if max_time_cost < job_info.time_cost:
                    max_time_cost = job_info.time_cost
            print "sum_memory =", sum_memory
            print "max_time_cost =", max_time_cost
        else:
            # Running on the local machine
            from epac import LocalEngine
            local_engine = LocalEngine(cv_svm_local, num_processes=self.n_proc)
            cv_svm = local_engine.run(**Xy)

        cv_svm_reduce = cv_svm.reduce()
        print "\n -> Reducing results"
        print cv_svm_reduce

        # Creating the directory to save results, if it doesn't exist
        dirname = 'tmp_save_tree/'
        if self.directory is None:
            directory = '/tmp'
        else:
            directory = self.directory
        if not os.path.isdir(directory):
            os.mkdir(directory)
        dirpath = os.path.join(directory, dirname)
        if not os.path.isdir(dirpath):
            os.mkdir(dirpath)

        if self.n_proc == 1:
            ## 4.1) Saving results on the disk for one process
            ## ===================================================
            store = StoreFs(dirpath=dirpath, clear=True)
            cv_svm.save_tree(store=store)

            # NOTE(review): text-mode pickle files ('w+'/'r+') only work on
            # Python 2; a Python 3 port must open these in 'wb'/'rb'.
            with open(os.path.join(directory, "tmp_save_results"), 'w+') \
                    as filename:
                print filename.name
                pickle.dump(cv_svm_reduce, filename)

        else:
            ## 4.2) Loading the results for one process
            ## ===================================================
            try:
                store = StoreFs(dirpath=dirpath, clear=False)
                cv_svm_one_proc = store.load()

                with open(os.path.join(directory, "tmp_save_results"), 'r+') \
                        as filename:
                    cv_svm_reduce_one_proc = pickle.load(filename)

                ## 5.2) Comparing results to the results for one process
                ## ===================================================
                print "\nComparing %i proc with one proc" % self.n_proc
                self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
                self.assertTrue(isequal(cv_svm_reduce, cv_svm_reduce_one_proc))
            except KeyError:
                print "Warning: "
                print "No previous tree detected, no possible "\
                    "comparison of results"