def do_all(options):
    if options.k_max != "auto":
        k_values = range_log2(np.minimum(int(options.k_max),
                                         options.n_features), add_n=True)
    else:
        k_values = range_log2(options.n_features, add_n=True)
    C_values = [1, 10]
    random_state = 0
    #print options
    #sys.exit(0)
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[Pipe(SelectKBest(k=k),
                      SVC(kernel="linear", C=C))
                      for C in C_values
                      for k in k_values])
    pipeline = CVBestSearchRefit(cls,
                  n_folds=options.n_folds_nested, random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
             n_perms=options.n_perms,
             permute="y",
             random_state=random_state)
    print "Time ellapsed, tree construction:", time.time() - time_start

    ## 3) Run Workflow
    ## ===============
    time_fit_predict = time.time()
    ## Run on local machine
    sfw_engine = SomaWorkflowEngine(
                        tree_root=wf,
                        num_processes=options.n_cores
                        )
    ## Run on cluster
#    sfw_engine = SomaWorkflowEngine(
#                        tree_root=wf,
#                        num_processes=options.n_cores,
#                        resource_id="jl237561@gabriel",
#                        login="******")
    # You can use soma_workflow_gui to track your progress
    wf = sfw_engine.run(X=X, y=y)
    print "Time ellapsed, fit predict:",  time.time() - time_fit_predict
    time_reduce = time.time()

    ## 4) Reduce Workflow
    ## ==================
    print wf.reduce()
    print "Time ellapsed, reduce:",   time.time() - time_reduce
    def test_mysvc_reducer(self):
        ## 1) Build dataset
        ## ===================================================================
        X, y = datasets.make_classification(n_samples=12,
                                            n_features=10,
                                            n_informative=2,
                                            random_state=1)

        ## 2) run with Methods
        ## ===================================================================
        my_svc1 = MySVC(C=1.0)
        my_svc2 = MySVC(C=2.0)

        two_svc_single = Methods(my_svc1, my_svc2)
        two_svc_local = Methods(my_svc1, my_svc2)
        two_svc_swf = Methods(my_svc1, my_svc2)

        two_svc_single.reducer = MyReducer()
        two_svc_local.reducer = MyReducer()
        two_svc_swf.reducer = MyReducer()

        for leaf in two_svc_single.walk_leaves():
            print leaf.get_key()
        for leaf in two_svc_local.walk_leaves():
            print leaf.get_key()
        for leaf in two_svc_swf.walk_leaves():
            print leaf.get_key()

        # top-down process to call transform
        two_svc_single.run(X=X, y=y)
        # bottom-up process to compute scores
        res_single = two_svc_single.reduce()

        ### You should get results like the following:
        ### ==================================================================
        ### [{'MySVC(C=1.0)': array([ 1.,  1.])}, {'MySVC(C=2.0)': array([ 1.,  1.])}]

        ### 3) Run using local multi-processes
        ### ==================================================================
        from epac.map_reduce.engine import LocalEngine
        local_engine = LocalEngine(two_svc_local, num_processes=2)
        two_svc_local = local_engine.run(**dict(X=X, y=y))
        res_local = two_svc_local.reduce()

        ### 4) Run using soma-workflow
        ### ==================================================================
        from epac.map_reduce.engine import SomaWorkflowEngine
        sfw_engine = SomaWorkflowEngine(tree_root=two_svc_swf,
                                        num_processes=2)
        two_svc_swf = sfw_engine.run(**dict(X=X, y=y))
        res_swf = two_svc_swf.reduce()
        if not repr(res_swf) == repr(res_local):
            raise ValueError("Cannot dump class definition")
        if not repr(res_swf) == repr(res_single):
            raise ValueError("Cannot dump class definition")
def do_all(options):
    if options.k_max != "auto":
        k_values = range_log2(np.minimum(int(options.k_max),
                                         options.n_features),
                              add_n=True)
    else:
        k_values = range_log2(options.n_features, add_n=True)
    C_values = [1, 10]
    random_state = 0
    #print options
    #sys.exit(0)
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[
        Pipe(SelectKBest(k=k), SVC(kernel="linear", C=C)) for C in C_values
        for k in k_values
    ])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    print "Time ellapsed, tree construction:", time.time() - time_start

    ## 3) Export Workflow to soma_workflow_gui
    ## =======================================
    time_fit_predict = time.time()
    if os.path.isdir(options.soma_workflow_dir):
        shutil.rmtree(options.soma_workflow_dir)
    sfw_engine = SomaWorkflowEngine(tree_root=wf,
                                    num_processes=options.n_cores)
    sfw_engine.export_to_gui(options.soma_workflow_dir, X=X, y=y)

    print "Time ellapsed, fit predict:", time.time() - time_fit_predict

    #    ## 6) Load Epac tree & Reduce
    #    ## ==========================
    reduce_filename = os.path.join(options.soma_workflow_dir, "reduce.py")
    reduce_str = """from epac.map_reduce.engine import SomaWorkflowEngine
wf = SomaWorkflowEngine.load_from_gui("%s")
print wf.reduce()
""" % options.soma_workflow_dir
    with open(reduce_filename, 'w') as f:
        f.write(reduce_str)
    print "#First run\n"\
        "soma_workflow_gui\n"\
        "\t(1)Open %s\n"\
        "\t(2)Submit\n"\
        "\t(3)Transfer Input Files\n"\
        "\t...wait...\n"\
        "\t(4)Transfer Output Files\n"\
        "#When done run:\npython %s" % (
            os.path.join(options.soma_workflow_dir,
                         sfw_engine.open_me_by_soma_workflow_gui),
            reduce_filename)
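Once the jobs have been submitted and have finished in soma_workflow_gui, the generated reduce.py collects the results. Running it is equivalent to the following snippet (the directory path is illustrative; use the same options.soma_workflow_dir as above):

from epac.map_reduce.engine import SomaWorkflowEngine

# Reload the workflow tree from the exported soma-workflow directory and
# aggregate the results (the path is a placeholder for options.soma_workflow_dir).
wf = SomaWorkflowEngine.load_from_gui("/tmp/epac_swf_dir")
print wf.reduce()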
## Permutations of cross-validation (assumes `pipeline`, `n_folds`, `n_perms`
## and `random_state` are defined earlier in the script)
wf = Perms(CV(pipeline, n_folds=n_folds),
           n_perms=n_perms,
           permute="y",
           random_state=random_state)

# wf.run(X=X, y=y)
# for leaf in wf.walk_leaves():
#     print leaf.load_results()
# wf.reduce()

# from epac.map_reduce.engine import LocalEngine
# local_engine = LocalEngine(tree_root=wf, num_processes=2)
# wf = local_engine.run(X=X, y=y)
# for leaf in wf.walk_leaves():
#     print leaf.load_results()
# wf.reduce()

from epac.map_reduce.engine import SomaWorkflowEngine
sfw_engine = SomaWorkflowEngine(
                        tree_root=wf,
                        num_processes=3,
                        remove_finished_wf=False,
                        remove_local_tree=False)
wf = sfw_engine.run(X=X, y=y)
#for leaf in wf.walk_leaves():
#    print leaf.load_results()
#for node in wf.walk_true_nodes():
#    print node
#    print node.load_results()

print wf.reduce()
class SVMTransform:
    def __init__(self, C=1.0):  # scaffolding assumed; transform() below reads self.C
        self.C = C

    def transform(self, X, y):
        from sklearn.svm import SVC
        svc = SVC(C=self.C)
        svc.fit(X, y)
        # "transform" should return a dictionary, i.e. a result; keys are arbitrary
        return {"y/pred": svc.predict(X), "y/true": y}

best_svc_transform = Methods(SVMTransform(C=1.0), SVMTransform(C=2.0))
cv = CV(best_svc_transform, cv_key="y", cv_type="stratified", n_folds=2,
        reducer=None)
cv.run(X=X, y=y)  # top-down process to call transform
cv.reduce()       # bottom-up process
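Sections 4) and 5) below run a workflow named best_svc that is not built in this excerpt. A plausible construction, following the CVBestSearchRefit pattern of the first example (the child classifiers and the n_folds value are assumptions):

from sklearn.svm import SVC

# Hypothetical reconstruction of the `best_svc` workflow used below: a nested,
# cross-validated model selection over two SVC candidates.
svms = Methods(SVC(kernel="linear", C=1.0), SVC(kernel="linear", C=2.0))
best_svc = CVBestSearchRefit(svms, n_folds=2)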


#
## 4) Run using local multi-processes
## ==================================

from epac.map_reduce.engine import LocalEngine
local_engine = LocalEngine(best_svc, num_processes=2)
best_svc = local_engine.run(**dict(X=X, y=y))
best_svc.reduce()

## 5) Run using soma-workflow
## ==========================

from epac.map_reduce.engine import SomaWorkflowEngine
sfw_engine = SomaWorkflowEngine(tree_root=best_svc,
                                num_processes=2)
best_svc = sfw_engine.run(**dict(X=X, y=y))
best_svc.reduce()