Esempio n. 1
0
def do_all(options):
    """Build, run and reduce a permutation / CV / nested grid-search workflow.

    A synthetic classification dataset is generated, a
    ``Perms(CV(CVBestSearchRefit(Methods(...))))`` workflow is built over a
    grid of ``SelectKBest(k) + linear SVC(C)`` pipelines, then the workflow
    is run and reduced.  The elapsed wall-clock time of each phase and the
    reduced result are printed.

    Parameters
    ----------
    options: object
        Must expose the attributes read below: ``k_max`` ("auto" or a value
        convertible with ``int``), ``n_features``, ``n_samples``,
        ``n_informative``, ``n_folds``, ``n_folds_nested``, ``n_perms`` and
        ``trace``.
    """
    ## k grid: built by range_log2 from n_features, limited to k_max when
    ## k_max is not "auto"
    if options.k_max != "auto":
        k_values = range_log2(np.minimum(int(options.k_max),
                                         options.n_features),
                              add_n=True)
    else:
        k_values = range_log2(options.n_features, add_n=True)
    C_values = [1, 10]
    random_state = 0
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[
        Pipe(SelectKBest(k=k), SVC(kernel="linear", C=C)) for C in C_values
        for k in k_values
    ])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    ## Single-argument print(...) with % formatting emits the same text under
    ## the Python 2 print statement and the Python 3 print function.
    print("Time ellapsed, tree construction: %s" % (time.time() - time_start))

    ## 3) Run Workflow
    ## ===============
    time_fit_predict = time.time()
    wf.run(X=X, y=y)
    print("Time ellapsed, fit predict: %s" % (time.time() - time_fit_predict))
    time_reduce = time.time()

    ## 4) Reduce Workflow
    ## ==================
    print(wf.reduce())
    print("Time ellapsed, reduce: %s" % (time.time() - time_reduce))
def do_all(options):
    """Time the construction, execution and reduction of an EPAC workflow.

    The workflow is ``Perms(CV(CVBestSearchRefit(Methods(...))))`` where the
    methods are every ``SelectKBest(k) + SVC(kernel="linear", C)`` pipeline
    for ``C`` in ``[1, 10]`` and ``k`` in the values returned by
    ``range_log2``.  It is run on a dataset generated with
    ``datasets.make_classification``.  Timings and the reduced result are
    printed; nothing is returned.

    Parameters
    ----------
    options: object
        Attributes read: ``k_max`` ("auto" or something ``int`` accepts),
        ``n_features``, ``n_samples``, ``n_informative``, ``n_folds``,
        ``n_folds_nested``, ``n_perms``, ``trace``.
    """
    ## Upper bound handed to range_log2: n_features, or the smaller of k_max
    ## and n_features when k_max is given explicitly
    if options.k_max != "auto":
        k_values = range_log2(np.minimum(int(options.k_max),
                                         options.n_features), add_n=True)
    else:
        k_values = range_log2(options.n_features, add_n=True)
    C_values = [1, 10]
    random_state = 0
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[Pipe(SelectKBest(k=k),
                         SVC(kernel="linear", C=C))
                    for C in C_values
                    for k in k_values])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    ## print(...) with one %-formatted argument is valid on Python 2 and 3
    ## and produces the same output as the former print statement.
    print("Time ellapsed, tree construction: %s" % (time.time() - time_start))

    ## 3) Run Workflow
    ## ===============
    time_fit_predict = time.time()
    wf.run(X=X, y=y)
    print("Time ellapsed, fit predict: %s" % (time.time() - time_fit_predict))
    time_reduce = time.time()

    ## 4) Reduce Workflow
    ## ==================
    print(wf.reduce())
    print("Time ellapsed, reduce: %s" % (time.time() - time_reduce))
Esempio n. 3
0
    def test_perm_cv(self):
        """Compare Perms(CV(SVC)) predictions with a hand-written sklearn loop.

        The same permutations (``Permutations`` seeded with ``rnd``) and the
        same stratified folds are replayed manually with scikit-learn; the
        test-set predictions of every (permutation, fold) pair must equal
        those returned by ``wf.run`` and those exposed by ``wf.reduce``.
        """
        X, y = datasets.make_classification(n_samples=20,
                                            n_features=5,
                                            n_informative=2)
        n_perms = 3
        n_folds = 2
        rnd = 0
        # = With EPAC
        wf = Perms(CV(SVC(kernel="linear"),
                      n_folds=n_folds,
                      reducer=ClassificationReport(keep=True)),
                   n_perms=n_perms,
                   permute="y",
                   random_state=rnd,
                   reducer=None)
        r_epac = wf.run(X=X, y=y)
        # = With SKLEARN
        from sklearn.cross_validation import StratifiedKFold
        clf = SVC(kernel="linear")
        # r_sklearn[perm][fold] holds the predictions on that fold's test set
        r_sklearn = [[None] * n_folds for _ in range(n_perms)]
        permutations = Permutations(n=y.shape[0],
                                    n_perms=n_perms,
                                    random_state=rnd)
        for perm_nb, perm in enumerate(permutations):
            y_p = y[perm]
            folds = StratifiedKFold(y=y_p, n_folds=n_folds)
            for fold_nb, (idx_train, idx_test) in enumerate(folds):
                X_train = X[idx_train, :]
                X_test = X[idx_test, :]
                # The label vector is one-dimensional: a second index
                # (``y_p[idx_train, :]``) raises IndexError.
                y_p_train = y_p[idx_train]
                clf.fit(X_train, y_p_train)
                r_sklearn[perm_nb][fold_nb] = clf.predict(X_test)

        cmp_key = 'y' + conf.SEP + conf.TEST + conf.SEP + conf.PREDICTION
        # Comparison
        for iperm in range(n_perms):
            for icv in range(n_folds):
                comp = np.all(
                    np.asarray(r_epac[iperm][icv][cmp_key]) == np.asarray(
                        r_sklearn[iperm][icv]))
                self.assertTrue(comp, u'Diff Perm / CV: EPAC vs sklearn')

        # test reduce
        for iperm in range(n_perms):
            for icv in range(n_folds):
                comp = np.all(
                    np.asarray(wf.reduce().values()[iperm][cmp_key][icv]) ==
                    np.asarray(r_sklearn[iperm][icv]))
                self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
Esempio n. 4
0
    def test_perm_cv(self):
        """Check EPAC permutation + CV predictions against plain scikit-learn.

        A ``Perms(CV(SVC))`` workflow is run, then the same permutations and
        stratified folds are reproduced by hand with scikit-learn.  For each
        permutation and fold the predicted test labels must be identical,
        both in the output of ``wf.run`` and in the output of ``wf.reduce``.
        """
        X, y = datasets.make_classification(n_samples=20, n_features=5,
                                            n_informative=2)
        n_perms = 3
        n_folds = 2
        rnd = 0
        # = With EPAC
        wf = Perms(CV(SVC(kernel="linear"), n_folds=n_folds,
                      reducer=ClassificationReport(keep=True)),
                   n_perms=n_perms, permute="y",
                   random_state=rnd, reducer=None)
        r_epac = wf.run(X=X, y=y)
        # = With SKLEARN
        from sklearn.cross_validation import StratifiedKFold
        clf = SVC(kernel="linear")
        # One slot per (permutation, fold) for the sklearn predictions
        r_sklearn = [[None] * n_folds for _ in range(n_perms)]
        for perm_nb, perm in enumerate(Permutations(n=y.shape[0],
                                                    n_perms=n_perms,
                                                    random_state=rnd)):
            y_p = y[perm]
            for fold_nb, (idx_train, idx_test) in enumerate(
                    StratifiedKFold(y=y_p, n_folds=n_folds)):
                X_train = X[idx_train, :]
                X_test = X[idx_test, :]
                # y_p has a single axis, so it takes exactly one index;
                # the former ``y_p[idx_train, :]`` raised IndexError.
                y_p_train = y_p[idx_train]
                clf.fit(X_train, y_p_train)
                r_sklearn[perm_nb][fold_nb] = clf.predict(X_test)

        cmp_key = 'y' + conf.SEP + conf.TEST + conf.SEP + conf.PREDICTION
        # Comparison
        for iperm in range(n_perms):
            for icv in range(n_folds):
                comp = np.all(
                    np.asarray(r_epac[iperm][icv][cmp_key]) ==
                    np.asarray(r_sklearn[iperm][icv]))
                self.assertTrue(comp, u'Diff Perm / CV: EPAC vs sklearn')

        # test reduce
        for iperm in range(n_perms):
            for icv in range(n_folds):
                comp = np.all(
                    np.asarray(wf.reduce().values()[iperm][cmp_key][icv]) ==
                    np.asarray(r_sklearn[iperm][icv]))
                self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
Esempio n. 5
0
# -*- coding: utf-8 -*-
"""
Created on Thu May 23 15:21:35 2013

@author: ed203246
"""

from sklearn import datasets
from sklearn.svm import LinearSVC as SVM
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.feature_selection import SelectKBest
from epac.map_reduce.reducers import PvalPerms
import numpy

# Generate a classification dataset: 100 samples, 200 features, 2 informative
X, y = datasets.make_classification(
    n_samples=100, n_features=200, n_informative=2)
# Keep y but overwrite the features with random values of the same shape
X = numpy.random.rand(*X.shape)

from epac import Perms, CV, Methods

# 100 permutations around a CV that runs LinearSVC with loss "l1" and "l2"
perms_cv_svm = Perms(
    CV(Methods(*(SVM(loss=loss_name) for loss_name in ("l1", "l2")))),
    n_perms=100)
perms_cv_svm.run(X=X, y=y)
perms_cv_svm.reduce()

# Module-level names left bound at the end of the script; `self` ends up
# referring to a fresh PvalPerms instance, not to the workflow.
key = 'LinearSVC(loss=l1)'
self = perms_cv_svm
self = PvalPerms()
Esempio n. 6
0
# -*- coding: utf-8 -*-
"""
Created on Thu May 23 15:21:35 2013

@author: ed203246
"""


from sklearn import datasets
from sklearn.svm import LinearSVC as SVM
try:
    # Module that provides LDA in scikit-learn releases without sklearn.lda
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
except ImportError:
    # Older scikit-learn releases only provide the original module
    from sklearn.lda import LDA
from sklearn.feature_selection import SelectKBest
# numpy and PvalPerms are used below but were never imported (NameError)
import numpy

from epac import Perms, CV, Methods
from epac.map_reduce.reducers import PvalPerms

# Generate a classification dataset, then overwrite its features with random
# values of the same shape (the labels y are kept).
X, y = datasets.make_classification(n_samples=100, n_features=200, n_informative=2)
X = numpy.random.rand(*X.shape)

# 100 permutations around a CV that runs LinearSVC with loss "l1" and "l2"
perms_cv_svm = Perms(CV(Methods(SVM(loss="l1"), SVM(loss="l2"))), n_perms=100)
perms_cv_svm.run(X=X, y=y)
perms_cv_svm.reduce()

# Module-level names left bound at the end of the script; `self` ends up
# referring to a fresh PvalPerms instance, not to the workflow.
self = perms_cv_svm
key = "LinearSVC(loss=l1)"
self = PvalPerms()