def do_all(options):
    """Build, run and reduce a permutation / CV / nested grid-search workflow.

    A synthetic classification dataset is generated, then a workflow of the
    form ``Perms(CV(CVBestSearchRefit(Methods(...))))`` is built over every
    ``SelectKBest(k) -> SVC(kernel="linear", C=C)`` pipeline of the grid,
    run on the dataset and reduced.  The wall-clock duration of each stage
    and the reduced result are printed.

    Parameters
    ----------
    options : object
        Attribute container (presumably parsed command-line options --
        verify against the caller).  The attributes read here are
        ``k_max`` (``"auto"`` or a value accepted by ``int()``),
        ``n_features``, ``n_samples``, ``n_informative``, ``n_folds``,
        ``n_folds_nested``, ``n_perms`` and ``trace``.

    Returns
    -------
    None
    """
    # Upper bound handed to range_log2: every feature when k_max is "auto",
    # otherwise the requested k_max capped at the number of features.
    if options.k_max != "auto":
        k_limit = np.minimum(int(options.k_max), options.n_features)
    else:
        k_limit = options.n_features
    k_values = range_log2(k_limit, add_n=True)
    C_values = [1, 10]
    random_state = 0
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[Pipe(SelectKBest(k=k), SVC(kernel="linear", C=C))
                    for C in C_values
                    for k in k_values])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    # Single-argument print(...) with %s formatting produces the same text
    # as the former print statement and is valid on Python 2 and Python 3.
    print("Time ellapsed, tree construction: %s"
          % (time.time() - time_start,))

    ## 3) Run Workflow
    ## ===============
    time_fit_predict = time.time()
    wf.run(X=X, y=y)
    print("Time ellapsed, fit predict: %s"
          % (time.time() - time_fit_predict,))
    time_reduce = time.time()

    ## 4) Reduce Workflow
    ## ==================
    print(wf.reduce())
    print("Time ellapsed, reduce: %s" % (time.time() - time_reduce,))
def do_all(options):
    """Time the construction, execution and reduction of an EPAC workflow.

    The workflow is ``Perms(CV(CVBestSearchRefit(Methods(...))))`` where the
    methods are all ``SelectKBest(k) -> SVC(kernel="linear", C=C)`` pipelines
    for ``C`` in ``[1, 10]`` and ``k`` in the values returned by
    ``range_log2``.  It is run on a dataset produced by
    ``datasets.make_classification``; stage durations and the reduced result
    are printed.

    Parameters
    ----------
    options : object
        Attribute container (presumably parsed command-line options --
        verify against the caller) providing ``k_max`` (``"auto"`` or a
        value accepted by ``int()``), ``n_features``, ``n_samples``,
        ``n_informative``, ``n_folds``, ``n_folds_nested``, ``n_perms`` and
        ``trace``.

    Returns
    -------
    None
    """
    if options.k_max != "auto":
        # Never ask for more features than the dataset has.
        k_values = range_log2(np.minimum(int(options.k_max),
                                         options.n_features), add_n=True)
    else:
        k_values = range_log2(options.n_features, add_n=True)
    C_values = [1, 10]
    random_state = 0
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[Pipe(SelectKBest(k=k), SVC(kernel="linear", C=C))
                    for C in C_values
                    for k in k_values])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    # print(...) with one pre-formatted argument behaves the same on
    # Python 2 and Python 3; the former print statement only parsed on 2.
    elapsed = time.time() - time_start
    print("Time ellapsed, tree construction: %s" % (elapsed,))

    ## 3) Run Workflow
    ## ===============
    time_fit_predict = time.time()
    wf.run(X=X, y=y)
    elapsed = time.time() - time_fit_predict
    print("Time ellapsed, fit predict: %s" % (elapsed,))
    time_reduce = time.time()

    ## 4) Reduce Workflow
    ## ==================
    print(wf.reduce())
    elapsed = time.time() - time_reduce
    print("Time ellapsed, reduce: %s" % (elapsed,))
def test_perm_cv(self):
    """Compare ``Perms(CV(SVC))`` predictions with a plain sklearn loop.

    The EPAC workflow is run on a small synthetic dataset; the same
    permutations and stratified folds are then replayed by hand with
    scikit-learn.  Test predictions must match for every
    (permutation, fold) pair, both in the value returned by ``run`` and in
    the value returned by ``reduce``.
    """
    X, y = datasets.make_classification(n_samples=20, n_features=5,
                                        n_informative=2)
    n_perms = 3
    n_folds = 2
    rnd = 0
    # = With EPAC
    wf = Perms(CV(SVC(kernel="linear"), n_folds=n_folds,
                  reducer=ClassificationReport(keep=True)),
               n_perms=n_perms, permute="y",
               random_state=rnd, reducer=None)
    r_epac = wf.run(X=X, y=y)
    # = With SKLEARN
    from sklearn.cross_validation import StratifiedKFold
    clf = SVC(kernel="linear")
    r_sklearn = [[None] * n_folds for _ in range(n_perms)]
    permutations = Permutations(n=y.shape[0], n_perms=n_perms,
                                random_state=rnd)
    for perm_nb, perm in enumerate(permutations):
        y_p = y[perm]
        folds = StratifiedKFold(y=y_p, n_folds=n_folds)
        for fold_nb, (idx_train, idx_test) in enumerate(folds):
            X_train = X[idx_train, :]
            X_test = X[idx_test, :]
            # The label vector is 1-D, so it is indexed along its single
            # axis; a trailing ``:`` would raise IndexError.
            y_p_train = y_p[idx_train]
            clf.fit(X_train, y_p_train)
            r_sklearn[perm_nb][fold_nb] = clf.predict(X_test)
    cmp_key = 'y' + conf.SEP + conf.TEST + conf.SEP + conf.PREDICTION
    # Comparison
    for iperm in range(n_perms):
        for icv in range(n_folds):
            comp = np.all(np.asarray(r_epac[iperm][icv][cmp_key])
                          == np.asarray(r_sklearn[iperm][icv]))
            self.assertTrue(comp, u'Diff Perm / CV: EPAC vs sklearn')
    # test reduce
    # Reduce once instead of once per comparison; list() keeps positional
    # indexing valid when values() returns a view rather than a list.
    # NOTE(review): assumes reduce() yields the same result on every call,
    # which the original per-iteration calls also relied on -- confirm.
    reduced = list(wf.reduce().values())
    for iperm in range(n_perms):
        for icv in range(n_folds):
            comp = np.all(np.asarray(reduced[iperm][cmp_key][icv])
                          == np.asarray(r_sklearn[iperm][icv]))
            self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
def test_perm_cv(self):
    """Check that EPAC's Perms/CV workflow agrees with scikit-learn.

    Runs ``Perms(CV(SVC(kernel="linear")))`` on a small synthetic dataset,
    rebuilds the same permutation x fold predictions with an explicit
    scikit-learn loop, and asserts equality of the test predictions from
    both ``wf.run`` and ``wf.reduce``.
    """
    X, y = datasets.make_classification(n_samples=20, n_features=5,
                                        n_informative=2)
    n_perms = 3
    n_folds = 2
    rnd = 0
    # = With EPAC
    wf = Perms(CV(SVC(kernel="linear"), n_folds=n_folds,
                  reducer=ClassificationReport(keep=True)),
               n_perms=n_perms, permute="y",
               random_state=rnd, reducer=None)
    r_epac = wf.run(X=X, y=y)
    # = With SKLEARN
    from sklearn.cross_validation import StratifiedKFold
    clf = SVC(kernel="linear")
    r_sklearn = [[None] * n_folds for _ in range(n_perms)]
    for perm_nb, perm in enumerate(Permutations(n=y.shape[0],
                                                n_perms=n_perms,
                                                random_state=rnd)):
        y_p = y[perm]
        for fold_nb, (idx_train, idx_test) in enumerate(
                StratifiedKFold(y=y_p, n_folds=n_folds)):
            X_train = X[idx_train, :]
            X_test = X[idx_test, :]
            # Labels form a 1-D array: select rows with a single index
            # (``y_p[idx_train, :]`` raises IndexError on 1-D input).
            y_p_train = y_p[idx_train]
            clf.fit(X_train, y_p_train)
            r_sklearn[perm_nb][fold_nb] = clf.predict(X_test)
    cmp_key = 'y' + conf.SEP + conf.TEST + conf.SEP + conf.PREDICTION
    # Comparison
    for iperm in range(n_perms):
        for icv in range(n_folds):
            expected = np.asarray(r_sklearn[iperm][icv])
            comp = np.all(np.asarray(r_epac[iperm][icv][cmp_key])
                          == expected)
            self.assertTrue(comp, u'Diff Perm / CV: EPAC vs sklearn')
    # test reduce
    # One reduce() call is enough for all comparisons; list() makes the
    # values indexable by position whether values() is a list or a view.
    # NOTE(review): relies on reduce() being repeatable, as the original
    # repeated calls did -- confirm.
    reduced_perms = list(wf.reduce().values())
    for iperm in range(n_perms):
        for icv in range(n_folds):
            expected = np.asarray(r_sklearn[iperm][icv])
            comp = np.all(np.asarray(reduced_perms[iperm][cmp_key][icv])
                          == expected)
            self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
# -*- coding: utf-8 -*-
"""
Created on Thu May 23 15:21:35 2013

@author: ed203246
"""
# Scratch script: runs a permutation + cross-validation workflow over two
# LinearSVC variants on pure-noise features, then leaves a few names bound
# at module level (apparently for interactive inspection of PvalPerms).

import numpy
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.feature_selection import SelectKBest
from sklearn.svm import LinearSVC as SVM

from epac import Perms, CV, Methods
from epac.map_reduce.reducers import PvalPerms

# Labels come from make_classification; the feature matrix is then replaced
# by uniform random values of the same shape.
X, y = datasets.make_classification(
    n_samples=100,
    n_features=200,
    n_informative=2,
)
X = numpy.random.rand(*X.shape)

# Permutations( CV( Methods(l1-loss SVM, l2-loss SVM) ) ), 100 permutations.
perms_cv_svm = Perms(
    CV(
        Methods(
            SVM(loss="l1"),
            SVM(loss="l2"),
        )
    ),
    n_perms=100,
)
perms_cv_svm.run(X=X, y=y)
perms_cv_svm.reduce()

# ``self`` is bound to the workflow first and then rebound to a fresh
# PvalPerms reducer; ``key`` names the l1-loss method.
self = perms_cv_svm
key = 'LinearSVC(loss=l1)'
self = PvalPerms()
# -*- coding: utf-8 -*-
"""
Created on Thu May 23 15:21:35 2013

@author: ed203246
"""
# Scratch script: runs a permutation + cross-validation workflow over two
# LinearSVC variants on pure-noise features, then binds ``self`` and ``key``
# at module level (apparently for interactive inspection of PvalPerms).

# numpy and PvalPerms are used below, so they are imported here to keep the
# script runnable on its own.
import numpy
from sklearn import datasets
from sklearn.svm import LinearSVC as SVM
try:
    # Location used by newer scikit-learn releases.
    from sklearn.discriminant_analysis import (
        LinearDiscriminantAnalysis as LDA)
except ImportError:
    # Older scikit-learn releases only provide the legacy module.
    from sklearn.lda import LDA
from sklearn.feature_selection import SelectKBest

from epac import Perms, CV, Methods
from epac.map_reduce.reducers import PvalPerms

# Labels come from make_classification; the feature matrix is then replaced
# by uniform random values of the same shape.
X, y = datasets.make_classification(n_samples=100, n_features=200,
                                    n_informative=2)
X = numpy.random.rand(*X.shape)

perms_cv_svm = Perms(CV(Methods(SVM(loss="l1"), SVM(loss="l2"))),
                     n_perms=100)
perms_cv_svm.run(X=X, y=y)
perms_cv_svm.reduce()

# ``self`` is bound to the workflow first and then rebound to a fresh
# PvalPerms reducer; ``key`` names the l1-loss method.
self = perms_cv_svm
key = "LinearSVC(loss=l1)"
self = PvalPerms()