def make_methods_list():
    """Build the list of delayed classifier constructors to evaluate.

    Returns a list of joblib ``delayed(...)`` entries, one per classifier
    configuration: random forests (Gini and entropy), gradient boosting,
    k-nearest neighbours (uniform and distance weighted) and l1/l2
    penalized logistic regression, each over a small hyper-parameter grid.
    """
    methods = []

    # NOTE(review): a disabled SVM / decision-tree / naive-Bayes grid used to
    # sit here inside a dead triple-quoted string expression; removed as
    # commented-out code.

    # Random forests, Gini (default) and entropy splitting criteria.
    for n in [10, 50, 100, 200, 500]:
        methods.append(delayed(sl.ensemble.RandomForestClassifier)
                       (n_estimators=n))
    for n in [10, 50, 100, 200, 500]:
        methods.append(delayed(sl.ensemble.RandomForestClassifier)
                       (n_estimators=n, criterion='entropy'))

    # Gradient boosting over an (ensemble size, learning rate) grid.
    # BUG FIX: the keyword was renamed ``learn_rate`` -> ``learning_rate``
    # in scikit-learn 0.14; the old spelling raises TypeError.
    for n in [10, 50, 100, 200, 500]:
        for a in [.0001, .001, .01, .1, 1., 10.]:
            methods.append(delayed(sl.ensemble.GradientBoostingClassifier)
                           (n_estimators=n, learning_rate=a))

    # Nearest neighbours, uniform then distance weighting.
    for n in [1, 5, 10, 15, 20]:
        methods.append(delayed(sl.neighbors.KNeighborsClassifier)
                       (n_neighbors=n))
    for n in [1, 5, 10, 15, 20]:
        methods.append(delayed(sl.neighbors.KNeighborsClassifier)
                       (n_neighbors=n, weights='distance'))

    # l1-penalized logistic regression over a grid of C values.
    for c in [0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100]:
        methods.append(delayed(sl.linear_model.LogisticRegression)
                       (C=c, penalty='l1', tol=0.01))
    # l2-penalized logistic regression over the same grid.
    for c in [0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100]:
        methods.append(delayed(sl.linear_model.LogisticRegression)
                       (C=c, penalty='l2', tol=0.01))
    return methods
# Validate the worker index before computing its slice of the job stream.
if args.j < 0 or args.j >= n_jobs:
    raise ValueError('Job ID out of range.')

# Range of jobs [a, b) handled by this worker.
# BUG FIX: use integer floor division rather than int(J / n_jobs) -- the
# intermediate float can round the wrong way for large J.
job_batchsize = J // n_jobs
a = args.j * job_batchsize
# The last worker also absorbs the remainder when n_jobs does not divide J.
b = a + job_batchsize if (args.j < n_jobs - 1) else J

# Cross-validation folds: leave-one-out when k == 0, otherwise k-fold.
cv = LeaveOneOut(n) if (args.k == 0) else KFold(n, args.k)

if not args.a:
    # Lazily enumerate every (method, fold) job in a fixed order, then run
    # only this worker's slice [a, b) of the stream.
    jobs = iter(delayed(run_method)(method, X, y, train, test, force=args.f)
                for method, (train, test) in itertools.product(methods, cv))
    for job in itertools.islice(jobs, a, b):
        run_delayed(job)
else:
    # Aggregate results: one accuracy / wall-time slot per (method, fold),
    # NaN-filled so runs that never completed remain detectable.
    # NOTE(review): sized M * args.k, which looks wrong when args.k == 0
    # (leave-one-out yields n folds, not args.k) -- confirm with the caller.
    accs = np.empty(M * args.k)
    wall = np.empty(M * args.k)
    accs.fill(np.nan)
    wall.fill(np.nan)
    # Re-enumerate the same job stream, this time loading cached results.
    jobs = iter(delayed(run_method)(method, X, y, train, test, load=True)
                for method, (train, test) in itertools.product(methods, cv))