def launch_for_arrays(self, model, parameter_grid, X, y, n_cv_iter=5,
                      train_size=None, test_size=0.25, pre_warm=True,
                      folder=".", name=None, random_state=None):
    """Persist CV splits for the in-memory arrays, then schedule the search.

    Convenience wrapper: materializes the cross-validation folds as files
    (via ``persist_cv_splits``) and delegates the actual parameter-grid
    evaluation to ``launch_for_splits``.

    NOTE(review): ``persist_cv_splits`` is defined elsewhere in this module
    — behavior of the persisted files is assumed from its name.
    """
    # Write the CV folds to disk so worker processes can map them in.
    split_files = persist_cv_splits(
        X, y,
        n_cv_iter=n_cv_iter,
        train_size=train_size,
        test_size=test_size,
        name=name,
        folder=folder,
        random_state=random_state,
    )
    # The freshly persisted files belong to this search: ask the engine to
    # collect (clean up) them whenever the search is reset.
    return self.launch_for_splits(
        model, parameter_grid, split_files,
        pre_warm=pre_warm,
        collect_files_on_reset=True,
    )
def sample_parallel_proc():
    """Demo: randomized LinearSVC grid search on digits through pyrallel.

    NOTE(review): relies on the globals ``client`` (IPython parallel Client)
    and ``lb_view`` (load-balanced view), plus the Python 2 builtin
    ``reload`` — confirm they are provided by the calling notebook/script.
    """
    from pyrallel import mmap_utils, model_selection
    # Pick up any in-flight edits to the pyrallel modules (notebook workflow).
    _ = reload(mmap_utils), reload(model_selection)

    from sklearn.datasets import load_digits
    from sklearn.preprocessing import MinMaxScaler

    digits = load_digits()
    features = MinMaxScaler().fit_transform(digits.data)
    labels = digits.target

    cv_filenames = mmap_utils.persist_cv_splits('digits_10', features, labels, 10)
    # Pre-load the memory-mapped CV files on every engine before searching.
    mmap_utils.warm_mmap_on_cv_splits(client, cv_filenames)

    from sklearn.svm import LinearSVC
    from collections import OrderedDict
    import numpy as np

    svc_params = OrderedDict((
        ('C', np.logspace(-2, 2, 5)),
    ))
    svc_model = LinearSVC()

    # (sic) "RandomizedGridSeach" is the class name as spelled in pyrallel.
    search = model_selection.RandomizedGridSeach(lb_view)
    search.launch_for_splits(svc_model, svc_params, cv_filenames)
def launch_for_arrays(self, model, parameter_grid, X, y, n_cv_iter=5,
                      train_size=None, test_size=0.25, pre_warm=True,
                      folder=".", name=None, random_state=None):
    """Dump CV splits for (X, y) to files and launch the grid search on them.

    NOTE(review): thin wrapper around ``persist_cv_splits`` (defined
    elsewhere in this module) followed by ``launch_for_splits``.
    """
    # Forward all persistence knobs in one bundle; X/y stay positional.
    persist_kwargs = dict(
        n_cv_iter=n_cv_iter,
        train_size=train_size,
        test_size=test_size,
        name=name,
        folder=folder,
        random_state=random_state,
    )
    cv_split_filenames = persist_cv_splits(X, y, **persist_kwargs)
    # Files created here are transient: have the reset path collect them.
    return self.launch_for_splits(model, parameter_grid, cv_split_filenames,
                                  pre_warm=pre_warm,
                                  collect_files_on_reset=True)
def main(): client = Client() print 'n. clients: ', len(client) digits = load_digits() X = MinMaxScaler().fit_transform(digits.data) y = digits.target pre_processing = hp.choice('preproc_algo', [ scope.PCA( n_components=1 + hp.qlognormal( 'pca_n_comp', np.log(10), np.log(10), 1), whiten=hp.choice( 'pca_whiten', [False, True])), scope.GMM( n_components=1 + hp.qlognormal( 'gmm_n_comp', np.log(100), np.log(10), 1), covariance_type=hp.choice( 'gmm_covtype', ['spherical', 'tied', 'diag', 'full'])), ]) classifier = hp.choice('classifier', [ scope.DecisionTreeClassifier( criterion=hp.choice('dtree_criterion', ['gini', 'entropy']), max_features=hp.uniform('dtree_max_features', 0, 1), max_depth=hp.quniform('dtree_max_depth', 1, 25, 1)), scope.SVC( C=hp.lognormal('svc_rbf_C', 0, 3), kernel='rbf', gamma=hp.lognormal('svc_rbf_gamma', 0, 2), tol=hp.lognormal('svc_rbf_tol', np.log(1e-3), 1)), ]) sklearn_space = {'pre_processing': pre_processing, 'classifier': classifier} digits_cv_split_filenames = mmap_utils.persist_cv_splits( X, y, name='digits_10', n_cv_iter=10) mmap_utils.warm_mmap_on_cv_splits(client, digits_cv_split_filenames) trials = hyperselect.IPythonTrials(client) trials.fmin( partial(compute_evaluation, cv_split_filename=digits_cv_split_filenames[0], ), sklearn_space, algo=hyperopt.tpe.suggest, max_evals=30, verbose=1, ) trials.wait() print trials.best_trial