Example #1
0
 def launch_for_arrays(self, model, parameter_grid, X, y, n_cv_iter=5,
                       train_size=None, test_size=0.25, pre_warm=True,
                       folder=".", name=None, random_state=None):
     """Persist CV splits of (X, y) to disk, then launch the search on them.

     Convenience wrapper: materializes ``n_cv_iter`` train/test splits via
     ``persist_cv_splits`` and delegates the actual work to
     ``launch_for_splits``, asking it to collect the split files on reset.
     """
     split_filenames = persist_cv_splits(
         X, y, n_cv_iter=n_cv_iter, train_size=train_size,
         test_size=test_size, name=name, folder=folder,
         random_state=random_state)
     return self.launch_for_splits(
         model, parameter_grid, split_filenames,
         pre_warm=pre_warm, collect_files_on_reset=True)
Example #2
0
def sample_parallel_proc():
    """Demo: randomized grid search for LinearSVC on the digits dataset
    using pyrallel's memory-mapped CV splits.

    NOTE(review): relies on ``client`` and ``lb_view`` (presumably
    IPython.parallel handles) and on the ``reload`` builtin being available
    in the enclosing scope -- confirm against the caller.
    """
    from collections import OrderedDict

    import numpy as np
    from sklearn.datasets import load_digits
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.svm import LinearSVC
    from pyrallel import mmap_utils, model_selection

    # Pick up any interactive edits to the pyrallel modules.
    _ = reload(mmap_utils), reload(model_selection)

    dataset = load_digits()
    features = MinMaxScaler().fit_transform(dataset.data)
    labels = dataset.target

    # Persist 10 CV splits to disk and pre-load them on the engines.
    split_filenames = mmap_utils.persist_cv_splits('digits_10', features, labels, 10)
    mmap_utils.warm_mmap_on_cv_splits(client, split_filenames)

    search_params = OrderedDict([
        ('C', np.logspace(-2, 2, 5)),
    ])
    search = model_selection.RandomizedGridSeach(lb_view)  # sic: upstream spelling
    search.launch_for_splits(LinearSVC(), search_params, split_filenames)
Example #3
0
 def launch_for_arrays(self, model, parameter_grid, X, y, n_cv_iter=5,
                       train_size=None, test_size=0.25, pre_warm=True,
                       folder=".", name=None, random_state=None):
     """Write CV splits of (X, y) to disk, then run the search over them.

     Delegates split creation to ``persist_cv_splits`` and the search itself
     to ``launch_for_splits`` (with split-file collection enabled on reset).
     """
     split_options = dict(n_cv_iter=n_cv_iter,
                          train_size=train_size,
                          test_size=test_size,
                          name=name,
                          folder=folder,
                          random_state=random_state)
     cv_split_filenames = persist_cv_splits(X, y, **split_options)
     return self.launch_for_splits(model,
                                   parameter_grid,
                                   cv_split_filenames,
                                   pre_warm=pre_warm,
                                   collect_files_on_reset=True)
Example #4
0
File: foo.py  Project: jaberg/pyrallel
def main():
    """Run a hyperopt TPE search over a preprocessing + classifier space
    on the digits dataset, evaluating trials on an IPython cluster.

    NOTE(review): Python 2 syntax (print statements). Relies on names
    imported outside this excerpt (Client, hp, scope, np, mmap_utils,
    hyperselect, hyperopt, partial, compute_evaluation) -- confirm against
    the full module.
    """
    # Connect to the IPython.parallel cluster.
    client = Client()
    print 'n. clients: ', len(client)

    digits = load_digits()

    # Scale features to [0, 1] before either preprocessing option below.
    X = MinMaxScaler().fit_transform(digits.data)
    y = digits.target

    # Search space, branch 1: choose exactly one preprocessor (PCA or GMM).
    # The `1 + hp.qlognormal(...)` form keeps the sampled component count >= 1.
    pre_processing = hp.choice('preproc_algo', [
        scope.PCA(
            n_components=1 + hp.qlognormal(
                'pca_n_comp', np.log(10), np.log(10), 1),
            whiten=hp.choice(
                'pca_whiten', [False, True])),
        scope.GMM(
            n_components=1 + hp.qlognormal(
                'gmm_n_comp', np.log(100), np.log(10), 1),
            covariance_type=hp.choice(
                'gmm_covtype', ['spherical', 'tied', 'diag', 'full'])),
        ])

    # Search space, branch 2: choose exactly one classifier (tree or RBF SVC),
    # each with its own hyperparameter distributions.
    classifier = hp.choice('classifier', [
        scope.DecisionTreeClassifier(
            criterion=hp.choice('dtree_criterion', ['gini', 'entropy']),
            max_features=hp.uniform('dtree_max_features', 0, 1),
            max_depth=hp.quniform('dtree_max_depth', 1, 25, 1)),
        scope.SVC(
            C=hp.lognormal('svc_rbf_C', 0, 3),
            kernel='rbf',
            gamma=hp.lognormal('svc_rbf_gamma', 0, 2),
            tol=hp.lognormal('svc_rbf_tol', np.log(1e-3), 1)),
        ])

    # Full pipeline space handed to hyperopt.
    sklearn_space = {'pre_processing': pre_processing,
                     'classifier': classifier}

    # Persist 10 CV splits to disk and pre-load (memory-map) them on the
    # engines so trial evaluation does not re-read the data each time.
    digits_cv_split_filenames = mmap_utils.persist_cv_splits(
                X, y, name='digits_10', n_cv_iter=10)

    mmap_utils.warm_mmap_on_cv_splits(client, digits_cv_split_filenames)

    # TPE search, 30 evaluations, each scored on the first CV split only.
    trials = hyperselect.IPythonTrials(client)
    trials.fmin(
        partial(compute_evaluation,
            cv_split_filename=digits_cv_split_filenames[0],
            ),
        sklearn_space,
        algo=hyperopt.tpe.suggest,
        max_evals=30,
        verbose=1,
        )
    trials.wait()
    print trials.best_trial