Ejemplo n.º 1
0
    def test_pca_svm(self):
        """
        As a ML researcher, I want to evaluate a certain partly-defined model
        class, in order to do model-family comparisons.

        For example, PCA followed by linear SVM.

        The estimator factory is built with ``max_evals=10`` and handed to
        ``self.view.protocol``; whatever that call returns is printed,
        labelled as the mean test error.
        """
        algo = SklearnClassifier(
            partial(hyperopt_estimator,
                    preprocessing=[hpc.pca('pca')],
                    classifier=hpc.svc_linear('classif'),
                    max_evals=10))
        mean_test_error = self.view.protocol(algo)
        # A single pre-formatted argument prints the same line whether
        # ``print`` is the Python 2 statement or the Python 3 function; the
        # bare ``print a, b`` statement is a SyntaxError on Python 3.
        print('mean test error: %s' % (mean_test_error,))
Ejemplo n.º 2
0
    def test_preproc(self):
        """
        Scenario: a domain expert trusts a particular pre-processing step to
        expose the important patterns in the data, and wants to find out how
        good a classifier can be built on top of it.
        """

        # -- the RBM plays the role of the "domain-specific pre-processing"
        #    for the purposes of this test

        # -- VQ (alone)
        vq_only = [hpc.colkmeans('vq0', n_init=1)]

        # -- VQ -> RBM
        vq1_clusters = scope.int(hp.quniform('vq1.n_clusters', 1, 5, q=1))
        vq_then_rbm = [
            hpc.colkmeans('vq1', n_clusters=vq1_clusters, n_init=1),
            hpc.rbm(name='rbm:alone', verbose=0),
        ]

        # -- VQ -> RBM -> PCA
        vq2_clusters = scope.int(hp.quniform('vq2.n_clusters', 1, 5, q=1))
        vq_rbm_pca = [
            hpc.colkmeans('vq2', n_clusters=vq2_clusters, n_init=1),
            hpc.rbm(name='rbm:pre-pca', verbose=0),
            hpc.pca('pca'),
        ]

        # The search picks exactly one of the three pipelines above.
        pipeline_choice = hp.choice('pp', [vq_only, vq_then_rbm, vq_rbm_pca])

        estimator_factory = partial(
            hyperopt_estimator,
            preprocessing=pipeline_choice,
            classifier=hpc.any_classifier('classif'),
            algo=tpe.suggest,
            max_evals=10,
        )
        algo = SklearnClassifier(estimator_factory)
        mean_test_error = self.view.protocol(algo)
        print('mean test error:', mean_test_error)
Ejemplo n.º 3
0
    def test_pca_svm(self):
        """
        Scenario: an ML researcher wants to evaluate a partly-defined model
        class so that whole model families can be compared.

        The family exercised here is PCA followed by a linear SVM.
        """
        # Factory for the estimator: PCA preprocessing, linear SVC
        # classifier, search capped at 10 evaluations.
        estimator_factory = partial(
            hyperopt_estimator,
            preprocessing=[hpc.pca('pca')],
            classifier=hpc.svc_linear('classif'),
            max_evals=10,
        )
        algo = SklearnClassifier(estimator_factory)
        mean_test_error = self.view.protocol(algo)
        print('mean test error:', mean_test_error)
Ejemplo n.º 4
0
    def test_pca_svm(self):
        """
        Scenario: an ML researcher wants to evaluate a partly-defined model
        class so that whole model families can be compared.

        The family exercised here is PCA followed by a linear SVM; the
        result is reported on stderr between two banner lines.
        """
        estimator_kwargs = dict(
            preprocessing=[hpc.pca('pca')],
            classifier=hpc.svc_linear('classif'),
            # trial_timeout=30.0,  # seconds
            verbose=1,
            max_evals=10,
        )
        algo = LearningAlgo(partial(hyperopt_estimator, **estimator_kwargs))
        mean_test_error = self.view.protocol(algo)

        # All three report lines go to the same stream.
        report = sys.stderr
        print('\n====Iris: PCA + SVM====', file=report)
        print('mean test error:', mean_test_error, file=report)
        print('====End optimization====', file=report)
Ejemplo n.º 5
0
    def test_preproc(self):
        """
        As a domain expert, I have a particular pre-processing that I believe
        reveals important patterns in my data.  I would like to know how good
        a classifier can be built on top of my preprocessing algorithm.

        The search chooses among three preprocessing pipelines (VQ alone,
        VQ -> RBM, VQ -> RBM -> PCA), combines the choice with
        ``hpc.any_classifier``, and uses ``tpe.suggest`` with
        ``max_evals=10``.  The result is reported on stderr.
        """

        # -- for testing purpose, suppose that the RBM is our "domain-specific
        #    pre-processing"

        def vq_step(label):
            # hpc.colkmeans step whose cluster count is a searched integer
            # registered under '<label>.n_clusters'.
            return hpc.colkmeans(
                label,
                n_clusters=scope.int(
                    hp.quniform(label + '.n_clusters', 1.5, 5.5, q=1)),
                n_init=1,
                max_iter=100)

        def rbm_step(name, label):
            # hpc.rbm step whose component count is a searched integer
            # registered under '<label>.n_components'.
            return hpc.rbm(
                name=name,
                n_components=scope.int(
                    hp.qloguniform(label + '.n_components',
                                   np.log(4.5),
                                   np.log(20.5), 1)),
                n_iter=100,
                verbose=0)

        algo = LearningAlgo(
            partial(
                hyperopt_estimator,
                preprocessing=hp.choice(
                    'pp',
                    [
                        # -- VQ (alone)
                        [vq_step('vq0')],
                        # -- VQ -> RBM
                        [vq_step('vq1'),
                         rbm_step('rbm:alone', 'rbm1')],
                        # -- VQ -> RBM -> PCA
                        [vq_step('vq2'),
                         rbm_step('rbm:pre-pca', 'rbm2'),
                         hpc.pca('pca')],
                    ]),
                classifier=hpc.any_classifier('classif'),
                algo=tpe.suggest,
                # trial_timeout=5.0,  # seconds
                verbose=1,
                max_evals=10,
            ))
        mean_test_error = self.view.protocol(algo)
        print('\n====Iris: VQ + RBM + PCA + any classifier====',
              file=sys.stderr)
        print('mean test error:', mean_test_error, file=sys.stderr)
        # The closing banner goes to stderr like the two lines above, so the
        # whole report stays on one stream.
        print('====End optimization====', file=sys.stderr)
Ejemplo n.º 6
0
    HYPERTREE_EXISTS = False

import time

import argparse

# Strip headers, footers, and citations from the 20 newsgroups data.
REMOVE_HEADERS = False
# Apply TfidfVectorizer with its default settings before optimization.
PRE_VECTORIZE = False
# Record the test score for every evaluation point
#TEST_ALL_EVALS=True

suppress_output = False

# Weighted choice of preprocessing pipeline: PCA with probability 0.8,
# min-max scaling with probability 0.1, no preprocessing with probability 0.1.
optional_pca = hp.pchoice(
    'preproc',
    [
        (0.8, [pca('pca')]),
        (0.1, [min_max_scaler('mms')]),
        (0.1, []),
    ],
)


def score(y1, y2):
    """Return the fraction of positions at which ``y1`` and ``y2`` agree.

    Args:
        y1: Indexable sequence of labels; its length sets how many
            positions are compared.
        y2: Indexable sequence of labels, indexed at every position of
            ``y1``.

    Returns:
        float: The count of indices ``i`` with ``y1[i] == y2[i]`` divided
        by ``len(y1)``.

    Raises:
        ZeroDivisionError: If ``y1`` is empty.
    """
    length = len(y1)
    # ``range`` rather than the Python-2-only ``xrange`` so the function
    # also runs on Python 3.
    matches = sum(1 for i in range(length) if y1[i] == y2[i])
    # Force float division so Python 2 does not floor the ratio.
    return float(matches) / length


# TODO: currently does not use seed for anything
def sklearn_newsgroups(classifier,
import time

import argparse

# Strip headers, footers, and citations from the 20 newsgroups data.
REMOVE_HEADERS = False
# Apply TfidfVectorizer with its default settings before optimization.
PRE_VECTORIZE = False
# Record the test score for every evaluation point
#TEST_ALL_EVALS=True

suppress_output = False

# Weighted choice of preprocessing pipeline: PCA with probability 0.8,
# min-max scaling with probability 0.1, no preprocessing with probability 0.1.
optional_pca = hp.pchoice(
    'preproc',
    [
        (0.8, [pca('pca')]),
        (0.1, [min_max_scaler('mms')]),
        (0.1, []),
    ],
)
 
def score(y1, y2):
    """Return the fraction of positions at which ``y1`` and ``y2`` agree.

    Args:
        y1: Indexable sequence of labels; its length sets how many
            positions are compared.
        y2: Indexable sequence of labels, indexed at every position of
            ``y1``.

    Returns:
        float: The count of indices ``i`` with ``y1[i] == y2[i]`` divided
        by ``len(y1)``.

    Raises:
        ZeroDivisionError: If ``y1`` is empty.
    """
    length = len(y1)
    # ``range`` rather than the Python-2-only ``xrange`` so the function
    # also runs on Python 3.
    matches = sum(1 for i in range(length) if y1[i] == y2[i])
    # Force float division so Python 2 does not floor the ratio.
    return float(matches) / length

# TODO: currently does not use seed for anything
def sklearn_newsgroups( classifier, algorithm, max_evals=100, seed=1,
                        filename='none', preproc=[], loss=None ):