def test_pca_svm(self):
    """
    As a ML researcher, I want to evaluate a certain partly-defined model
    class, in order to do model-family comparisons.

    For example, PCA followed by linear SVM.
    """
    # Fix the pipeline shape (PCA -> linear SVM) and let hyperopt tune
    # only the hyperparameters of those two stages.
    algo = SklearnClassifier(
        partial(
            hyperopt_estimator,
            preprocessing=[hpc.pca('pca')],
            classifier=hpc.svc_linear('classif'),
            max_evals=10))
    mean_test_error = self.view.protocol(algo)
    # BUG FIX: was the Python-2-only ``print`` statement; use the print
    # function for consistency with the other tests in this file.
    print('mean test error:', mean_test_error)
def test_preproc(self):
    """
    As a domain expert, I have a particular pre-processing that I believe
    reveals important patterns in my data.  I would like to know how good
    a classifier can be built on top of my preprocessing algorithm.
    """
    # -- for testing purpose, suppose that the RBM is our "domain-specific
    #    pre-processing"

    # Spell out the three candidate pre-processing pipelines, then let
    # hyperopt choose among them via hp.choice.
    vq_alone = [
        hpc.colkmeans('vq0', n_init=1),
    ]
    vq_then_rbm = [
        hpc.colkmeans(
            'vq1',
            n_clusters=scope.int(
                hp.quniform('vq1.n_clusters', 1, 5, q=1)),
            n_init=1),
        hpc.rbm(name='rbm:alone', verbose=0),
    ]
    vq_rbm_then_pca = [
        hpc.colkmeans(
            'vq2',
            n_clusters=scope.int(
                hp.quniform('vq2.n_clusters', 1, 5, q=1)),
            n_init=1),
        hpc.rbm(name='rbm:pre-pca', verbose=0),
        hpc.pca('pca'),
    ]
    estimator_factory = partial(
        hyperopt_estimator,
        preprocessing=hp.choice(
            'pp', [vq_alone, vq_then_rbm, vq_rbm_then_pca]),
        classifier=hpc.any_classifier('classif'),
        algo=tpe.suggest,
        max_evals=10,
    )
    algo = SklearnClassifier(estimator_factory)
    mean_test_error = self.view.protocol(algo)
    print('mean test error:', mean_test_error)
def test_pca_svm(self):
    """
    As a ML researcher, I want to evaluate a certain parly-defined model
    class, in order to do model-family comparisons.

    For example, PCA followed by linear SVM.
    """
    # The pipeline shape is fixed here; hyperopt only searches the
    # hyperparameters of the PCA and linear-SVM stages.
    estimator_factory = partial(
        hyperopt_estimator,
        preprocessing=[hpc.pca('pca')],
        classifier=hpc.svc_linear('classif'),
        max_evals=10,
    )
    learner = SklearnClassifier(estimator_factory)
    mean_test_error = self.view.protocol(learner)
    print('mean test error:', mean_test_error)
def test_pca_svm(self):
    """
    As a ML researcher, I want to evaluate a certain parly-defined model
    class, in order to do model-family comparisons.

    For example, PCA followed by linear SVM.
    """
    # Fixed pipeline shape: PCA -> linear SVM; hyperopt searches only
    # the stage hyperparameters.
    estimator_factory = partial(
        hyperopt_estimator,
        preprocessing=[hpc.pca('pca')],
        classifier=hpc.svc_linear('classif'),
        # trial_timeout=30.0,  # seconds
        verbose=1,
        max_evals=10,
    )
    learner = LearningAlgo(estimator_factory)
    mean_test_error = self.view.protocol(learner)
    # Report on stderr so the banner survives stdout capture.
    print('\n====Iris: PCA + SVM====', file=sys.stderr)
    print('mean test error:', mean_test_error, file=sys.stderr)
    print('====End optimization====', file=sys.stderr)
def test_preproc(self):
    """
    As a domain expert, I have a particular pre-processing that I believe
    reveals important patterns in my data.  I would like to know how good
    a classifier can be built on top of my preprocessing algorithm.
    """
    # -- for testing purpose, suppose that the RBM is our "domain-specific
    #    pre-processing"
    algo = LearningAlgo(
        partial(
            hyperopt_estimator,
            preprocessing=hp.choice(
                'pp',
                [
                    # -- VQ (alone)
                    [
                        hpc.colkmeans(
                            'vq0',
                            n_clusters=scope.int(
                                hp.quniform(
                                    'vq0.n_clusters', 1.5, 5.5, q=1)),
                            n_init=1,
                            max_iter=100),
                    ],
                    # -- VQ -> RBM
                    [
                        hpc.colkmeans(
                            'vq1',
                            n_clusters=scope.int(
                                hp.quniform(
                                    'vq1.n_clusters', 1.5, 5.5, q=1)),
                            n_init=1,
                            max_iter=100),
                        hpc.rbm(
                            name='rbm:alone',
                            n_components=scope.int(
                                hp.qloguniform(
                                    'rbm1.n_components',
                                    np.log(4.5), np.log(20.5), 1)),
                            n_iter=100,
                            verbose=0),
                    ],
                    # -- VQ -> RBM -> PCA
                    [
                        hpc.colkmeans(
                            'vq2',
                            n_clusters=scope.int(
                                hp.quniform(
                                    'vq2.n_clusters', 1.5, 5.5, q=1)),
                            n_init=1,
                            max_iter=100),
                        hpc.rbm(
                            name='rbm:pre-pca',
                            n_components=scope.int(
                                hp.qloguniform(
                                    'rbm2.n_components',
                                    np.log(4.5), np.log(20.5), 1)),
                            n_iter=100,
                            verbose=0),
                        hpc.pca('pca'),
                    ],
                ]),
            classifier=hpc.any_classifier('classif'),
            algo=tpe.suggest,
            # trial_timeout=5.0,  # seconds
            verbose=1,
            max_evals=10,
        ))
    mean_test_error = self.view.protocol(algo)
    print('\n====Iris: VQ + RBM + PCA + any classifier====',
          file=sys.stderr)
    print('mean test error:', mean_test_error, file=sys.stderr)
    # BUG FIX: the closing banner previously went to stdout while the two
    # lines above went to stderr; keep the whole report on stderr.
    print('====End optimization====', file=sys.stderr)
HYPERTREE_EXISTS = False import time import argparse # remove headers, footers, and citations from 20 newsgroups data REMOVE_HEADERS = False # use the default settings up TfidfVectorizer before doing optimization PRE_VECTORIZE = False # Record the test score for every evaluation point #TEST_ALL_EVALS=True suppress_output = False optional_pca = hp.pchoice('preproc', [(0.8, [pca('pca')]), (0.1, [min_max_scaler('mms')]), (0.1, [])]) def score(y1, y2): length = len(y1) correct = 0.0 for i in xrange(length): if y1[i] == y2[i]: correct += 1.0 return correct / length # TODO: currently does not use seed for anything def sklearn_newsgroups(classifier,
import time import argparse # remove headers, footers, and citations from 20 newsgroups data REMOVE_HEADERS=False # use the default settings up TfidfVectorizer before doing optimization PRE_VECTORIZE=False # Record the test score for every evaluation point #TEST_ALL_EVALS=True suppress_output = False optional_pca = hp.pchoice('preproc', [ ( 0.8, [pca('pca')]), ( 0.1, [min_max_scaler('mms')]), ( 0.1, [] ) ]) def score( y1, y2 ): length = len( y1 ) correct = 0.0 for i in xrange(length): if y1[i] == y2[i]: correct += 1.0 return correct / length # TODO: currently does not use seed for anything def sklearn_newsgroups( classifier, algorithm, max_evals=100, seed=1, filename='none', preproc=[], loss=None ):