Code example #1
# Imports needed by this snippet (module paths as in older hpsklearn; assumed):
import numpy as np
from sklearn.datasets import load_digits
from hpsklearn.estimator import hyperopt_estimator
from hpsklearn.components import any_classifier

def sklearn_digits(classifier=None):
  if classifier is None:
    classifier = any_classifier('any')
  estim = hyperopt_estimator(classifier=classifier)

  digits = load_digits()
  X = digits.data
  y = digits.target

  # Hold out the last 50 shuffled samples as a test set.
  test_size = 50
  np.random.seed(0)
  indices = np.random.permutation(len(X))
  X_train = X[indices[:-test_size]]
  y_train = y[indices[:-test_size]]
  X_test = X[indices[-test_size:]]
  y_test = y[indices[-test_size:]]

  estim.fit(X_train, y_train)

  pred = estim.predict(X_test)
  print(pred)
  print(y_test)

  # `score` was undefined in the original; plain accuracy is the likely intent.
  print(np.mean(pred == y_test))

  print(estim.best_model())
Code example #2
 def test_fit_biginc(self):
     model = hyperopt_estimator(
         classifier=components.any_classifier('classifier'),
         verbose=1, max_evals=5, trial_timeout=5.0, fit_increment=20)
     model.fit(self.X, self.Y)
     # -- make sure we only get 5 even with big fit_increment
     assert len(model.trials.trials) == 5
Code example #3
 def test_fit(self):
     model = hyperopt_estimator(
         classifier=components.any_classifier('classifier'),
         verbose=1,
         max_evals=5,
         trial_timeout=5.0)
     model.fit(self.X, self.Y)
     assert len(model.trials.trials) == 5
Code example #4
 def test_fit_iter_basic(self):
     model = hyperopt_estimator(
         classifier=components.any_classifier('classifier'), 
         verbose=1, trial_timeout=5.0)
     for ii, trials in enumerate(model.fit_iter(self.X, self.Y)):
         assert trials is model.trials
         assert len(trials.trials) == ii
         if ii == 10:
             break
Code example #5
 def setUp(self):
     self.algo = SklearnClassifier(
         partial(
             hyperopt_estimator,
             preprocessing=simple_small_image_preprocessing('pp'),
             classifier=hpc.any_classifier('classif'),
             max_evals=100,
             verbose=1,
             algo=tpe.suggest,
             fit_timeout=5.0, # -- seconds
             ))
Code example #6
 def test_warm_start(self):
     model = hyperopt_estimator(
         classifier=components.any_classifier('classifier'), 
         verbose=1, max_evals=5, trial_timeout=5.0)
     params = model.get_params()
     assert params['algo'] == rand.suggest
     assert params['max_evals'] == 5
     model.fit(self.X, self.Y, warm_start=False)
     assert len(model.trials.trials) == 5
     model.set_params(algo=tpe.suggest, max_evals=10)
     params = model.get_params()
     assert params['algo'] == tpe.suggest
     assert params['max_evals'] == 10
     model.fit(self.X, self.Y, warm_start=True)
     assert len(model.trials.trials) == 15  # 5 + 10 = 15.
Code example #7
    def test_preproc(self):
        """
        As a domain expert, I have a particular pre-processing that I believe
        reveals important patterns in my data.  I would like to know how good
        a classifier can be built on top of my preprocessing algorithm.
        """

        # -- for testing purpose, suppose that the RBM is our "domain-specific
        #    pre-processing"

        algo = SklearnClassifier(
            partial(
                hyperopt_estimator,
                preprocessing=hp.choice('pp',
                    [
                        # -- VQ (alone)
                        [
                            hpc.colkmeans('vq0',
                                n_init=1),
                        ],
                        # -- VQ -> RBM
                        [
                            hpc.colkmeans('vq1',
                                n_clusters=scope.int(
                                    hp.quniform(
                                        'vq1.n_clusters', 1, 5, q=1)),
                                n_init=1),
                            hpc.rbm(name='rbm:alone',
                                verbose=0)
                        ],
                        # -- VQ -> RBM -> PCA
                        [
                            hpc.colkmeans('vq2',
                                n_clusters=scope.int(
                                    hp.quniform(
                                        'vq2.n_clusters', 1, 5, q=1)),
                                n_init=1),
                            hpc.rbm(name='rbm:pre-pca',
                                verbose=0),
                            hpc.pca('pca')
                        ],
                    ]),
                classifier=hpc.any_classifier('classif'),
                algo=tpe.suggest,
                max_evals=10,
                ))
        mean_test_error = self.view.protocol(algo)
        print('mean test error:', mean_test_error)
Code example #8
import hyperopt
from sklearn.pipeline import Pipeline
from hpsklearn.components import any_classifier, any_preprocessing

def create_random_pipeline():
    # Draw one random configuration from the joint classifier/preprocessor space.
    pipeline_space = {'clf': any_classifier('my_clf'),
                      'preprocessor': any_preprocessing('my_prep')}
    sample = hyperopt.pyll.stochastic.sample(pipeline_space)

    classifier = sample['clf']
    # any_preprocessing yields a (possibly empty) list of steps, so check
    # emptiness explicitly instead of the original bare `except:`.
    if sample['preprocessor']:
        preprocessor = sample['preprocessor'][0]
        p = Pipeline([('preprocessing', preprocessor), ('classifier', classifier)])
    else:
        p = Pipeline([('classifier', classifier)])

    return p
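A quick usage sketch for the sampler above (hypothetical, not from the source; assumes scikit-learn's bundled iris data):

from sklearn.datasets import load_iris

# Draw one random pipeline and evaluate it once (illustrative only; some
# sampled configurations may legitimately fail on a given dataset).
X, y = load_iris(return_X_y=True)
pipe = create_random_pipeline()
pipe.fit(X, y)
print(pipe.score(X, y))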
Code example #9
    def test_search_all(self):
        """
        As a ML researcher, I want a quick way to do model selection
        implicitly, in order to get a baseline accuracy score for a new data
        set.

        """
        algo = LearningAlgo(
            partial(
                hyperopt_estimator,
                classifier=hpc.any_classifier('classifier'),
                # trial_timeout=15.0,  # seconds
                verbose=1,
                max_evals=10,
            ))
        mean_test_error = self.view.protocol(algo)
        print('\n====Iris: any preprocessing + any classifier====',
              file=sys.stderr)
        print('mean test error:', mean_test_error, file=sys.stderr)
        print('====End optimization====', file=sys.stderr)
Code example #10
def mnist_digits():
  estim = hyperopt_estimator(classifier=any_classifier('hai'))

  # fetch_mldata was removed in scikit-learn 0.22; see the fetch_openml
  # sketch below for current versions.
  digits = fetch_mldata('MNIST original')
  X = digits.data
  y = digits.target

  # Hold out a random 20% of the samples as a test set.
  test_size = int(0.2 * len(y))
  np.random.seed(0)
  indices = np.random.permutation(len(X))
  X_train = X[indices[:-test_size]]
  y_train = y[indices[:-test_size]]
  X_test = X[indices[-test_size:]]
  y_test = y[indices[-test_size:]]

  estim.fit(X_train, y_train)

  pred = estim.predict(X_test)
  print(pred)
  print(y_test)

  # `score` was undefined in the original; plain accuracy is the likely intent.
  print(np.mean(pred == y_test))

  print(estim.best_model())
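On current scikit-learn versions the equivalent download goes through fetch_openml (a sketch, assuming the standard 'mnist_784' OpenML dataset name):

from sklearn.datasets import fetch_openml

# Returns the same 70,000 x 784 pixel matrix; note the labels come back as strings.
digits = fetch_openml('mnist_784', as_frame=False)
X = digits.data
y = digits.target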
Code example #11
    def test_preproc(self):
        """
        As a domain expert, I have a particular pre-processing that I believe
        reveals important patterns in my data.  I would like to know how good
        a classifier can be built on top of my preprocessing algorithm.
        """

        # -- for testing purpose, suppose that the RBM is our "domain-specific
        #    pre-processing"

        algo = LearningAlgo(
            partial(
                hyperopt_estimator,
                preprocessing=hp.choice(
                    'pp',
                    [
                        # -- VQ (alone)
                        [
                            hpc.colkmeans(
                                'vq0',
                                n_clusters=scope.int(
                                    hp.quniform(
                                        'vq0.n_clusters', 1.5, 5.5, q=1)),
                                n_init=1,
                                max_iter=100),
                        ],
                        # -- VQ -> RBM
                        [
                            hpc.colkmeans(
                                'vq1',
                                n_clusters=scope.int(
                                    hp.quniform(
                                        'vq1.n_clusters', 1.5, 5.5, q=1)),
                                n_init=1,
                                max_iter=100),
                            hpc.rbm(name='rbm:alone',
                                    n_components=scope.int(
                                        hp.qloguniform('rbm1.n_components',
                                                       np.log(4.5),
                                                       np.log(20.5), 1)),
                                    n_iter=100,
                                    verbose=0)
                        ],
                        # -- VQ -> RBM -> PCA
                        [
                            hpc.colkmeans('vq2',
                                          n_clusters=scope.int(
                                              hp.quniform('vq2.n_clusters',
                                                          1.5,
                                                          5.5,
                                                          q=1)),
                                          n_init=1,
                                          max_iter=100),
                            hpc.rbm(name='rbm:pre-pca',
                                    n_components=scope.int(
                                        hp.qloguniform('rbm2.n_components',
                                                       np.log(4.5),
                                                       np.log(20.5), 1)),
                                    n_iter=100,
                                    verbose=0),
                            hpc.pca('pca')
                        ],
                    ]),
                classifier=hpc.any_classifier('classif'),
                algo=tpe.suggest,
                #trial_timeout=5.0,  # seconds
                verbose=1,
                max_evals=10,
            ))
        mean_test_error = self.view.protocol(algo)
        print('\n====Iris: VQ + RBM + PCA + any classifier====',
              file=sys.stderr)
        print('mean test error:', mean_test_error, file=sys.stderr)
        print('====End optimization====', file=sys.stderr)
Code example #12
                'loss': -1 * np.mean(scores),
                'status': STATUS_OK,
                'training_time': training_time,
                'total_time': total_time
            }
        except Exception:
            total_time = time.time() - start_time
            return {
                'loss': np.inf,
                'status': STATUS_OK,
                'training_time': 0,
                'total_time': total_time
            }

    pipeline_space = {
        'clf': any_classifier('my_clf'),
        'preprocessor': any_preprocessing('my_prep')
    }

    print(pipeline_space)

    trials = Trials()
    best = fmin(objective,
                space=pipeline_space,
                algo=tpe.suggest,
                max_evals=200,
                trials=trials)

    print(trials.best_trial)

    pickle.dump(trials, open("/tmp/trials.p", "wb"))
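The top half of objective() is cut off in the fragment above. A minimal sketch of what it presumably does, given the keys in pipeline_space and the fields of the returned dict (hypothetical reconstruction; X and y are assumed to be in scope):

import time
import numpy as np
from hyperopt import STATUS_OK
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline

def objective(args):
    start_time = time.time()
    try:
        # any_preprocessing yields a (possibly empty) list of steps.
        pp_steps = [('pp%d' % i, step) for i, step in enumerate(args['preprocessor'])]
        pipe = Pipeline(pp_steps + [('classifier', args['clf'])])
        t0 = time.time()
        scores = cross_val_score(pipe, X, y, cv=3)
        training_time = time.time() - t0
        total_time = time.time() - start_time
        return {
            'loss': -1 * np.mean(scores),
            'status': STATUS_OK,
            'training_time': training_time,
            'total_time': total_time
        }
    except Exception:
        total_time = time.time() - start_time
        return {
            'loss': np.inf,
            'status': STATUS_OK,
            'training_time': 0,
            'total_time': total_time
        }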
Code example #13
def main():
  dig = Digits()
  #dig = Digits( use_mnist=True )
  #dig.run( test_size=20 )
  """
  best = fmin( fn=dig.hyperopt_wrapper,
              space={'classifier' : 'LinearSVC',
                     'C' : hp.lognormal('svm_C', 0, 1 ),
                     'loss' : hp.choice('loss', ['l1', 'l2'])
                    },
               algo=tpe.suggest,
               max_evals=50 )
  """
  """
  best = fmin( fn=dig.hyperopt_wrapper,
               space=(
                      hp.lognormal('C', 0, 2 ),
                      hp.choice('loss', ['l1', 'l2']),
                      #hp.choice('penalty', ['l1', 'l2'])
                     ),
               algo=tpe.suggest,
               max_evals=30 )
  """
  """
  best = fmin( fn=dig.hyperopt_wrapper,
               space={
                      'C':hp.lognormal('C', 0, 2 ),
                      'loss':hp.choice('loss', ['l1', 'l2']),
                     },
               algo=tpe.suggest,
               max_evals=30 )
  """
  """
  best = fmin( fn=dig.hyperopt_wrapper,
               space={
                     'C':hp.lognormal('C', 0.0, 2.0 ),
                     'kernel':hp.choice('kernel', ['rbf', 'sigmoid', 'linear']),
                     },
               algo=tpe.suggest,
               max_evals=20 )
  """
  from hpsklearn.components import svc_linear, sklearn_SVC, any_classifier
  #print(svc_linear('hai'))
  print(sklearn_SVC())
  #"""
  best = fmin( fn=dig.hyperopt_sklearn_wrapper,
               #space={'classifier':any_classifier('hai'),'preprocessing':[]},
               space=any_classifier('hai'),
               algo=tpe.suggest,
               max_evals=20 )
  #"""
  """
  best = fmin( fn=dig.hyperopt_wrapper,
               space={
                     'C':hp.lognormal('C', 0, 2 ),
                     'svmkernel':hp.choice('svmkernel', [
                       { 'kernel':'rbf' },
                       { 'kernel':'sigmoid' },
                       { 'kernel':'linear', 
                         'degree':hp.quniform( 'degree', 1, 5, 1 ) } ] ),
                     },
               algo=tpe.suggest,
               max_evals=20 )
  """
  """
  from hpsklearn.components import svc_linear
  best = fmin( fn=dig.hyperopt_wrapper,
               space=svc_linear('name of svc'),
               algo=tpe.suggest,
               max_evals=30 )
  """
  print(best)
  #print(dig.best_f1_score)
  print("Precision Score: %s" % dig.best_score)
  ##print("C value: %s" % dig.best_C)
  #dig.test_model( best, classifier="SVC" )
  # Convert from index to string
  #best['kernel'] = ['rbf', 'sigmoid', 'linear'][best['kernel']]
  dig.test_model( classifier="SVC", **best )
Code example #14
def main(data='newsgroups',
         algo='tpe',
         seed=1,
         evals=100,
         clf='any',
         loss=None,
         pre='any',
         text=''):
    filename = text + algo + '_' + clf + '_' + pre + '_' + str(seed) + '_' + str(evals) + \
               '_' + data

    if loss is not None:
        if hasattr(metrics, loss):
            loss = getattr(metrics, loss)
        else:
            print('Unknown loss metric specified')
            return 1

    if algo == 'tpe':
        algorithm = tpe.suggest
    elif algo == 'anneal':
        algorithm = anneal.suggest
    elif algo == 'rand':
        algorithm = rand.suggest
    elif algo == 'tree':
        algorithm = hypertree.tree.suggest
    elif algo == 'gp_tree':
        algorithm = hypertree.gp_tree.suggest
    else:
        print('Unknown algorithm specified')
        return 1

    # TODO: impose restrictions on classifiers that do not work on sparse data
    if clf == 'any':
        if data in ['newsgroups']:
            classifier = any_sparse_classifier('clf')
        else:
            classifier = any_classifier('clf')
    elif clf == 'knn':
        if data in ['newsgroups']:
            classifier = knn('clf', sparse_data=True)
        else:
            classifier = knn('clf')
    elif clf == 'nearest_centroid':
        if data in ['newsgroups']:
            classifier = nearest_centroid('clf', sparse_data=True)
        else:
            classifier = nearest_centroid('clf')
    elif hasattr(hpsklearn.components, clf):
        classifier = getattr(hpsklearn.components, clf)('clf')
    else:
        print('Unknown classifier specified')
        return 1
    """
  elif clf == 'svc':
    classifier = svc('clf') 
  elif clf == 'knn':
    if data in ['newsgroups']:
      classifier = knn('clf', sparse_data=True) 
    else:
      classifier = knn('clf') 
  elif clf == 'sgd':
    classifier = sgd('clf') 
  elif clf == 'random_forest':
    classifier = random_forest('clf') 
  elif clf == 'extra_trees':
    classifier = extra_trees('clf') 
  elif clf == 'liblinear_svc':
    classifier = liblinear_svc('clf') 
  elif clf == 'multinomial_nb':
    classifier = multinomial_nb('clf') 
  elif clf == 'nearest_centroid':
    if data in ['newsgroups']:
      classifier = nearest_centroid('clf', sparse_data=True) 
    else:
      classifier = nearest_centroid('clf') 
  elif clf == 'rbm':
    classifier = rbm('clf') 
  elif clf == 'colkmeans':
    classifier = colkmeans('clf') 
  else:
    print( 'Unknown classifier specified' )
    return 1
  """

    if pre == 'any':
        if data in ['newsgroups']:
            preproc = any_text_preprocessing('pre')
        else:
            preproc = any_preprocessing('pre')
    elif pre == 'none':
        preproc = []
    elif hasattr(hpsklearn.components, pre):
        preproc = [getattr(hpsklearn.components, pre)('pre')]
    else:
        print('Unknown preprocessing specified')
        return 1
    """
  elif pre == 'pca':
    preproc = [pca('pre')]
  elif pre == 'standard_scaler':
    preproc = [standard_scaler('pre')]
  elif pre == 'min_max_scaler':
    preproc = [min_max_scaler('pre')]
  elif pre == 'normalizer':
    preproc = [normalizer('pre')]
  elif pre == 'tfidf':
    preproc = [tfidf('pre')]
  """

    if data == 'newsgroups':
        sklearn_newsgroups(classifier=classifier,
                           algorithm=algorithm,
                           max_evals=evals,
                           seed=seed,
                           filename=filename,
                           preproc=preproc,
                           loss=loss)
    elif data == 'convex':
        if CONVEX_EXISTS:
            sklearn_convex(classifier=classifier,
                           algorithm=algorithm,
                           max_evals=evals,
                           seed=seed,
                           filename=filename,
                           preproc=preproc,
                           loss=loss)
        else:
            print(
                "Convex dataset not detected on your system, install from MLPython"
            )
            return 1
    elif data == 'mnist':
        sklearn_mnist(classifier=classifier,
                      algorithm=algorithm,
                      max_evals=evals,
                      seed=seed,
                      filename=filename,
                      preproc=preproc,
                      loss=loss)
    elif data == 'digits':
        sklearn_digits(classifier=classifier,
                       algorithm=algorithm,
                       max_evals=evals,
                       seed=seed,
                       filename=filename,
                       preproc=preproc,
                       loss=loss)
    else:
        print("Unknown dataset specified")