Example #1
def __init__(self):
    '''Here you may define the structure of your model. You can create your
    own kind of ensemble: ensembles of pipelines or pipelines of ensembles.
    This example votes among two classifiers: BasicClassifier and a pipeline
    whose classifier is itself an ensemble of GaussianNB classifiers.'''
    fancy_classifier = Pipeline([
        ('preprocessing', Preprocessor()),
        ('classification', BaggingClassifier(base_estimator=GaussianNB()))
    ])
    self.clf = VotingClassifier(estimators=[('basic', BasicClassifier()),
                                            ('fancy', fancy_classifier)],
                                voting='soft')
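
For context, here is a minimal sketch of the wrapper class such an __init__ typically lives in, following the usual scikit-learn delegation pattern. BasicClassifier and Preprocessor come from the challenge starter kit and are not public, so this runnable stand-in substitutes GaussianNB and StandardScaler for them; only the sklearn imports are standard.

from sklearn.base import BaseEstimator
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier, VotingClassifier
from sklearn.naive_bayes import GaussianNB

class Classifier(BaseEstimator):
    def __init__(self):
        # StandardScaler and GaussianNB stand in for the starter kit's
        # Preprocessor and BasicClassifier, which are not shown here.
        fancy_classifier = Pipeline([
            ('preprocessing', StandardScaler()),
            ('classification', BaggingClassifier(base_estimator=GaussianNB()))])
        # Note: scikit-learn >= 1.2 renames base_estimator to estimator.
        self.clf = VotingClassifier(
            estimators=[('basic', GaussianNB()), ('fancy', fancy_classifier)],
            voting='soft')

    def fit(self, X, y):
        self.clf.fit(X, y)
        return self

    def predict(self, X):
        return self.clf.predict(X)

    def predict_proba(self, X):
        return self.clf.predict_proba(X)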
Example #2
def test(D):
    '''Try out a few example classifiers.'''
    classifier_dict = {
        '1. MonsterClassifier': MonsterClassifier(),
        '2. SimplePipeline': Pipeline([('prepro', Preprocessor()), ('classif', BasicClassifier())]),
        '3. RandomPred': RandomPredictor(),
        '4. Linear Discriminant Analysis': LinearDiscriminantAnalysis()}

    for key in classifier_dict:
        myclassifier = classifier_dict[key]
        acc = D.ClfScatter(myclassifier)  # ClfScatter visualizes the classifier and returns an accuracy

    return acc  # Return the last accuracy (needed to get the correct answer in the TP)
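
ClfScatter is specific to the TP's DataManager, so the snippet above is not runnable on its own. As a self-contained way to compare a dictionary of classifiers, one could score each entry with cross-validation instead; this sketch assumes a plain (X, y) dataset (here iris) and uses only standard scikit-learn calls.

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X, y = load_iris(return_X_y=True)
classifier_dict = {'GaussianNB': GaussianNB(),
                   'LDA': LinearDiscriminantAnalysis()}
for name, clf in classifier_dict.items():
    scores = cross_val_score(clf, X, y, cv=5)  # 5-fold cross-validated accuracy
    print('%s: %.3f +- %.3f' % (name, scores.mean(), scores.std()))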
Example #3
def __init__(self):
    '''Here you may define the structure of your model. You can create your
    own kind of ensemble: ensembles of pipelines or pipelines of ensembles.
    This example votes among four classifiers, one of which is a pipeline
    whose classifier is itself an ensemble of GaussianNB classifiers.'''
    fancy_classifier = Pipeline([
        ('preprocessing', Preprocessor()),
        ('classification', BaggingClassifier(base_estimator=GaussianNB(), random_state=1))
    ])
    self.clf = VotingClassifier(estimators=[
        ('Linear Discriminant Analysis', LinearDiscriminantAnalysis()),
        ('Gaussian Classifier', GaussianNB()),
        ('Support Vector Machine', SVC(probability=True)),
        ('Fancy Classifier', fancy_classifier)],
        voting='soft')
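
A note on the design: voting='soft' averages the predict_proba outputs of all member estimators, which is why the SVC is constructed with probability=True (without it, SVC exposes no predict_proba and soft voting fails). A quick runnable check of that behavior, on iris data and without the challenge-specific Preprocessor:

from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
clf = VotingClassifier(estimators=[
    ('lda', LinearDiscriminantAnalysis()),
    ('gnb', GaussianNB()),
    ('svm', SVC(probability=True))],  # probability=True enables predict_proba
    voting='soft')
clf.fit(X, y)
print(clf.predict_proba(X[:3]))  # class probabilities averaged over the three models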
Example #4
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from DataManager import DataManager
from zPreprocessor import Preprocessor

input_dir = "../public_data"
output_dir = "../res"

basename = 'movierec'
D = DataManager(basename, input_dir)  # Load data
print("*** Original data ***")
print(D)

Prepro = Preprocessor()

# Preprocess the data and load it back into D
D.data['X_train'] = Prepro.fit_transform(D.data['X_train'], D.data['Y_train'])
D.data['X_valid'] = Prepro.transform(D.data['X_valid'])
D.data['X_test'] = Prepro.transform(D.data['X_test'])

# Show something that proves that the preprocessing worked
print("*** Transformed data ***")
print(D)

# Preprocessing also opens up opportunities for visualization, e.g.:
# - scatter plots of the first 2 principal components
# - scatter plots of the pairs of features that are most relevant
import matplotlib.pyplot as plt
X = D.data['X_train']
Y = D.data['Y_train']
plt.scatter(X[:, 0], X[:, 1], c=Y)
plt.show()
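
The comment above also mentions scatter plots of the first two principal components; a sketch of that variant follows, using scikit-learn's PCA (how informative it is depends on what the Preprocessor already did to X):

from sklearn.decomposition import PCA

X2 = PCA(n_components=2).fit_transform(X)  # project onto the 2 leading components
plt.scatter(X2[:, 0], X2[:, 1], c=Y)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.show()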
Example #5
def __init__(self):
    '''A simple model: a pipeline chaining the Preprocessor with a linear regression.'''
    regressor = Pipeline([('preprocessing', Preprocessor()),
                          ('regression', linear_model.LinearRegression())])
    self.clf = regressor
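
A runnable sketch of the same pattern outside the challenge code, with StandardScaler standing in for the challenge's Preprocessor and toy data from make_regression:

from sklearn import linear_model
from sklearn.datasets import make_regression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=0)
regressor = Pipeline([('preprocessing', StandardScaler()),
                      ('regression', linear_model.LinearRegression())])
regressor.fit(X, y)
print(regressor.score(X, y))  # R^2 on the training data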
Example #6
from sklearn.metrics import accuracy_score
# Interesting point: the M2 students sometimes prepared their challenges with
# AutoML challenge metrics rather than scikit-learn metrics. For example:
from libscores import bac_metric
from libscores import auc_metric

from zDataManager import DataManager  # The class provided by team 1

basename = 'Iris'
D = DataManager(basename, input_dir)  # Load data
print(D)

# Here we define 3 classifiers and compare them
classifier_dict = {
    'Pipeline': Pipeline([('prepro', Preprocessor()), ('classif', BasicClassifier())]),
    'Classifier': Classifier(),
    'Random': RandomPredictor()
}

print("Classifier\tAUC\tBAC\tACC\tError bar")
for key in classifier_dict:
    myclassifier = classifier_dict[key]

    # Train
    Ytrue_tr = D.data['Y_train']
    myclassifier.fit(D.data['X_train'], Ytrue_tr)

    # Some classifiers and cost functions use a different encoding of the target
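    # A sketch of that re-encoding step (assumption: as in the AutoML challenge
    # starter kit, bac_metric and auc_metric expect the target one-hot encoded
    # as an (n_samples, n_classes) array, while accuracy_score takes raw labels):
    from sklearn.preprocessing import label_binarize
    Ypred_tr = myclassifier.predict(D.data['X_train'])
    classes = sorted(set(Ytrue_tr))
    Ytrue_onehot = label_binarize(Ytrue_tr, classes=classes)
    Ypred_onehot = label_binarize(Ypred_tr, classes=classes)
    acc = accuracy_score(Ytrue_tr, Ypred_tr)
    # bac = bac_metric(Ytrue_onehot, Ypred_onehot)  # signatures assumed, not verified
    # auc = auc_metric(Ytrue_onehot, Ypred_onehot)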