Example #1
def draw_learning_curve(self, cv, scoring='accuracy', n_jobs=5):
    visualizer = LearningCurve(self.model,
                               cv=cv,
                               scoring=scoring,
                               n_jobs=n_jobs)
    visualizer.fit(self.training_data, self.training_labels)
    visualizer.poof()
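
This is an instance method: it expects a host class that stores the estimator and its training set on self. A minimal sketch of such a class follows; the class name ModelEvaluator and its constructor are assumptions, not part of the original source. Note also that poof() is the pre-1.0 Yellowbrick API; releases from 1.0 onward call it show().

from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB
from yellowbrick.model_selection import LearningCurve

class ModelEvaluator:
    # Hypothetical host class; only these attribute names are required.
    def __init__(self, model, training_data, training_labels):
        self.model = model                      # any scikit-learn estimator
        self.training_data = training_data      # feature matrix X
        self.training_labels = training_labels  # target vector y

    draw_learning_curve = draw_learning_curve   # bind the function above as a method

X, y = make_classification(n_samples=200, random_state=0)
ModelEvaluator(GaussianNB(), X, y).draw_learning_curve(cv=StratifiedKFold(5))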
def learning_curve_clusterer(path="images/learning_curve_clusterer.png"):

    X, y = make_blobs(n_samples=1000, centers=5)

    _, ax = plt.subplots()
    sizes = np.linspace(0.3, 1.0, 10)

    oz = LearningCurve(
        KMeans(), ax=ax, train_sizes=sizes, scoring="adjusted_rand_score"
    )
    oz.fit(X, y)
    oz.poof(outpath=path)
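
A sketch of the imports this example assumes. Scoring a clusterer works here because adjusted_rand_score compares the KMeans assignments against the ground-truth blob labels:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from yellowbrick.model_selection import LearningCurve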
Example #3
def visualizeLearningCurve(classifier, features, labels, scoring='precision'):

    sizes = numpy.linspace(0.1, 1.0, 10)
    cv = StratifiedKFold(10)
    visualizer = LearningCurve(classifier,
                               cv=cv,
                               train_sizes=sizes,
                               scoring=scoring,
                               n_jobs=10)

    visualizer.fit(features.drop(["appid", "name"], axis=1),
                   list(map(convertLabelToNumber, labels)))
    visualizer.poof()
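
A hypothetical invocation; the numeric feature columns, the label values, and the encoding helper are all invented for illustration. Only the dropped columns appid and name come from the original:

import numpy
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from yellowbrick.model_selection import LearningCurve

rng = numpy.random.RandomState(0)
features = pd.DataFrame({
    "appid": range(200),                          # identifier, dropped before fitting
    "name": ["game%d" % i for i in range(200)],   # identifier, dropped before fitting
    "price": rng.uniform(0, 60, 200),
    "playtime": rng.uniform(0, 100, 200),
})
labels = list(rng.choice(["positive", "negative"], 200))

def convertLabelToNumber(label):
    # placeholder encoding; the real project defines its own
    return 1 if label == "positive" else 0

visualizeLearningCurve(RandomForestClassifier(), features, labels)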
Example #6
def evaluation(estimator, X, Y, x, y):

    classes = [Y[1], Y[0]]
    f, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(18, 6))

    # Confusion Matrix
    cmm = ConfusionMatrix(model=estimator,
                          ax=ax1,
                          classes=classes,
                          label_encoder={
                              0.0: 'Negativo',
                              1.0: 'Positivo'
                          })
    cmm.score(x, y)

    # ROCAUC
    viz = ROCAUC(model=estimator, ax=ax2)
    viz.fit(X, Y)
    viz.score(x, y)

    # Learning Curve
    cv_strategy = StratifiedKFold(n_splits=3)
    sizes = np.linspace(0.3, 1.0, 10)
    visualizer = LearningCurve(estimator,
                               ax=ax,
                               cv=cv_strategy,
                               scoring='roc_auc',
                               train_sizes=sizes,
                               n_jobs=4)
    visualizer.fit(X, Y)

    cmm.poof()
    viz.poof()
    visualizer.poof()
    plt.show()
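
A hypothetical call. X, Y are the training split and x, y the test split; the estimator must be fitted beforehand, because cmm.score() predicts before anything inside the function calls fit. The synthetic data is an illustration only:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from yellowbrick.classifier import ConfusionMatrix, ROCAUC
from yellowbrick.model_selection import LearningCurve

rng = np.random.RandomState(0)
X = rng.randn(300, 5)
Y = (X[:, 0] + 0.5 * rng.randn(300) > 0).astype(float)
Y[0], Y[1] = 0.0, 1.0   # evaluation() reads its class labels from Y[1] and Y[0]
x = rng.randn(100, 5)
y = (x[:, 0] + 0.5 * rng.randn(100) > 0).astype(float)

model = LogisticRegression().fit(X, Y)
evaluation(model, X, Y, x, y)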
def learning_curve_regressor(path="images/learning_curve_regressor.png"):

    data = pd.read_csv(os.path.join(FIXTURES, "energy", "energy.csv"))

    targets = ["heating load", "cooling load"]
    features = [col for col in data.columns if col not in targets]

    X = data[features]
    y = data[targets[0]]

    _, ax = plt.subplots()
    sizes = np.linspace(0.3, 1.0, 10)

    oz = LearningCurve(RidgeCV(), ax=ax, train_sizes=sizes, scoring='r2')
    oz.fit(X, y)
    oz.poof(outpath=path)
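
Surrounding setup the snippet assumes; FIXTURES points at a local copy of the Yellowbrick test fixtures, so the exact path below is a placeholder:

import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeCV
from yellowbrick.model_selection import LearningCurve

FIXTURES = os.path.join("tests", "fixtures")   # wherever energy/energy.csv lives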
Example #9
def learning_curve_sklearn_example(
        path="images/learning_curve_sklearn_example.png"):
    digits = load_digits()
    X, y = digits.data, digits.target

    _, ax = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(9, 4))

    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    oz = LearningCurve(GaussianNB(), ax=ax[0], cv=cv, n_jobs=4)
    oz.fit(X, y)
    oz.finalize()

    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    oz = LearningCurve(SVC(gamma=0.001), ax=ax[1], cv=cv, n_jobs=4)
    oz.fit(X, y)
    oz.poof(outpath=path)
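
This mirrors scikit-learn's plot_learning_curve demo: naive Bayes and an RBF SVM on the digits data, side by side. finalize() draws the first panel without rendering it, so both visualizers land in one figure by the time poof() saves it. The imports it assumes:

import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import ShuffleSplit
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from yellowbrick.model_selection import LearningCurve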
Example #10
def learning_curve(model, X, y):
    # from sklearn.model_selection import StratifiedKFold
    from sklearn.model_selection import RepeatedStratifiedKFold

    from yellowbrick.model_selection import LearningCurve

    # Create the learning curve visualizer
    # cv = StratifiedKFold(12)
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=10, random_state=1)
    sizes = np.linspace(0.3, 1.0, 10)

    viz = LearningCurve(model,
                        cv=cv,
                        train_sizes=sizes,
                        scoring='neg_log_loss',
                        n_jobs=4)

    # Fit and poof the visualizer
    viz.fit(X, y)
    viz.poof()
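
A hypothetical call. Any classifier exposing predict_proba works, since neg_log_loss needs probability estimates; np must also be in scope for np.linspace:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=300, random_state=1)
learning_curve(LogisticRegression(max_iter=1000), X, y)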
def learning_curve_classifier(path="images/learning_curve_classifier.png"):

    data = pd.read_csv(os.path.join(FIXTURES, "game", "game.csv"))

    target = "outcome"
    features = [col for col in data.columns if col != target]

    X = pd.get_dummies(data[features])
    y = data[target]

    _, ax = plt.subplots()
    cv = StratifiedKFold(12)
    sizes = np.linspace(0.3, 1.0, 10)

    oz = LearningCurve(
        MultinomialNB(), ax=ax, cv=cv, n_jobs=4,
        train_sizes=sizes, scoring='f1_weighted'
    )

    oz.fit(X, y)
    oz.poof(outpath=path)
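
pd.get_dummies one-hot encodes the categorical board columns of the game dataset, which also keeps every feature non-negative, as MultinomialNB requires. A toy illustration with invented column names:

import pandas as pd

df = pd.DataFrame({"a1": ["x", "o", "b"], "a2": ["o", "o", "x"]})
print(pd.get_dummies(df).columns.tolist())
# ['a1_b', 'a1_o', 'a1_x', 'a2_o', 'a2_x']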
Example #12
fig, ax = plt.subplots(figsize=(16, 9))
val_curve = ValidationCurve(
    KNeighborsRegressor(),
    param_name='n_neighbors',
    param_range=n_neighbors,
    cv=5,
    scoring=rmse_score,
    ax=ax)
val_curve.fit(X, y)
val_curve.poof()
fig.tight_layout()
plt.show()

fig, ax = plt.subplots(figsize=(16, 9))
l_curve = LearningCurve(
    KNeighborsRegressor(n_neighbors=best_k),
    train_sizes=np.arange(.1, 1.01, .1),
    scoring=rmse_score,
    cv=5,
    # n_jobs=-1,
    ax=ax)
l_curve.fit(X, y)
l_curve.poof()
fig.tight_layout()
plt.show()
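
rmse_score and best_k come from earlier, truncated context. A plausible definition of the scorer, stated as an assumption; greater_is_better=False makes scikit-learn negate the value so that higher remains better during model selection:

import numpy as np
from sklearn.metrics import make_scorer, mean_squared_error

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

rmse_score = make_scorer(rmse, greater_is_better=False)
best_k = 5   # placeholder; the source picks it from the validation curve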

# Binary Classification
y_binary = (y > y.median()).astype(int)
n_neighbors = tuple(range(5, 151, 10))
n_folds = 5
scoring = 'roc_auc'

pipe = Pipeline([('scaler', StandardScaler()),
                 ('knn', KNeighborsClassifier())])

param_grid = {'knn__n_neighbors': n_neighbors}

estimator = GridSearchCV(
Example #14
print('stratified k-fold cross-validation scores: {}'.format(result2))
print('mean cross-validation score: {:.2f}'.format(result2.mean()))

result3 = cross_val_score(arbre, x_train, y_train, cv=shuffle)
print('shuffle-split cross-validation scores: {}'.format(result3))
print('mean cross-validation score: {:.2f}'.format(result3.mean()))

cm = ConfusionMatrix(arbre, classes=[0, 1, 2, 3, 4, 5, 6], percent=True)
cm.fit(x_train, y_train)
cm.score(x_test, y_test)
cm.poof()

size = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
lc = LearningCurve(DecisionTreeClassifier(), train_sizes=size, scoring='accuracy')
lc.fit(x_train, y_train)
lc.poof()
''' ---------------------- Random forest ------------------------'''

foret = RandomForestClassifier(n_estimators=120,
                               max_features='sqrt',
                               n_jobs=-1,
                               random_state=0)
foret.fit(x_train, y_train)

result = cross_val_score(foret, x_train, y_train, cv=5)
print('cross-validation scores: {}'.format(result))
print('mean cross-validation score: {:.2f}'.format(result.mean()))

result1 = cross_val_score(foret, x_train, y_train, cv=kplis)
print('k-fold cross-validation scores: {}'.format(result1))
print('mean cross-validation score: {:.2f}'.format(result1.mean()))
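
The excerpt references objects created before it was cut: the fitted tree arbre, the split x_train/y_train, and the cross-validation strategies kplis and shuffle. A sketch of that setup, with the dataset itself left as an assumption:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import (ShuffleSplit, StratifiedKFold,
                                     cross_val_score, train_test_split)
from sklearn.tree import DecisionTreeClassifier
from yellowbrick.classifier import ConfusionMatrix
from yellowbrick.model_selection import LearningCurve

X, y = make_classification(n_samples=700, n_classes=7, n_informative=8,
                           random_state=0)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

kplis = StratifiedKFold(n_splits=5)              # "kplis" suggests stratified k folds
shuffle = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
arbre = DecisionTreeClassifier(random_state=0)   # "arbre" is French for tree
arbre.fit(x_train, y_train)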