Esempio n. 1
0
        # NOTE(review): this fragment is the body of an outer loop whose header
        # is not visible here; `key`, `X`, `y`, `do_feature_eng`, `subDir`,
        # `best_accuracies` and `RANDOM_STATE` come from that enclosing scope.
        labels = pd.unique(y)  # NOTE(review): unused below — pd.unique(y) is recomputed at the plot call
        # Stratified 70/30 hold-out split (class proportions preserved).
        trnX, tstX, trnY, tstY = train_test_split(X,
                                                  y,
                                                  train_size=0.7,
                                                  stratify=y)

        # Fit a logistic-regression classifier and predict on both partitions.
        clf = LogisticRegression(random_state=RANDOM_STATE)
        clf.fit(trnX, trnY)
        prd_trn = clf.predict(trnX)
        prd_tst = clf.predict(tstX)

        train_accuracy = metrics.accuracy_score(trnY, prd_trn)
        test_accuracy = metrics.accuracy_score(tstY, prd_tst)

        # Record [train, test] accuracy under a label that marks whether
        # feature selection ("FS") was applied for this variant.
        text = key
        if (do_feature_eng): text += ' with FS'
        best_accuracies[text] = [train_accuracy, test_accuracy]

        # Per-variant performance + confusion-matrix figure, saved under subDir.
        ds.plot_evaluation_results(pd.unique(y), trnY, prd_trn, tstY, prd_tst)
        plt.suptitle('QOT Log Regression - ' + key +
                     ' - Performance & Confusion matrix')
        plt.savefig(subDir + 'QOT Log Regression - ' + key +
                    ' - Performance & Confusion matrix')

        # Free all figure memory before the next loop iteration.
        plt.close("all")

# Summary chart: train vs. test accuracy for every sampling/FS variant,
# saved as a single figure under graphsDir.
summary_title = 'QOT Sampling & Feature Selection'
plt.figure(figsize=(7, 7))
ds.multiple_bar_chart(['Train', 'Test'], best_accuracies, ylabel='Accuracy')
plt.suptitle(summary_title)
plt.savefig(graphsDir + summary_title)
Esempio n. 2
0
            # One overfitting curve per distance metric: accuracy vs. number
            # of neighbours, drawn into the k-th subplot.
            d = dist[k]
            ds.multiple_line_chart(nvalues,
                                   overfitting_values[d],
                                   ax=axs[0, k],
                                   title='Overfitting for dist = %s' % (d),
                                   xlabel='K Neighbours',
                                   ylabel='accuracy',
                                   percentage=True)
        plt.suptitle('QOT Overfitting - KNN')
        plt.savefig(subDir + 'QOT Overfitting - KNN')

        # Refit KNN with the best (n_neighbors, metric) pair found earlier.
        # NOTE(review): `clf = knn = ...` double-binds; `knn` is never used in
        # this fragment and looks like leftover from a rename.
        clf = knn = KNeighborsClassifier(n_neighbors=best[0], metric=best[1])
        clf.fit(trnX, trnY)
        prd_trn = clf.predict(trnX)
        prd_tst = clf.predict(tstX)
        ds.plot_evaluation_results(["negative", "positive"], trnY, prd_trn,
                                   tstY, prd_tst)
        # NOTE(review): the suptitle uses "'- Performance" (no space before the
        # dash) while the saved filename uses "' - Performance" — likely an
        # unintended mismatch; confirm before normalising either string.
        plt.suptitle('QOT KNN - ' + key +
                     '- Performance & Confusion matrix - %d neighbors and %s' %
                     (best[0], best[1]))
        plt.savefig(subDir + 'QOT KNN - ' + key +
                    ' - Performance & Confusion matrix')

        # Free all figure memory before the next loop iteration.
        plt.close("all")

# Summary bar charts comparing every sampling/FS variant.
sampling_title = 'QOT Sampling & Feature Selection'

# Accuracy chart: one [train, test] bar pair per variant.
plt.figure(figsize=(7, 7))
ds.multiple_bar_chart(['Train', 'Test'], best_accuracies, ylabel='Accuracy')
plt.suptitle(sampling_title)
plt.savefig(graphsDir + sampling_title)

# Recall chart for the same variants (snippet is cut off after this call).
plt.figure(figsize=(7, 7))
ds.multiple_bar_chart(['Train', 'Test'], recalls, ylabel='Recall')
Esempio n. 3
0
        # Grid over learning rates × estimator counts; `last_best`, `best`,
        # `best_tree`, `d` and the data splits come from the enclosing scope
        # (header not visible in this fragment).
        values = {}
        for lr in learning_rate:
            yvalues = []
            for n in n_estimators:
                # NOTE(review): `gb` is not re-created inside this loop — if it
                # is constructed once outside (not visible here), every (lr, n)
                # iteration refits the *same* model and the grid search is a
                # no-op. Confirm a GradientBoostingClassifier(max_depth=d,
                # learning_rate=lr, n_estimators=n) is built per iteration.
                gb.fit(trnX, trnY)
                prdY = gb.predict(tstX)
                # Track the best test accuracy seen across the whole search.
                yvalues.append(metrics.accuracy_score(tstY, prdY))
                if yvalues[-1] > last_best:
                    best = (d, lr, n)
                    last_best = yvalues[-1]
                    best_tree = gb
            values[lr] = yvalues

    # Evaluate the best model on both partitions, plot, and log its score.
    prd_trn = best_tree.predict(trnX)
    prd_tst = best_tree.predict(tstX)
    ds.plot_evaluation_results(pd.unique(y), trnY, prd_trn, tstY, prd_tst,
                               str(samp))
    print(str(samp) + ': ' + str(last_best))
'''
 UNDERSAMPLING
allknn = AllKNN()
trnX, trnY = allknn.fit_resample(trnX, trnY)

nm = NearMiss()
trnX, trnY = nm.fit_resample(trnX, trnY)

 OVERSAMPLING 
smt = SMOTE()
trnX, trnY = smt.fit_resample(trnX, trnY)

ada = ADASYN(random_state=42)
trnX, trnY = ada.fit_resample(trnX, trnY)