Ejemplo n.º 1
0
def log_reg(x, y, show_pcm=False):
    """Fit a logistic regression on a 75/25 split and print its test accuracy.

    Args:
        x: Feature matrix; it is passed through ``scale`` before splitting.
        y: Target labels, one per row of ``x``.
        show_pcm: When true, also draw the confusion matrix of the test split
            via ``pcm`` (with ``normalize='true'``), titled with the test
            accuracy, and show the figure.

    Returns:
        None. The accuracy is only printed.
    """
    # NOTE(review): scaling the full data set before splitting lets the test
    # rows influence the scaling of the training rows. Left unchanged here so
    # the reported numbers stay comparable with earlier runs.
    x = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(x,
                                                    y,
                                                    random_state=0,
                                                    test_size=0.25)

    lr = LogisticRegression(random_state=0, multi_class='auto', max_iter=10000)
    lr.fit(xtrain, ytrain)

    # Format with a format spec instead of 100*round(score, 3): multiplying
    # the rounded float by 100 can print values like 94.69999999999999%.
    # The score is computed once and reused for the plot title.
    score_msg = f"Logistic Regression Score: {100 * lr.score(xtest, ytest):.1f}%"
    print(score_msg)
    if show_pcm:
        pcm(lr, xtest, ytest, normalize='true')
        plt.title(score_msg)
        plt.show()
Ejemplo n.º 2
0
def svc_poly(x, y, show_pcm=False):
    """Train a degree-5 polynomial-kernel SVC and print train/test accuracy.

    Args:
        x: Feature matrix; it is passed through ``scale`` before splitting.
        y: Target labels, one per row of ``x``.
        show_pcm: When true, also draw the confusion matrix of the test split
            via ``pcm`` and show it with the test accuracy as the title.

    Returns:
        None. Accuracies are only printed.
    """
    features = scale(x)
    # Fixed seed so the 75/25 split is reproducible between runs.
    split = train_test_split(features, y, random_state=0, test_size=0.25)
    xtrain, xtest, ytrain, ytest = split

    model = SVC(degree=5, kernel='poly', random_state=0, gamma='auto')
    model.fit(xtrain, ytrain)

    train_pct = 100 * model.score(xtrain, ytrain)
    test_pct = 100 * model.score(xtest, ytest)
    print(f"SVM Poly Training set score: {train_pct:.2f}%")
    print(f"SVM Poly Test set score: {test_pct:.2f}%")

    if not show_pcm:
        return

    pcm(model, xtest, ytest, normalize='true')
    plt.title(f"SVM Poly Kernel Score: {test_pct:.2f}%")
    plt.show()
Ejemplo n.º 3
0
def svc_linear_kernel(x, y, show_pcm=False):
    """Train an SVC with a degree-1 polynomial (linear) kernel and report accuracy.

    Args:
        x: Feature matrix; it is passed through ``scale`` before splitting.
        y: Target labels, one per row of ``x``.
        show_pcm: When true, also draw the confusion matrix of the test split
            via ``pcm`` and show it with the test accuracy as the title.

    Returns:
        None. Accuracies are only printed.
    """
    # NOTE(review): scaling before the split lets test rows influence the
    # scaling of the training rows; left unchanged to keep results stable.
    x = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(x,
                                                    y,
                                                    random_state=0,
                                                    test_size=0.25)

    # kernel='poly' with degree=1 is kept exactly as before; only the
    # reporting labels below were wrong.
    svc = SVC(C=10, degree=1, kernel='poly')
    svc.fit(xtrain, ytrain)

    # The labels previously said "Gaussian" (copied from the RBF variant),
    # which made this output indistinguishable from svc_rbf's.
    test_pct = 100 * svc.score(xtest, ytest)
    print(
        f"SVM Linear Training set score: {100*svc.score(xtrain, ytrain):.2f}%"
    )
    print(f"SVM Linear Test set score: {test_pct:.2f}%")

    if show_pcm:
        pcm(svc, xtest, ytest, normalize='true')
        plt.title(f"SVM Linear Kernel Score: {test_pct:.2f}%")
        plt.show()
Ejemplo n.º 4
0
def svc_rbf(x, y, show_pcm=False):
    """Train an SVC (C=10, gamma='auto', kernel left at its default) and report accuracy.

    Args:
        x: Feature matrix; it is passed through ``scale`` before splitting.
        y: Target labels, one per row of ``x``.
        show_pcm: When true, also draw the confusion matrix of the test split
            via ``pcm`` and show it with the test accuracy as the title.

    Returns:
        None. Accuracies are only printed.
    """
    scaled = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(
        scaled, y, random_state=0, test_size=0.25)

    # No kernel argument is passed; the function name implies the default
    # is the RBF (Gaussian) kernel.
    classifier = SVC(C=10, gamma='auto', random_state=0)
    classifier.fit(xtrain, ytrain)

    train_pct = 100 * classifier.score(xtrain, ytrain)
    test_pct = 100 * classifier.score(xtest, ytest)
    print(f"SVM Gaussian Training set score: {train_pct:.2f}%")
    print(f"SVM Gaussian Test set score: {test_pct:.2f}%")

    if not show_pcm:
        return

    pcm(classifier, xtest, ytest, normalize='true')
    plt.title(f"SVM RBF Gaussian Score: {test_pct:.2f}%")
    plt.show()
Ejemplo n.º 5
0
def linear_SVC(x, y, show_pcm=False):
    """Train a LinearSVC and print its accuracy, coefficients and intercept.

    Args:
        x: Feature matrix; it is passed through ``scale`` before splitting.
        y: Target labels, one per row of ``x``.
        show_pcm: When true, also draw the confusion matrix of the test split
            via ``pcm`` and show it with the test accuracy as the title.

    Returns:
        None. All results are only printed.
    """
    # NOTE(review): scaling before the split lets test rows influence the
    # scaling of the training rows; left unchanged to keep results stable.
    x = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(x,
                                                    y,
                                                    random_state=0,
                                                    test_size=0.25)

    lsvc = LinearSVC(C=100, random_state=0, tol=1e-5)
    lsvc.fit(xtrain, ytrain)

    test_pct = 100 * lsvc.score(xtest, ytest)
    print(
        f"Linear SVM Training set score: {100*lsvc.score(xtrain, ytrain):.2f}%"
    )
    print(f"Linear SVM Test set score: {test_pct:.2f}%")

    # The former bare `lsvc.predict(xtest)` call was removed: its result was
    # discarded, so it only cost time.
    print(f"Linear SVC Coefficient: {lsvc.coef_}")
    print(f"Linear SVC Intercept: {lsvc.intercept_}")
    if show_pcm:
        pcm(lsvc, xtest, ytest, normalize='true')
        plt.title(f"Linear SVC Score: {test_pct:.2f}%")
        plt.show()
Ejemplo n.º 6
0
def k_nearest(x, y, show_pcm=False):
    """Fit a 15-neighbour KNN classifier and print its test accuracy.

    When ``show_pcm`` is true, the confusion matrix of that classifier is
    drawn via ``pcm`` and a second figure plots test accuracy against
    ``n_neighbors`` for k = 1..49.

    Args:
        x: Feature matrix; it is passed through ``scale`` before splitting.
        y: Target labels, one per row of ``x``.
        show_pcm: Whether to produce the two figures described above.

    Returns:
        None. The accuracy is only printed.
    """
    # NOTE(review): scaling before the split lets test rows influence the
    # scaling of the training rows; left unchanged to keep results stable.
    x = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(x,
                                                    y,
                                                    random_state=0,
                                                    test_size=0.25)

    # Setting the metric to minkowski with p = 1 selects the Manhattan
    # distance. Shared by the reported model and the sweep below.
    knn_params = dict(metric='minkowski', p=1, weights='distance')

    knn = KNeighborsClassifier(n_neighbors=15, **knn_params)
    knn.fit(xtrain, ytrain)

    # Format with a format spec instead of 100*round(score, 3): multiplying
    # the rounded float by 100 can print values like 94.69999999999999%.
    score_msg = f"KNN Score: {100 * knn.score(xtest, ytest):.1f}%"
    print(score_msg)
    if not show_pcm:
        return

    # The sweep over k only feeds the accuracy plot, so it runs only when
    # plots are requested. Training accuracy was never used and is no longer
    # computed.
    neighbors = np.arange(1, 50)
    test_acc = np.empty(len(neighbors))
    for i, k in enumerate(neighbors):
        candidate = KNeighborsClassifier(n_neighbors=k, **knn_params)
        candidate.fit(xtrain, ytrain)
        test_acc[i] = candidate.score(xtest, ytest)

    pcm(knn, xtest, ytest, normalize='true')
    # Accuracy-versus-k curve goes on its own figure.
    plt.figure()
    plt.plot(neighbors, test_acc, label="Testing Dataset Accuracy KNN")
    plt.title(score_msg)
    plt.legend()
    plt.xlabel("n_neighbors")
    plt.ylabel("Accuracy")
    plt.show()
Ejemplo n.º 7
0
# Pairwise scatter plots of the columns of Data (Data is defined outside this
# excerpt).
pd.plotting.scatter_matrix(Data)

# NOTE(review): `1 - 11` is plain arithmetic and evaluates to -10, so this
# selects exactly ONE column (the 10th from the end), not a range of columns.
# If a range such as columns 1..11 was intended, a slice is needed instead;
# confirm against the data set before changing, since it alters every result
# below.
X = Data.iloc[:, [1 - 11]]
x = scale(X)
Y = Data["A"]
X_train, X_test, Y_train, Y_test = train_test_split(
    x, Y, test_size=0.25, random_state=42
)

# Baseline logistic regression: report fitted parameters and accuracy.
lr = LogisticRegression(random_state=0)
lr.fit(X_train, Y_train)

print(f"lr.coef_: {lr.coef_}")
print(f"lr.intercept_: {lr.intercept_}")
print(f"Training set score: {lr.score(X_train, Y_train):.2f}")
print(f"Test set score: {lr.score(X_test, Y_test):.2f}")

# Three classifiers whose confusion matrices are drawn, in this order:
# KNN (Manhattan distance, distance-weighted), logistic regression with the
# liblinear solver, and an SVC with a degree-1 polynomial kernel.
knn = KNeighborsClassifier(
    n_neighbors=15, metric='minkowski', p=1, weights='distance'
)
lrc = LogisticRegression(solver='liblinear', random_state=0)
svc = SVC(C=10, degree=1, kernel='poly')

for _clf in (knn, lrc, svc):
    _clf.fit(X_train, Y_train)
    pcm(
        _clf,
        X_test,
        Y_test,
        normalize='true',
        display_labels=['Not Bankrupt', 'Bankrupt'],
    )

# Single show() at the end displays every figure created above.
plt.show()