def log_reg(x, y, show_pcm=False):
    x = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=0,
                                                    test_size=0.25)
    lr = LogisticRegression(random_state=0, multi_class='auto', max_iter=10000)
    lr.fit(xtrain, ytrain)
    print(f"Logistic Regression Score: {100*round(lr.score(xtest, ytest), 3)}%")
    if show_pcm:
        pcm(lr, xtest, ytest, normalize='true')
        plt.title(
            f"Logistic Regression Score: {100*round(lr.score(xtest, ytest), 3)}%"
        )
        plt.show()
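# These helpers rely on imports that are not shown in this section; the lines
# below are a sketch of what is assumed (they may duplicate imports already at
# the top of the script). `pcm` is used throughout but never defined here -- it
# is assumed to be an alias for sklearn's confusion-matrix plotting helper,
# e.g. something like:
#     from sklearn.metrics import ConfusionMatrixDisplay
#     pcm = ConfusionMatrixDisplay.from_estimator
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier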
def svc_poly(x, y, show_pcm=False):
    x = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=0,
                                                    test_size=0.25)
    svc = SVC(degree=5, kernel='poly', random_state=0, gamma='auto')
    svc.fit(xtrain, ytrain)
    print(f"SVM Poly Training set score: {100*svc.score(xtrain, ytrain):.2f}%")
    print(f"SVM Poly Test set score: {100*svc.score(xtest, ytest):.2f}%")
    if show_pcm:
        pcm(svc, xtest, ytest, normalize='true')
        plt.title(f"SVM Poly Kernel Score: {100*svc.score(xtest, ytest):.2f}%")
        plt.show()
def svc_linear_kernel(x, y, show_pcm=False):
    x = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=0,
                                                    test_size=0.25)
    # A degree-1 polynomial kernel is equivalent to a linear kernel.
    svc = SVC(C=10, degree=1, kernel='poly')
    svc.fit(xtrain, ytrain)
    print(f"SVM Linear Training set score: {100*svc.score(xtrain, ytrain):.2f}%")
    print(f"SVM Linear Test set score: {100*svc.score(xtest, ytest):.2f}%")
    if show_pcm:
        pcm(svc, xtest, ytest, normalize='true')
        plt.title(f"SVM Linear Kernel Score: {100*svc.score(xtest, ytest):.2f}%")
        plt.show()
def svc_rbf(x, y, show_pcm=False):
    x = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=0,
                                                    test_size=0.25)
    svc = SVC(C=10, gamma='auto', random_state=0)
    svc.fit(xtrain, ytrain)
    print(f"SVM Gaussian Training set score: {100*svc.score(xtrain, ytrain):.2f}%")
    print(f"SVM Gaussian Test set score: {100*svc.score(xtest, ytest):.2f}%")
    if show_pcm:
        pcm(svc, xtest, ytest, normalize='true')
        plt.title(f"SVM RBF (Gaussian) Score: {100*svc.score(xtest, ytest):.2f}%")
        plt.show()
def linear_SVC(x, y, show_pcm=False):
    x = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=0,
                                                    test_size=0.25)
    lsvc = LinearSVC(C=100, random_state=0, tol=1e-5)
    lsvc.fit(xtrain, ytrain)
    print(f"Linear SVM Training set score: {100*lsvc.score(xtrain, ytrain):.2f}%")
    print(f"Linear SVM Test set score: {100*lsvc.score(xtest, ytest):.2f}%")
    print(f"Linear SVC Coefficients: {lsvc.coef_}")
    print(f"Linear SVC Intercept: {lsvc.intercept_}")
    if show_pcm:
        pcm(lsvc, xtest, ytest, normalize='true')
        plt.title(f"Linear SVC Score: {100*lsvc.score(xtest, ytest):.2f}%")
        plt.show()
def k_nearest(x, y, show_pcm=False):
    x = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=0,
                                                    test_size=0.25)
    neighbors = np.arange(1, 50)
    train_acc = np.empty(len(neighbors))
    test_acc = np.empty(len(neighbors))
    # Run the evaluation for each value of k (number of neighbors).
    for i, k in enumerate(neighbors):
        # metric='minkowski' with p=1 makes the distance the Manhattan distance.
        knn = KNeighborsClassifier(n_neighbors=k, metric='minkowski', p=1,
                                   weights='distance')
        knn.fit(xtrain, ytrain)
        # Save accuracy for both training and testing.
        train_acc[i] = knn.score(xtrain, ytrain)
        test_acc[i] = knn.score(xtest, ytest)
    # Final model with k = 15.
    knn = KNeighborsClassifier(n_neighbors=15, metric='minkowski', p=1,
                               weights='distance')
    knn.fit(xtrain, ytrain)
    print(f"KNN Score: {100*round(knn.score(xtest, ytest), 3)}%")
    if show_pcm:
        pcm(knn, xtest, ytest, normalize='true')
    # Show relevant plots based on requirements.
    plt.figure()
    plt.plot(neighbors, test_acc, label="Testing Dataset Accuracy KNN")
    plt.title(f"KNN Score: {100*round(knn.score(xtest, ytest), 3)}%")
    plt.legend()
    plt.xlabel("n_neighbors")
    plt.ylabel("Accuracy")
    plt.show()
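# k_nearest() records accuracy for every k in the sweep but then hard-codes
# n_neighbors=15 for the final model. A hypothetical variant (an assumption,
# not part of the original code) could instead pick k from the sweep:
def k_nearest_best_k(x, y):
    # Hypothetical helper: choose the k with the highest test-set accuracy.
    x = scale(x)
    xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=0,
                                                    test_size=0.25)
    neighbors = np.arange(1, 50)
    test_acc = np.empty(len(neighbors))
    for i, k in enumerate(neighbors):
        knn = KNeighborsClassifier(n_neighbors=k, metric='minkowski', p=1,
                                   weights='distance')
        knn.fit(xtrain, ytrain)
        test_acc[i] = knn.score(xtest, ytest)
    best_k = int(neighbors[np.argmax(test_acc)])
    print(f"Best k by test accuracy: {best_k}")
    return best_k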
pd.plotting.scatter_matrix(Data)

# Feature matrix: columns 1 through 10. The original `Data.iloc[:, [1-11]]`
# evaluates to a single column (index -10); a slice was almost certainly intended.
X = Data.iloc[:, 1:11]
x = scale(X)
Y = Data["A"]
X_train, X_test, Y_train, Y_test = train_test_split(x, Y, test_size=0.25,
                                                    random_state=42)

lr = LogisticRegression(random_state=0)
lr.fit(X_train, Y_train)
print("lr.coef_: {}".format(lr.coef_))
print("lr.intercept_: {}".format(lr.intercept_))
print("Training set score: {:.2f}".format(lr.score(X_train, Y_train)))
print("Test set score: {:.2f}".format(lr.score(X_test, Y_test)))

# KNN confusion matrix
knn = KNeighborsClassifier(n_neighbors=15, metric='minkowski', p=1,
                           weights='distance')
knn.fit(X_train, Y_train)
pcm(knn, X_test, Y_test, normalize='true',
    display_labels=['Not Bankrupt', 'Bankrupt'])

# Logistic regression confusion matrix
lrc = LogisticRegression(solver='liblinear', random_state=0)
lrc.fit(X_train, Y_train)
pcm(lrc, X_test, Y_test, normalize='true',
    display_labels=['Not Bankrupt', 'Bankrupt'])

# SVM (degree-1 polynomial, i.e. linear kernel) confusion matrix
svc = SVC(C=10, degree=1, kernel='poly')
svc.fit(X_train, Y_train)
pcm(svc, X_test, Y_test, normalize='true',
    display_labels=['Not Bankrupt', 'Bankrupt'])

plt.show()
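# Note: the confusion matrices above re-fit each classifier inline rather than
# reusing the helper functions defined earlier. A sketch of calling those
# helpers directly on the same data (not part of the original script, shown
# here only for illustration):
log_reg(X, Y, show_pcm=True)
svc_poly(X, Y, show_pcm=True)
svc_linear_kernel(X, Y, show_pcm=True)
svc_rbf(X, Y, show_pcm=True)
linear_SVC(X, Y, show_pcm=True)
k_nearest(X, Y, show_pcm=True)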