import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.metrics import confusion_matrix, classification_report

glass.shape              # (214, 10)
glass.info()
glass['Type'].unique()   # the distinct glass types
glass.head()

# Segregate dependent and independent variables
y = glass['Type'].values
x = glass.drop(['Type'], axis=1).values
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0)

# Building the model
knn = KNC(n_neighbors=8)

# Fit the training data
knn.fit(x_train, y_train)  # model built

# Predict y for the training data with the fitted model
y_train_pred = knn.predict(x_train)

# Check the accuracy of the predictions against the real values
train_acc = np.mean(y_train_pred == y_train)
train_acc                      # 73.68%
knn.score(x_train, y_train)    # 73.68%

# Prediction accuracy of the training data and classification report
print(confusion_matrix(y_train, y_train_pred))
print(classification_report(y_train, y_train_pred))  # accuracy = 74%

# fit test data
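# A minimal sketch of the test-data evaluation that the final comment above
# announces (not from the original script; it reuses the fitted `knn` model
# and the split variables defined above):
y_test_pred = knn.predict(x_test)
test_acc = np.mean(y_test_pred == y_test)   # test-side counterpart of train_acc
print(knn.score(x_test, y_test))            # same value via the built-in scorer
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred))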
# Train-test split
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.metrics import confusion_matrix, classification_report

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1)

# Standardize the features: fit the scaler on the training data only,
# then apply the same transform to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print(X_train)
print(X_test)

# Model 1 with 5 nearest neighbours (chosen arbitrarily)
knn = KNC(n_neighbors=5)
m1 = knn.fit(X_train, y_train)
pred = m1.predict(X_test)
print(pred)
print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))

# Calculating the accuracy of model 1
print(m1.score(X_test, y_test))  # >> accuracy = 0.6744186046511628

error = []
# Calculating the test error for K values between 1 and 40
# (the loop body was truncated; the fit/predict/append steps below follow
# the standard pattern implied by the comment)
for i in range(1, 40):
    knn = KNC(n_neighbors=i)
    knn.fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    error.append(np.mean(pred_i != y_test))
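# A hedged sketch of the elbow plot usually drawn from the `error` list above
# (the matplotlib code is an assumption, not part of the original snippet):
import matplotlib.pyplot as plt

plt.plot(range(1, 40), error, marker="o")
plt.xlabel("K")
plt.ylabel("mean error")
plt.title("Error rate vs. K value")
plt.show()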
"""
@author: rahul
"""
import pandas as pd
import numpy as np

zoo = pd.read_csv("E:\\Data Science\\Data Sheet\\zoo.csv")

from sklearn.model_selection import train_test_split
train, test = train_test_split(zoo, test_size=0.2)

from sklearn.neighbors import KNeighborsClassifier as KNC

# For 3 nearest neighbours
neigh = KNC(n_neighbors=3)
# Fitting with training data
neigh.fit(train.iloc[:, 1:19], train.iloc[:, 0])
# Train accuracy (predicting with the same columns the model was fitted on;
# the original used the glass-style slices 0:9 / 9 here, a copy-paste bug)
train_acc = np.mean(neigh.predict(train.iloc[:, 1:19]) == train.iloc[:, 0])
# Test accuracy
test_acc = np.mean(neigh.predict(test.iloc[:, 1:19]) == test.iloc[:, 0])

# 3 to 50 nearest neighbours, storing the accuracy values
acc = []
for i in range(3, 50, 1):
    neigh = KNC(n_neighbors=i)
    neigh.fit(train.iloc[:, 1:19], train.iloc[:, 0])
    train_acc = np.mean(neigh.predict(train.iloc[:, 1:19]) == train.iloc[:, 0])
    test_acc = np.mean(neigh.predict(test.iloc[:, 1:19]) == test.iloc[:, 0])
    acc.append([train_acc, test_acc])
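# A sketch of how the stored `acc` pairs are typically plotted to compare
# train and test accuracy across K (the plotting code is an assumption;
# the original snippet stops at the loop):
import matplotlib.pyplot as plt

ks = np.arange(3, 50, 1)
plt.plot(ks, [a[0] for a in acc], "bo-", label="train")
plt.plot(ks, [a[1] for a in acc], "ro-", label="test")
plt.xlabel("n_neighbors")
plt.ylabel("accuracy")
plt.legend()
plt.show()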
# Output of describe() (feature columns elided with "..." in the source):
# mean   0.425743  0.198020  0.584158  ...  0.128713  0.435644  2.831683
# std    0.496921  0.400495  0.495325  ...  0.336552  0.498314  2.102709
# min    0.000000  0.000000  0.000000  ...  0.000000  0.000000  1.000000
# 25%    0.000000  0.000000  0.000000  ...  0.000000  0.000000  1.000000
# 50%    0.000000  0.000000  1.000000  ...  0.000000  0.000000  2.000000
# 75%    1.000000  0.000000  1.000000  ...  0.000000  1.000000  4.000000
# max    1.000000  1.000000  1.000000  ...  1.000000  1.000000  7.000000

# Getting the train and test data
train, test = train_test_split(Zoo, test_size=0.3)  # 70% train, 30% test

# Loop to get the accuracy of the train and test data,
# mentioning the K values I want to use
x = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33]
for k in x:
    if k % 2 != 0:
        neigh = KNC(n_neighbors=k)
        neigh.fit(train.iloc[:, 1:17], train.iloc[:, 17])
        print("train_accuracy" + str(k), " : " + str(np.mean(neigh.predict(train.iloc[:, 1:17]) == train.iloc[:, 17])))
        print("test_accuracy" + str(k), " : " + str(np.mean(neigh.predict(test.iloc[:, 1:17]) == test.iloc[:, 17])))

###### Below are the accuracy values that I got:
# train_accuracy1 : 1.0
# test_accuracy1 : 1.0
# train_accuracy3 : 0.9714285714285714
# test_accuracy3 : 0.9354838709677419
# train_accuracy5 : 0.9428571428571428
# test_accuracy5 : 0.9032258064516129
# train_accuracy7 : 0.9428571428571428
# test_accuracy7 : 0.9032258064516129
# train_accuracy9 : 0.8571428571428571
# test_accuracy9 : 0.8709677419354839
def __learn_and_validate(self):
    """
    Here comes the magic. Use ML algorithms to learn, validate, and test.
    """
    print('Using extractor:', self.extractor, '\n')

    ###################################################################
    print("Learning using SVC...")
    classifier = SVC(C=1.0, kernel='sigmoid', gamma='auto',
                     probability=False, verbose=True, max_iter=60000,
                     decision_function_shape='ovr')
    classifier.fit(self.learn[0], self.learn[1])
    print("Validating using SVC...")
    self.validateResults[self.__which]['SVC'] = classifier.score(
        self.validate[0], self.validate[1])
    print("Testing using SVC...")
    predicted = classifier.predict(self.test[0])
    expected = self.test[1]
    matrix = confusion_matrix(expected, predicted)
    self.writeConfusionMatrixCsv('SVC', matrix)
    print('\n', matrix, '\n')
    if self.__crossValidate:
        self.testResults[self.__which]['SVC'].append(
            classifier.score(self.test[0], self.test[1]))
    else:
        self.testResults[self.__which]['SVC'] = classifier.score(
            self.test[0], self.test[1])

    ####################################################################
    print("Learning using MLP...")
    classifier = MLP(solver='lbfgs', alpha=1e-5, random_state=1)
    classifier.fit(self.learn[0], self.learn[1])
    print("Validating using MLP...")
    self.validateResults[self.__which]['MLP'] = classifier.score(
        self.validate[0], self.validate[1])
    print("Testing using MLP...")
    predicted = classifier.predict(self.test[0])
    expected = self.test[1]
    matrix = confusion_matrix(expected, predicted)
    self.writeConfusionMatrixCsv('MLP', matrix)
    print('\n', matrix, '\n')
    if self.__crossValidate:
        self.testResults[self.__which]['MLP'].append(
            classifier.score(self.test[0], self.test[1]))
    else:
        self.testResults[self.__which]['MLP'] = classifier.score(
            self.test[0], self.test[1])

    ####################################################################
    print("Learning using DTC...")
    classifier = DTC(random_state=0)
    classifier.fit(self.learn[0], self.learn[1])
    print("Validating using DTC...")
    self.validateResults[self.__which]['DTC'] = classifier.score(
        self.validate[0], self.validate[1])
    print("Testing using DTC...")
    predicted = classifier.predict(self.test[0])
    expected = self.test[1]
    matrix = confusion_matrix(expected, predicted)
    self.writeConfusionMatrixCsv('DTC', matrix)
    print('\n', matrix, '\n')
    if self.__crossValidate:
        self.testResults[self.__which]['DTC'].append(
            classifier.score(self.test[0], self.test[1]))
    else:
        self.testResults[self.__which]['DTC'] = classifier.score(
            self.test[0], self.test[1])

    ####################################################################
    print("Learning using KNC...")
    classifier = KNC(n_neighbors=self.__range - 1)
    classifier.fit(self.learn[0], self.learn[1])
    print("Validating using KNC...")
    self.validateResults[self.__which]['KNC'] = classifier.score(
        self.validate[0], self.validate[1])
    print("Testing using KNC...")
    predicted = classifier.predict(self.test[0])
    expected = self.test[1]
    matrix = confusion_matrix(expected, predicted)
    self.writeConfusionMatrixCsv('KNC', matrix)
    print('\n', matrix, '\n')
    if self.__crossValidate:
        self.testResults[self.__which]['KNC'].append(
            classifier.score(self.test[0], self.test[1]))
    else:
        self.testResults[self.__which]['KNC'] = classifier.score(
            self.test[0], self.test[1])
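# A hedged refactoring sketch (not part of the original class): the four
# blocks above differ only in the classifier, so the per-model work can be
# driven from a dict. All names (self.__which, self.learn, self.validate,
# self.test, writeConfusionMatrixCsv, ...) mirror the method above.
def __learn_and_validate_loop(self):
    classifiers = {
        'SVC': SVC(C=1.0, kernel='sigmoid', gamma='auto', max_iter=60000,
                   decision_function_shape='ovr'),
        'MLP': MLP(solver='lbfgs', alpha=1e-5, random_state=1),
        'DTC': DTC(random_state=0),
        'KNC': KNC(n_neighbors=self.__range - 1),
    }
    for name, classifier in classifiers.items():
        print("Learning using", name, "...")
        classifier.fit(self.learn[0], self.learn[1])
        self.validateResults[self.__which][name] = classifier.score(
            self.validate[0], self.validate[1])
        matrix = confusion_matrix(self.test[1],
                                  classifier.predict(self.test[0]))
        self.writeConfusionMatrixCsv(name, matrix)
        print('\n', matrix, '\n')
        score = classifier.score(self.test[0], self.test[1])
        if self.__crossValidate:
            self.testResults[self.__which][name].append(score)
        else:
            self.testResults[self.__which][name] = score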
def clust(nparray, labels):
    # 1-NN label consistency: for each point, check whether its nearest
    # neighbour (excluding itself) carries the same label
    neighs = KNC(n_neighbors=2)
    neighs.fit(nparray, labels)
    _, pairs = neighs.kneighbors(nparray)  # pairs[i] = (i itself, nearest other point)
    acc = sum([labels[a] == labels[b] for a, b in pairs]) / len(labels)
    return acc
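# A small usage example for clust() with made-up toy data (the points and
# labels below are assumptions for illustration, not from the original):
import numpy as np
from sklearn.neighbors import KNeighborsClassifier as KNC

points = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.1, 5.0]])
labels = np.array([0, 0, 1, 1])
print(clust(points, labels))  # 1.0 -- every point's nearest neighbour shares its label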
import pandas
from sklearn import cross_validation as cv
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.preprocessing import scale

data = pandas.read_csv(filepath_or_buffer='../wine.data', header=None)
attributes = data.ix[:, 1:]
classes = data[0]

kfold = cv.KFold(n=178, n_folds=5, random_state=42, shuffle=True)
attributes = scale(attributes)

best = 0  # renamed from `max` to avoid shadowing the builtin
k = 1
k_neighbor = None
while k <= 50:
    knc = KNC(n_neighbors=k)
    m = cv.cross_val_score(knc, attributes, classes, cv=kfold).mean()
    print '{} neighbor: {}'.format(k, m)
    if m > best:
        best = m
        k_neighbor = k
    k += 1

print 'Maximum: {}\nNeighbors: {}'.format(best, k_neighbor)
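# The snippet above targets Python 2 and the long-removed
# sklearn.cross_validation module. A hedged sketch of the same search against
# the current Python 3 / sklearn.model_selection API (same data and seeds):
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import scale

data = pd.read_csv('../wine.data', header=None)
attributes = scale(data.iloc[:, 1:])  # .ix was removed; use .iloc
classes = data[0]

kfold = KFold(n_splits=5, shuffle=True, random_state=42)
scores = {k: cross_val_score(KNeighborsClassifier(n_neighbors=k),
                             attributes, classes, cv=kfold).mean()
          for k in range(1, 51)}
best_k = max(scores, key=scores.get)
print('Maximum: {}\nNeighbors: {}'.format(scores[best_k], best_k))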
# In[20]:
import seaborn as sns
sns.boxplot(glass['Type'])

# In[9]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(glass, test_size=0.2)
from sklearn.neighbors import KNeighborsClassifier as KNC

# In[10]:
neigh = KNC(n_neighbors=7)
neigh.fit(train.iloc[:, 1:9], train.iloc[:, 9])
train_acc_1 = np.mean(neigh.predict(train.iloc[:, 1:9]) == train.iloc[:, 9])
train_acc_1

# In[11]:
test_acc_1 = np.mean(neigh.predict(test.iloc[:, 1:9]) == test.iloc[:, 9])
test_acc_1

# In[12]:
neigh = KNC(n_neighbors=7)
neigh.fit(train.iloc[:, 1:9], train.iloc[:, 9])
train_acc = np.mean(neigh.predict(train.iloc[:, 1:9]) == train.iloc[:, 9])
train_acc
def get_sample(dimensions):
    return [get_random_point(dimensions) for i in range(1000)]

dimensions = 2
sample = get_sample(dimensions)
xs = [i[0] for i in sample]
ys = [i[1] for i in sample]
classes = [get_classification(i) for i in sample]
colors = ["r" if i == 0 else "b" for i in classes]
plt.scatter(xs, ys, color=colors)
plt.show()

clf1 = KNC(10, weights="distance")
clf2 = KNC(10, weights="uniform")
clfs = [clf1, clf2]
for clf in clfs:
    clf.fit(sample, classes)

new_sample = get_sample(dimensions)
new_classes = [get_classification(i) for i in new_sample]
for clf in clfs:
    print(clf.score(new_sample, new_classes))

a1 = []
a2 = []
# The loop body was truncated; the remainder below follows the
# distance/uniform comparison pattern established above
for k in range(1, 50):
    clf1 = KNC(k, weights="distance")
    clf2 = KNC(k, weights="uniform")
    clf1.fit(sample, classes)
    clf2.fit(sample, classes)
    a1.append(clf1.score(new_sample, new_classes))
    a2.append(clf2.score(new_sample, new_classes))
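# A hedged sketch comparing the two weighting schemes across k; the plotting
# code is an assumption (the original stops inside the loop above):
ks = range(1, 50)
plt.plot(ks, a1, label="distance")
plt.plot(ks, a2, label="uniform")
plt.xlabel("k")
plt.ylabel("score")
plt.legend()
plt.show()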
        the raw data with which the model will be trained
    training_classes: list
        the class to which the data corresponds
    """
    known_classes = []
    new_data = []
    for i, training_class in enumerate(training_data):
        # renamed the inner loop variable (originally also `training_data`)
        # so it no longer shadows the argument being iterated
        for sample in training_class:
            known_classes.append(training_classes[i])
            transformed_data = transform_data(sample)
            new_data.append(transformed_data)
    knc_model.fit(new_data, known_classes)


if __name__ == "__main__":
    FILENAME = os.path.dirname(__file__)
    os.chdir(FILENAME)

    model = KNC(n_neighbors=1)
    data = load_all_training_data("training sounds")
    classes, data = split_data(data)
    train_model(model, data, classes)

    _, sound = load_training_file("training sounds/click.wav")
    transformed_sound = transform_data(sound)
    prediction = predict(model, transformed_sound)
    print(f"This sound is a \"{prediction}\"")

    # NOTE: Save the trained model to a file for easy reuse without training
    # (this is for when a model uses more input data)
    # save_model(model, "models/trained.knn-model")
#"GNB", "QDA"] model_types = [LR, RFC, #ABC, MLPC, KNC, SVC, #DTC, #GNB, QDA] models = [LR(), RFC(n_estimators=30), #ABC(), MLPC(), KNC(), SVC(probability=True), #DTC(), #GNB(), QDA()] ### experiment bright students math finance N = 15000 minority_percent = 0.3 MIN = int(minority_percent * N) MAJ = int((1 - minority_percent) * N) # print(MIN, MAJ) # p_S_brightmath = 0.9 # p_T_brightmath = 0.1
def __init__(self):
    self.clf = KNC()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.cross_validation import KFold
from sklearn.cross_validation import cross_val_score
from sklearn.preprocessing import scale

##
data = np.loadtxt('wine.data.txt', delimiter=",")
X = data[:, 1:14]
y = data[:, 0]

kf = KFold(len(X), n_folds=5, shuffle=True, random_state=42)

# First, without scaling
# (list renamed from `kMeans`: it holds mean CV accuracies per k, not k-means)
accuracies = list()
for k in range(1, 50):
    knn = KNC(n_neighbors=k)  # Create classifier
    knn.fit(X, y)             # Train classifier
    arr = cross_val_score(estimator=knn, X=X, y=y, scoring='accuracy', cv=kf)
    accuracies.append(arr.mean())

plt.plot(accuracies)
plt.xlabel('N of neighbors')
plt.ylabel('Quality')
plt.title('Cross validation quality')

maxValue = max(accuracies)
optK = accuracies.index(maxValue) + 1
print "Maximum value is %2.2f at %d" % (maxValue, optK)

# Write answers
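# The comment "First, without scaling" and the unused `scale` import suggest
# a second, scaled pass; a hedged sketch of what it would look like (same
# loop, standardized features -- not from the original):
X_scaled = scale(X)
accuracies_scaled = list()
for k in range(1, 50):
    knn = KNC(n_neighbors=k)
    arr = cross_val_score(estimator=knn, X=X_scaled, y=y,
                          scoring='accuracy', cv=kf)
    accuracies_scaled.append(arr.mean())

maxValueScaled = max(accuracies_scaled)
print "Scaled maximum is %2.2f at %d" % (maxValueScaled,
                                         accuracies_scaled.index(maxValueScaled) + 1)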
                  batch_size=32, nb_epoch=50, verbose=0)
        # score = model.evaluate(X_test, Y_test, verbose=0)
        res_pred = model.predict_classes(X_test)
    else:
        if method_clsf == 'SVC':
            # print 'Method: SVC'
            clf = svm.SVC(kernel='rbf', C=10., gamma=0.1)
            # kernels: ["linear", "poly", "rbf", "sigmoid", "precomputed"]
            # print dfs[0], len(dfs), len(X_test)
            # for i in range(len(X_test)):
            #     print np.argmax(dfs[i]), res_pred[i]
        elif method_clsf == 'Logit':
            clf = LR(C=10.)
        elif method_clsf == 'kNN':
            clf = KNC()
        elif method_clsf == 'boosting':
            clf = XGBC()
        elif method_clsf == 'GNB':
            clf = GNB()
        else:
            clf = None
        clf.fit(X_train, y_train)
        res_pred = clf.predict(X_test)
        # dfs = clf.decision_function(X_test)

    res_by_seg = mf.get_corr_ratio(res_pred=res_pred, y_test=y_test,
                                   type='by_seg')
    res_by_categ = mf.get_corr_ratio(res_pred=res_pred, y_test=y_test,
                                     type='by_categ')
    one_res = (float(format(res_by_seg, '.3f')),
               float(format(res_by_categ, '.3f')))
import pandas as pd import numpy as np glass = pd.read_csv("glass.csv") # Training and Test data using from sklearn.model_selection import train_test_split train_glass, test_glass = train_test_split( glass, test_size=0.2) # 0.2 => 20 percent of entire data # KNN using sklearn # Importing Knn algorithm from sklearn.neighbors from sklearn.neighbors import KNeighborsClassifier as KNC # for 3 nearest neighbours neigh_glass = KNC(n_neighbors=3) neigh_glass.fit(glass.iloc[:, 0:9], glass.iloc[:, 9]) train_glass_acc = np.mean( neigh_glass.predict(train_glass.iloc[:, 0:9]) == train_glass.iloc[:, 9]) test_glass_acc = np.mean( neigh_glass.predict(test_glass.iloc[:, 0:9]) == test_glass.iloc[:, 9]) glass_pred = [] for i in range(3, 50, 2): neigh_glass = KNC(n_neighbors=i) neigh_glass.fit(glass.iloc[:, 0:9], glass.iloc[:, 9]) train_glass_acc = np.mean( neigh_glass.predict(train_glass.iloc[:, 0:9]) == train_glass.iloc[:,
from sklearn.externals.six import StringIO
import matplotlib.pyplot as plt
from sklearn import datasets as DS
from sklearn.model_selection import train_test_split as TTS
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.naive_bayes import GaussianNB

iris = DS.load_iris()
iris_data = iris.data
# iris_data = preprocessing.scale(iris_data)
iris_label = iris.target
train_data, test_data, train_label, test_label = TTS(iris_data,
                                                     iris_label,
                                                     test_size=0.3,
                                                     random_state=3)

# KNN accuracy
KNN_Accuracy = []
for k in range(1, 101):
    knn = KNC(n_neighbors=k)
    knn.fit(train_data, train_label)
    result = knn.predict(test_data)
    KNN_Accuracy.append(knn.score(test_data, test_label))

plt.plot(KNN_Accuracy)
plt.title('Accuracy influenced by different neighbors in KNN')
plt.xlabel('neighbors (1-100)')
plt.ylabel('accuracy')
plt.show()

NB = GaussianNB()
NB.fit(train_data, train_label)

fig = plt.figure(figsize=(12, 15))
fig.tight_layout()
import data_extractor as data_ext
import cross_validation as cross_val
import numpy as np

data = data_ext.read_data("../data/book_data.xlsx")

author_data = data["author"]
genre_data = data["genre"]
features = np.column_stack((author_data, genre_data))

rating_data = data["user_rating"]
labels = np.array(rating_data)

# Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier as DTC
print "Decision Tree Classifier accuracy: \t", cross_val.leave_k_out_cross_validation(
    1, DTC(), features, labels)

# KNeighbors Classifier
from sklearn.neighbors import KNeighborsClassifier as KNC
print "KNeighbors Classifier accuracy: \t", cross_val.leave_k_out_cross_validation(
    1, KNC(), features, labels)

# SVM
from sklearn.svm import SVC
print "SVM accuracy: \t", cross_val.leave_k_out_cross_validation(
    1, SVC(gamma='auto'), features, labels)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
# arguments reordered to match the "fast" scatter (they were swapped,
# which put grade on the x-axis for the slow points only)
plt.scatter(bumpy_slow, grade_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
# plt.show()

################################################################################

# your code here!  name your classifier object clf if you want the
# visualization code (prettyPicture) to show you the decision boundary
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.ensemble import AdaBoostClassifier as ABC
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.tree import DecisionTreeClassifier as DTC

clf_list = [RFC(), ABC(), KNC()]
acc_list = []

# RandomForestClassifier
#   n_estimators ~10
#   criterion = 'entropy' / 'gini'
#   max_features ~0.4
#   Max acc: 0.94

# KNeighborsClassifier
#   n_neighbors: 8
#   weights: uniform
#   algorithm: any
#   Max acc: 0.944

# AdaBoostClassifier
#   base_estimator:
def asd(X, X2, Y, Y2):
    # 1-NN accuracy: label each point in X2 with the label of its nearest
    # neighbour in X and compare against Y2
    neighs = KNC(n_neighbors=1)
    neighs.fit(X, Y)
    _, pairs = neighs.kneighbors(X2)  # pairs[i] is a length-1 array of indices into X
    # index with a[0] so the sum is a scalar rather than a 1-element array
    return sum([Y[a[0]] == b for a, b in zip(pairs, Y2)]) / len(Y2)
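# A small usage example for asd() with made-up toy data (the arrays below are
# assumptions for illustration, not from the original):
import numpy as np
from sklearn.neighbors import KNeighborsClassifier as KNC

X = np.array([[0.0], [1.0], [10.0]])
Y = np.array([0, 0, 1])
X2 = np.array([[0.2], [9.5]])
Y2 = np.array([0, 1])
print(asd(X, X2, Y, Y2))  # 1.0 -- both query points inherit the correct label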
"vae_batch_size": 1000 } vptsne.fit(mnist.train._images, **fit_params) ptsne.fit(mnist.train._images, **fit_params) vptsne.save_weights("models/mnist_vptsne.ckpt", "models/mnist_vae.ckpt") ptsne.save_weights("models/mnist_ptsne.ckpt") #vptsne.load_weights("models/mnist_vptsne.ckpt", "models/mnist_vae.ckpt") #ptsne.load_weights("models/mnist_ptsne.ckpt") pca = PCA(n_components=2).fit(mnist.train._images) estimators = [vptsne, ptsne, vae, pca] transformed_train = [ estimator.transform(mnist.train._images) for estimator in estimators ] transformed_test = [ estimator.transform(mnist.test._images) for estimator in estimators ] print("Trustworthiness for test set (vptsne, ptsne, vae, pca):", [ trustworthiness(mnist.test._images, transformed, n_neighbors=12) for transformed in transformed_test ]) print("1-NN score for test set (vptsne, ptsne, vae, pca)", [ KNC(n_neighbors=1).fit(train, mnist.train._labels).score( test, mnist.test._labels) for train, test in zip(transformed_train, transformed_test) ])
    print(i)
    c = S1[i]  # INSTANCES
    L2 = []
    S2 = []
    for n in rl(num[0]):
        if array[n][c] == 0:
            S2.append(n)
        else:
            L2.append(n)
    XTrain, XImpute = split(X, L2, axis=0)   # TrainX, TestX
    YTrain, YImpute = split(Y0, L2, axis=0)  # TrainY, Prediction
    Knn = KNC(n_neighbors=k0)
    Knn.fit(XTrain, YTrain)
    y = Knn.predict(XImpute)
    print(y)
    for j in rl(y):
        YImpute[j] = y[j]
    Yi.append(weave(YTrain, YImpute, L2, S2))
    X = weave(XTrain, XImpute, L2, S2)

Y_ = []
for i in range(3):
    Y_.append(list(np.dot(Yi[i], 1 / max(Yi[i]))))
Y_ = np.transpose(Y_)
Y = np.transpose(Yi)
Data = pd.DataFrame(np.transpose(
                                 'min_samples_split': [2, 3, 4]},
                                use_kbest=True,
                                use_scaler=False)

# Nearest Neighbors
clf, features_list = build_model(all_features, KNC(),
                                 {'n_neighbors': [2, 3, 4, 5],
                                  'weights': ['uniform', 'distance'],
                                  'leaf_size': [2, 3, 4, 5, 6],
                                  'p': [1, 2, 3]},
                                 use_kbest=True,
                                 use_scaler=True)
'''

# The best estimator found
estimator = KNC(n_neighbors=3, weights='uniform', leaf_size=2, p=3)
clf, features_list = build_model(all_features, estimator, {},
                                 use_kbest=True, k=[14], use_scaler=True)

# ----------------------------------------------------------
# Assess New Features
# ----------------------------------------------------------
original_features = [
    f for f in features_list
    if f not in ['grand_total', 'to_poi_ratio', 'from_poi_ratio']
]
import pandas as pd
import seaborn as sns
import numpy as np

glass = pd.read_csv(
    "C:/Users/USER/Desktop/KNN-TECHNIQUE/knn-assignment/glass.csv")

from sklearn.model_selection import train_test_split
train, test = train_test_split(glass, test_size=0.2)

from sklearn.neighbors import KNeighborsClassifier as KNC
neigh = KNC(n_neighbors=2)  # model
neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])

# Train accuracy
train_acc = np.mean(neigh.predict(train.iloc[:, 0:9]) == train.iloc[:, 9])
# Test accuracy
test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])

# Finding the best k value and checking accuracy
acc = []
for i in range(3, 50, 2):
    neigh = KNC(n_neighbors=i)
    neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])
    train_acc = np.mean(neigh.predict(train.iloc[:, 0:9]) == train.iloc[:, 9])
    test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])
    acc.append([train_acc, test_acc])

# Plots
import matplotlib.pyplot as plt  # library to do visualizations
glass.head(30)  # to get the top n rows, use glass.head(n)
glass.tail(10)

##############################################################
# Training and test data
from sklearn.model_selection import train_test_split

# KNN using sklearn
# Importing the KNN algorithm from sklearn.neighbors
def norm_func(i):
    x = (i - i.mean()) / i.std()
    return x

# Normalized data frame (considering the numerical part of the data)
# (note: df_norm is computed here but the split below uses the raw data)
df_norm = norm_func(glass.iloc[:, 0:])

train, test = train_test_split(glass, test_size=0.2)  # 0.2 => 20 percent of entire data

from sklearn.neighbors import KNeighborsClassifier as KNC

# For 4 nearest neighbours
neigh = KNC(n_neighbors=4)
# Fitting with training data; the feature slice is 0:9 (the original used
# 0:10, which wrongly included the Type column itself as a feature)
neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])
# Train accuracy
train_acc = np.mean(neigh.predict(train.iloc[:, 0:9]) == train.iloc[:, 9])  # 95.91%
# Test accuracy
test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])  # 93.02%

# For 5 nearest neighbours
neigh = KNC(n_neighbors=5)
# Fitting with training data
neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])
# Train accuracy
train_acc = np.mean(neigh.predict(train.iloc[:, 0:9]) == train.iloc[:, 9])  # 95.32%
# Test accuracy
test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])  # 90.69%

# Creating an empty list variable
import pandas as pd import numpy as np glass = pd.read_csv("C:/Users/USER/Downloads/glass.csv") # Training and Test data using from sklearn.model_selection import train_test_split train, test = train_test_split( glass, test_size=0.2) # 0.2 => 20 percent of entire data # KNN using sklearn # Importing Knn algorithm from sklearn.neighbors from sklearn.neighbors import KNeighborsClassifier as KNC # for 3 nearest neighbours neigh = KNC(n_neighbors=3) # Fitting with training data neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9]) # train accuracy train_acc = np.mean(neigh.predict( train.iloc[:, 0:9]) == train.iloc[:, 9]) # accuracy 82.45% # test accuracy test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9]) # 65.11% # for 5 nearest neighbours neigh = KNC(n_neighbors=5)
# coding: utf-8

# In[14]:
import numpy as np
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier as KNC

iris = datasets.load_iris()
x = iris.data
y = iris.target
np.unique(y)

np.random.seed(123)
indices = np.random.permutation(len(x))
iris_x_train = x[indices[:-10]]
iris_y_train = y[indices[:-10]]
iris_x_test = x[indices[-10:]]
iris_y_test = y[indices[-10:]]

model = KNC()
model.fit(iris_x_train, iris_y_train)
# Notebook echo of the fit call (all defaults), kept here as a comment:
# KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
#                      metric_params=None, n_jobs=1, n_neighbors=5, p=2,
#                      weights='uniform')

out = model.predict(iris_x_test)
print("predicted:", out)
print("True :", iris_y_test)
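# The cell prints predictions and ground truth side by side but never scores
# them; a small sketch that adds the accuracy (variable names reuse the cell
# above; the scoring lines are not from the original):
print("accuracy :", np.mean(out == iris_y_test))
print("accuracy :", model.score(iris_x_test, iris_y_test))  # same via the scorer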
clf.best_estimator_
clf.best_score_

# Predicting test set results
Y_pred = clf.predict(X_test)
confusion_matrix(Y_test, Y_pred)
accuracy_score(Y_test, Y_pred)

# Another method, without hyperparameter tuning:
# run the KNN algorithm for 3 to 50 nearest neighbours (odd numbers)
# and store the accuracy values
accuracy = []
from sklearn.neighbors import KNeighborsClassifier as KNC
for i in range(3, 50, 2):
    neigh = KNC(n_neighbors=i)
    neigh.fit(X_train, Y_train)
    train_acc = np.mean(neigh.predict(X_train) == Y_train)
    test_acc = np.mean(neigh.predict(X_test) == Y_test)
    accuracy.append([train_acc, test_acc])

import matplotlib.pyplot as plt  # library to do visualizations

# Train accuracy plot
plt.plot(np.arange(3, 50, 2), [i[0] for i in accuracy], "bo-")
# Test accuracy plot
plt.plot(np.arange(3, 50, 2), [i[1] for i in accuracy], "ro-")
plt.legend(["train", "test"])

# Both methods point to n_neighbors = 3 as the best choice; in the second
# method k = 23 also looks reasonable, but accuracy decreases there.

# Build the KNN classifier with k=3
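# The closing comment announces the final k=3 classifier but the code breaks
# off; a hedged sketch of that step, reusing the split variables above:
knn_final = KNC(n_neighbors=3)
knn_final.fit(X_train, Y_train)
Y_pred = knn_final.predict(X_test)
print(confusion_matrix(Y_test, Y_pred))
print(accuracy_score(Y_test, Y_pred))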
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.25,
                                                    random_state=0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting the classifier to the Training set
from sklearn.neighbors import KNeighborsClassifier as KNC
classifier = KNC(n_neighbors=5)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Visualising the Training set results
# (the meshgrid call was truncated; the step size and the second arange
# below follow the standard form of this plotting template)
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1,
              step=0.01),
    np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1,
              step=0.01))
# Generate train and test datasets
print('\n generating train test datasets...')
X_train, X_validation, Y_train, Y_validation = m_s.train_test_split(
    X, Y, test_size=validation_size, random_state=seed)

# Define the 'scoring' parameter as 'accuracy'
scoring = 'accuracy'

# Define an array to hold the candidate models
models = []

# Instantiate the candidate models and add them to the array
print('\n instantiating candidate models...')
models.append(('LR', LR()))
models.append(('LDA', LDA()))
models.append(('KNC', KNC()))
models.append(('DTC', DTC()))
models.append(('GNB', GNB()))

# Run the test harness
results = []
names = []
print('\n running test harness...')
for name, model in models:
    # 'kfold' sets up the k-fold cross validation
    kfold = m_s.KFold(n_splits=10, random_state=seed)
    # 'cv_results' applies the cross validation process to each model using
    # the training data, i.e. feature matrix X_train and result vector Y_train
    # (the call was truncated; cv and scoring below follow the kfold/scoring
    # setup above)
    cv_results = m_s.cross_val_score(model, X_train, Y_train,
                                     cv=kfold, scoring=scoring)
    if isinstance(estimator, VAE):
        # Already fitted
        transformed = estimator.transform(levine_data)
    else:
        transformed = estimator.fit_transform(levine_data)
    print(estimator.__class__.__name__, "fit_transform completed in",
          curr_millis() - start, "(ms)")
    return transformed

transformed_all = [fit_transform_fn(estimator) for estimator in estimators]

print("Trustworthiness (vptsne, ptsne, vae, pca, umap, tsne)", [
    trustworthiness(levine_data[subset_b_indices],
                    transformed[subset_b_indices], n_neighbors=12)
    for transformed in transformed_all
])

print("1-NN score for test set (vptsne, ptsne, vae, pca, umap, tsne)", [
    KNC(n_neighbors=1).fit(
        transformed[subset_a_indices], levine_labels[subset_a_indices]).score(
            transformed[subset_b_indices], levine_labels[subset_b_indices])
    for transformed in transformed_all
])

for i, transformed in enumerate(transformed_all):
    plt.clf()
    for label in np.unique(levine_labels):
        tmp = transformed[levine_labels == label]
        plt.scatter(tmp[:, 0], tmp[:, 1], s=0.2, c=color_palette[label])
    plt.show()