def getresult(*a):
    """Predict the iris species for one sample given as positional feature values."""
    from sklearn.neighbors import KNeighborsClassifier as kn
    import pandas as pd

    df = pd.read_csv(r'C:\Users\Admin\Desktop\Laxman\log\iris.csv')
    x = df.iloc[:, :4]   # the four measurement columns
    y = df.iloc[:, 4]    # the species label
    # Encode the string labels as integers.
    y = y.replace({'setosa': 0, 'versicolor': 1, 'virginica': 2})

    mymodule = kn()
    mymodule.fit(x, y)
    r = mymodule.predict([a])   # a is the tuple of feature values passed by the caller
    print('r=', r)
    return r
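# A quick usage sketch for getresult(); the four values are illustrative iris
# measurements (sepal length/width, petal length/width), not from the original.
getresult(5.1, 3.5, 1.4, 0.2)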
def knn(DataSmoteMSample, k):
    """Return the indices of the k nearest neighbours of every sample.

    KNeighborsClassifier.fit() requires target labels, so the unsupervised
    NearestNeighbors estimator is the right tool for a pure neighbour lookup.
    """
    from sklearn.neighbors import NearestNeighbors
    nbrs = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(DataSmoteMSample)
    distance, indices = nbrs.kneighbors(DataSmoteMSample)
    return indices
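# Minimal usage sketch for knn() above; the toy matrix is made up.
import numpy as np

samples = np.array([[0.0, 0.0], [0.1, 0.1], [5.0, 5.0], [5.1, 4.9]])
print(knn(samples, 2))   # each row: the sample's own index, then its nearest neighbour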
import pandas as pd
from sklearn.ensemble import (ExtraTreesClassifier, GradientBoostingClassifier,
                              RandomForestClassifier, VotingClassifier)
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier as kn

FEATURES = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
            'chlorides', 'free sulfur dioxide', 'total sulfur dioxide',
            'density', 'pH', 'sulphates', 'alcohol']

# Training data (file name inferred from the companion red-wine script).
data = pd.read_csv('red.csv')
x = data[FEATURES]
y = data['quality']

# min_samples_split must be >= 2 in current scikit-learn; 1 raises an error.
clf1 = ExtraTreesClassifier(n_estimators=82, max_depth=None, min_samples_split=2, random_state=0)
clf2 = RandomForestClassifier(random_state=0, n_estimators=250, min_samples_split=2)
clf3 = GradientBoostingClassifier(n_estimators=82, learning_rate=0.1, max_depth=1, random_state=0)
clf4 = GaussianNB()
clf5 = kn(n_neighbors=13)

test = pd.read_csv('red_test.csv')
x_test = test[FEATURES]
y_test = test['quality']

# Soft voting: the weights strongly favour the extra-trees model.
clf = VotingClassifier(estimators=[('et', clf1), ('rf', clf2), ('gb', clf3),
                                   ('gnb', clf4), ('kn', clf5)],
                       voting='soft', weights=[14, 3, 1, 1, 3]).fit(x, y)
print(clf.score(x_test, y_test))
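# Optional follow-up one might run on the fitted ensemble (the row count 3 is
# arbitrary): with voting='soft' the averaged class probabilities are exposed.
print(clf.predict_proba(x_test.head(3)))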
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier as kn

data = pd.read_csv('ice.csv')
x = data[['temp', 'street']]
y = data['ice']

clf = kn(n_neighbors=1)
clf.fit(x, y)
print(clf.score(x, y))   # k=1 reproduces its own training data almost perfectly

# Compare the observed series with the in-sample predictions.
t = np.arange(0.0, 31.0)
plt.plot(t, y, '--', t, clf.predict(x), '-')
plt.show()
# train_data, test_data, N, preprocessing, np and kn are set up earlier (not shown).
train_data = train_data.drop(['timedelta'], axis=1)   # drop the non-predictive 'timedelta' column
# train_data = train_data[train_data["shares"] < 40000]

X = np.array(train_data.drop(['shares'], axis=1))
y = np.array(train_data['shares'])   # 'shares' is the target
X = preprocessing.scale(X)

XTrain = X[:N, :]   # use the first N samples for training
yTrain = y[:N]
XVal = X[N:, :]     # use the rest for validation
yVal = y[N:]

Xtest = test_data.values
Xtest = preprocessing.scale(Xtest)

for i in [1, 3, 5, 10, 20]:
    model = kn(n_neighbors=i, n_jobs=-1)
    model.fit(XTrain, yTrain)
    training = model.predict(XTrain)
    validation = model.predict(XVal)
    print("KN " + str(i))
    print("Training error ", np.mean(np.abs(yTrain - training)))
    print("Validation error ", np.mean(np.abs(yVal - validation)))
    result = model.predict(Xtest)
    np.savetxt('result/resultKN' + str(i) + '.txt', result)   # 'result/' must exist
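# 'shares' is a count, so the regression variant of k-NN may suit the MAE
# metric better than the classifier used above; a sketch of the swap using
# scikit-learn's standard KNeighborsRegressor.
from sklearn.neighbors import KNeighborsRegressor

reg = KNeighborsRegressor(n_neighbors=5, n_jobs=-1)
reg.fit(XTrain, yTrain)
print("Validation error ", np.mean(np.abs(yVal - reg.predict(XVal))))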
print("Eigenvalues of x = \n", eigenvalues) print("Eigenvectors of x = \n", eigenvectors) file = [] for i in range(0, len(eigenvalues)): if eigenvalues[i] >= 1: file.append(i) evalsfilt = eigenvalues[file] print("Number of Dimesions used is ", (len(evalsfilt))) transf = eigenvectors * la.inv(np.sqrt(np.diagflat(eigenvalues))) print("Transformation Matrix = ", transf) transf_matrix = matrix * transf print("The Transformed x = ", transf_matrix) xtx1 = transf_matrix.transpose() * transf_matrix print("Expect an Identity Matrix = ", xtx1) #Q3 - d neigh = kn(n_neighbors=5, algorithm='brute', metric='euclidean') nbrs = neigh.fit(transf_matrix) kNNSpec = kNC(n_neighbors=5) nbrsC = kNNSpec.fit(transf_matrix, np.array(no_of_frauds)) scor = nbrsC.score(transf_matrix, np.array(no_of_frauds)) print("The result of score function is " + str(round(scor, 4))) #Q3 - e focal = [7500, 15, 3, 127, 2, 2] print("The focal observation is ", str(focal)) transfFocal = focal * transf print("The Transformed focal observation is " + str(transfFocal)) myNeighbors_t = nbrs.kneighbors(transfFocal, return_distance=False) print("The indices of the five neighbors of the focal are " + str(myNeighbors_t)) myNeighbors_t_values = matrix[myNeighbors_t] print("The input and target values of the nearest neighbors are \n")
print(file.shape)
print(file)
print(file.columns)

# Split the frame into data / target in a dict mimicking fetch_openml's layout.
mnist = dict()
mnist['data'] = file
mnist['target'] = file['label']
del mnist['data']['label']
mnist['data'] = mnist['data'].values
mnist['target'] = mnist['target'].values

X, y = mnist['data'], mnist['target']
X_train, y_train, X_test, y_test = X[:50000], y[:50000], X[50000:], y[50000:]

import numpy as np
shuffle_index = np.random.permutation(50000)
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

from sklearn.neighbors import KNeighborsClassifier as kn
knn_clf = kn()
X_train = X_train[:1000]   # keep only 1000 samples so the grid search stays fast
y_train = y_train[:1000]
knn_clf.fit(X_train, y_train)
y_pred = knn_clf.predict(X_train)   # in-sample predictions only

from sklearn.model_selection import GridSearchCV as grid
# 'accuracy' would be the natural scoring for digit classification; the
# original scores with negative MSE on the integer labels.
param = [{'weights': ['distance'], 'n_neighbors': [2, 3, 4], 'p': [4, 5, 6]}]
kn_clf = kn()
grid_clf = grid(kn_clf, param, cv=2, scoring='neg_mean_squared_error')

print('start!')
import time
t1 = time.time()
grid_clf.fit(X_train, y_train)
t2 = time.time()
print(grid_clf.best_estimator_)
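# The fragment times the grid search but never reports it; a plausible
# completion (the printout wording is my own).
print('grid search took %.1f s' % (t2 - t1))
print(grid_clf.best_params_, grid_clf.best_score_)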
# 1. Split into train / test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1 / 5, random_state=0)
# print(y_train)
print('Total samples: {}, training samples: {}, test samples: {}'.format(
    len(fruits_df), len(y_train), len(y_test)))

# 2. Visualize the feature variables
# sns.pairplot(data=fruits_df, hue='fruit_name', vars=['mass', 'width', 'height', 'color_score'])
# plt.tight_layout()
# plt.show()

# 3. Build the model
from sklearn.neighbors import KNeighborsClassifier as kn
knn = kn(n_neighbors=5)

# 4. Train the model
knn.fit(X_train, y_train)

# 5. Test the model
y_pred = knn.predict(X_test)   # predicted labels for the test set
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_test, y_pred)   # compare true vs. predicted labels
print('Accuracy: {}'.format(acc))

# 6. Examine how the choice of k affects the result
k_range = range(1, 20)
acc_score = []
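# The snippet breaks off after initializing k_range and acc_score; a minimal
# sketch of how step 6 presumably continues, reusing the split from above.
for k in k_range:
    knn_k = kn(n_neighbors=k)
    knn_k.fit(X_train, y_train)
    acc_score.append(accuracy_score(y_test, knn_k.predict(X_test)))
best_k = k_range[acc_score.index(max(acc_score))]
print('Best k: {}, accuracy: {}'.format(best_k, max(acc_score)))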
import pandas as pd
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier as kn
# sklearn.cross_validation was removed; train_test_split lives in model_selection.
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score as ac
# plt.style.use('ggplot')

iris = datasets.load_iris()
print(iris.keys())
print(iris.data.shape)
print(iris.target_names)

# EDA
X = iris.data
y = iris.target.astype(float)
df = pd.DataFrame(X, columns=iris.feature_names)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1 / 3, random_state=6)
knn = kn(n_neighbors=6)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)

# visualization
# plt.scatter(X_train, y_train, color='red')
# plt.plot(X_train, knn.predict(X_train), color='blue')

print("Accuracy using KNN at n=6 is:")
print(ac(y_test, pred))   # the bare expression printed nothing in a script
# print(df.head())
# pd.scatter_matrix(df, c=y, figsize=[9, 9], s=150, marker='D')
plt.show()

plt.scatter(ts_x, ts_y)
plt.plot(ts_x[np.argsort(ts_x)], prob_ts[:, 1][np.argsort(ts_x)], color='red')
plt.xlabel("X")
plt.ylabel("Y")
plt.title("HW2test Scatter Plot and Prob(y = 1|x)")
plt.show()

# ## (C)
# ### (1)

# In[24]:

for i in [1, 3, 9]:
    model_kn = kn(n_neighbors=i, weights='uniform',
                  algorithm='auto').fit(x_train, y_train.ravel())

    # Dense grid over [0, 100] for plotting the fitted decision function.
    gen_x = np.linspace(0, 100, num=1000)
    pred_knn_gen = model_kn.predict(gen_x.reshape(1000, -1))
    pred_knn_gen_pr = model_kn.predict_proba(gen_x.reshape(1000, -1))

    ts_x = np.array(df_test['X'])
    ts_y = np.array(df_test['Y'])
    prob_ts_pr = model_kn.predict_proba(ts_x.reshape(ts_size[0], -1))
    prob_ts = model_kn.predict(ts_x.reshape(ts_size[0], -1))

    print("KNN(%d) Accuracy for HW2train using score() function - %.2f%%"
          % (i, model_kn.score(x_train, y_train) * 100))
    print("KNN(%d) Accuracy for HW2test %.2f%%"
          % (i, model_kn.score(ts_x.reshape(ts_size[0], -1), ts_y) * 100))
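# The loop computes pred_knn_gen_pr but its plotting step is cut off; a minimal
# sketch of a plot those arrays support (labels are my own; in the original this
# presumably sat inside the loop, shown here once for the last k).
plt.scatter(ts_x, ts_y)
plt.plot(gen_x, pred_knn_gen_pr[:, 1], color='red')
plt.xlabel("X")
plt.ylabel("Y")
plt.title("KNN(%d): Prob(y = 1|x)" % i)
plt.show()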
import pandas as pd
from sklearn.ensemble import (ExtraTreesClassifier, RandomForestClassifier,
                              VotingClassifier)
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier as kn

FEATURES = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
            'chlorides', 'free sulfur dioxide', 'total sulfur dioxide',
            'density', 'pH', 'sulphates', 'alcohol']

data = pd.read_csv('red.csv')
x = data[FEATURES]
y = data['quality']

clf1 = GaussianNB()
# min_samples_split must be >= 2 in current scikit-learn; 1 raises an error.
clf2 = ExtraTreesClassifier(n_estimators=82, max_depth=None, min_samples_split=2, random_state=0)
clf3 = RandomForestClassifier(random_state=0, n_estimators=250, min_samples_split=2)
clf4 = kn(n_neighbors=13)
clf = VotingClassifier(estimators=[('gnb', clf1), ('et', clf2), ('rf', clf3), ('kn', clf4)],
                       voting='soft', weights=[1, 8, 2, 1]).fit(x, y)

test = pd.read_csv('red_test.csv')
x_test = test[FEATURES]
y_test = test['quality']
print(clf.score(x_test, y_test))
# train, test and imageId are loaded in an earlier cell (not shown).
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier as kn
# sklearn.cross_validation was removed; use model_selection instead.
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

train = train.sample(frac=1)   # shuffle, then keep 5000 rows to speed things up
train = train.head(5000)
x = train.drop('label', axis=1)
y = train['label']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=101)

st_scaler = StandardScaler()
x_train = st_scaler.fit_transform(x_train)
x_test = st_scaler.transform(x_test)

model = kn()
model.fit(x_train, y_train)
predictions = model.predict(x_test)
print(accuracy_score(y_test, predictions))

# The submission set must go through the same scaler as the training data.
x = model.predict(st_scaler.transform(test))
l = {'ImageId': imageId, 'label': x}
df = pd.DataFrame(l)
df.to_csv('sub_for_dig.csv', index=False)
data.head()

# In[5]:

image = data.iloc[:, 1:]   # pixel columns
label = data.iloc[:, :1]   # the first column holds the digit label

# # using knn to classify the images

# In[8]:

from sklearn.neighbors import KNeighborsClassifier as kn
from sklearn.model_selection import train_test_split

knn = kn(n_neighbors=10)
x_train, x_test, y_train, y_test = train_test_split(image, label,
                                                    test_size=0.2, random_state=100)
knn.fit(x_train, y_train.values.ravel())   # ravel() avoids the column-vector warning

# In[ ]:

predic = knn.predict(x_test)

# In[ ]:

from sklearn import metrics
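# In[ ]:

# The notebook ends right after importing sklearn.metrics; a minimal next
# cell, assuming the goal is the test-set accuracy of the predictions above.
print(metrics.accuracy_score(y_test, predic))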