def performLDA(data_to_fit, y, numComponent=None):
    data_to_fit_np_t = np.array(data_to_fit).T
    if numComponent is None:
        # sklearn caps n_components at min(n_features, n_classes - 1)
        numComponent = min(data_to_fit_np_t.shape[1], len(np.unique(y)) - 1)
    lda_model = LinearDiscriminantAnalysis(n_components=numComponent)
    lda_results = lda_model.fit_transform(data_to_fit_np_t, y)
    return lda_model, lda_results
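# A minimal usage sketch for performLDA, assuming the toy arrays below (none of the
# names here come from the source). Note that the function expects its input as
# features x samples and transposes it internally.
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

features_by_samples = np.random.rand(5, 30)  # 5 features, 30 samples
toy_labels = np.repeat([0, 1, 2], 10)        # 3 classes -> at most 2 components
model, projected = performLDA(features_by_samples, toy_labels)
print(projected.shape)                       # (30, 2)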
def assess_embedding(to_vec):
    """Return the LDA classification score and the projected data."""
    (x_data, y_data) = get_x_y_matrices(to_vec)
    lda = LDA(n_components=2)
    x_prime = lda.fit_transform(x_data, y_data)
    score = lda.score(x_data, y_data)
    # Flatten the (n_samples, 2) projection; assumes 26 elements in total.
    return (x_prime.reshape(26,), y_data, score)
def transformLDA(X, y, xTest):
    originalSize = np.size(X, 1)
    print("Learning LDA\nProjecting {} features to 1 component".format(originalSize))
    priors = [0.5, 0.5]
    clf = LinearDiscriminantAnalysis(solver='svd', n_components=1, priors=priors)
    print(X.shape)
    X = clf.fit_transform(X, y)
    print("True size of X: ", X.shape)
    if len(xTest) > 0:  # only transform the test set if one was supplied
        xTest = clf.transform(xTest)
    return X, xTest
def run_LDA(df):
    """
    Run LinearDiscriminantAnalysis on input dataframe (df) and return
    transformed data, scalings and explained variance by discriminants.
    """
    # Prep variables for sklearn LDA
    X = df.iloc[:, 1:].values      # input data matrix
    y = df["Condition"].values     # data categories list

    # Calculate LDA
    sklearn_lda = LDA()
    X_lda_sklearn = sklearn_lda.fit_transform(X, y)
    exp_var = sklearn_lda.explained_variance_ratio_
    return X_lda_sklearn, y, exp_var
def run_LDA(df):
    # Prep variables for sklearn LDA
    X = df.iloc[:, 2:].values      # input data matrix
    y = df['Condition'].values     # data categories list

    # Calculate LDA
    sklearn_lda = LDA(n_components=2)
    X_lda_sklearn = sklearn_lda.fit_transform(X, y)

    # Quality test - can be ignored
    # print(len(X_lda_sklearn))
    # print(sklearn_lda.predict_proba(X))
    # print(sklearn_lda.score(X, y))
    return X_lda_sklearn, y
def train_model(csv_path):
    '''
    INPUT: audio features csv with 'class' labels included
    OUTPUT: three pickled models stored in the models dir
        - StandardScaler (sklearn)
        - LinearDiscriminantAnalysis (sklearn)
        - SVC (sklearn)

    Takes an audio feature csv (created from 'feature_extraction.py')
    and pickles the fitted models for later use.
    '''
    csv = LOCAL_REPO_DIR + csv_path
    df = pd.read_csv(csv)

    # extract X, y for training the model from the dataframe
    X = df.drop(['class', 'fold', 'Unnamed: 0'], axis=1).values
    y = df['class'].values

    # feature matrix has many different scales, so standardize
    ss = StandardScaler()
    X = ss.fit_transform(X)

    lda = LinearDiscriminantAnalysis()
    X_lda = lda.fit_transform(X, y)

    # train the model using the best performing hyperparameters from a k-fold grid search
    svm = SVC(C=1, gamma=0.04)
    svm.fit(X_lda, y)

    # accuracy check (on the training data) to make sure the model is performing
    y_pred_svm = svm.predict(X_lda)
    print('model accuracy: ', accuracy_score(y, y_pred_svm))

    # pickle the models for later use
    with open(LOCAL_REPO_DIR + 'model/svm.pkl', 'wb') as f:
        pickle.dump(svm, f)
    with open(LOCAL_REPO_DIR + 'model/lda.pkl', 'wb') as f:
        pickle.dump(lda, f)
    with open(LOCAL_REPO_DIR + 'model/ss.pkl', 'wb') as f:
        pickle.dump(ss, f)
def fit_svm(prints):
    print("Fitting to SVM....")
    dataframe = pd.DataFrame(prints)
    y = dataframe[2]
    # stack the per-sample feature vectors into a 2-D matrix
    # (assumes each entry of column 0 is a 1-D feature array)
    X = np.vstack(dataframe[0])

    # in case the feature matrix has many different scales, standardize
    ss = StandardScaler()
    X = ss.fit_transform(X)

    lda = LinearDiscriminantAnalysis()
    X_lda = lda.fit_transform(X, y)  # was fit on the undefined name X_1

    # train the model using the best performing hyperparameters from a k-fold grid search
    svm = SVC(C=1, gamma=0.04)
    svm.fit(X_lda, y)
    pickle_model(svm, 'svm')
def run_LDA(df):
    """
    Run LinearDiscriminantAnalysis on input dataframe (df) and return
    transformed data, scalings and explained variance by discriminants.
    """
    # Prep variables for sklearn LDA
    X = df.iloc[:, 1:].values      # input data matrix
    y = df["Condition"].values     # data categories list

    # Calculate LDA
    sklearn_lda = LDA()
    X_lda_sklearn = sklearn_lda.fit_transform(X, y)
    try:
        exp_var = sklearn_lda.explained_variance_ratio_
    except AttributeError as ae:
        print("\n{}: explained variance cannot be computed.\nPlease check this GitHub PR:"
              " https://github.com/scikit-learn/scikit-learn/pull/6027".format(ae))
        return X_lda_sklearn, y, "NA"
    return X_lda_sklearn, y, exp_var
def project_back(x, digits):
    myLDA = LDA()
    new_train = myLDA.fit_transform(x.PCA[:, :154], digits.train_Labels)
    print(new_train.shape)
    m = 0
    n = 1
    plt.figure()
    colors = ['Green', 'Blue', 'Red', 'Purple', 'Black', 'Brown', 'Silver', 'Cyan']
    for digit, color in enumerate(colors):
        plt.scatter(new_train[digits.train_Labels == digit, m],
                    new_train[digits.train_Labels == digit, n],
                    color=color, s=1)
    plt.show()
    # The left operand of this product was garbled in the source; new_train is assumed.
    y = new_train @ myLDA.coef_[:9, :]  # I really don't know if this will work since there are 10 coef things
    weighted_y2 = y[:, :154] @ x.V[:154, :] + x.centers
    plt.imshow(weighted_y2[0, :].reshape(28, 28))
    plt.show()
def plot_sklearn_lda_with_lr(X_train, X_test, y_train, y_test):
    lda = LDA(n_components=2)
    X_train_lda = lda.fit_transform(X_train, y_train)

    lr = LogisticRegression()
    lr = lr.fit(X_train_lda, y_train)

    plot_decision_regions(X_train_lda, y_train, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()

    X_test_lda = lda.transform(X_test)
    plot_decision_regions(X_test_lda, y_test, classifier=lr)
    plt.xlabel('LD 1')
    plt.ylabel('LD 2')
    plt.legend(loc='lower left')
    plt.show()
def apply(self):
    transformed = components = None
    if self.data is not None:
        self.data = Continuize(Impute(self.data))
        lda = LinearDiscriminantAnalysis(solver='eigen', n_components=2)
        X = lda.fit_transform(self.data.X, self.data.Y)
        dom = Domain([ContinuousVariable('Component_1'),
                      ContinuousVariable('Component_2')],
                     self.data.domain.class_vars, self.data.domain.metas)
        transformed = Table(dom, X, self.data.Y, self.data.metas)
        transformed.name = self.data.name + ' (LDA)'
        dom = Domain(self.data.domain.attributes,
                     metas=[StringVariable(name='component')])
        metas = np.array([['Component_{}'.format(i + 1)
                           for i in range(lda.scalings_.shape[1])]],
                         dtype=object).T
        components = Table(dom, lda.scalings_.T, metas=metas)
        components.name = 'components'
    self.send("Transformed data", transformed)
    self.send("Components", components)
def do_LDA2D_KNN(digits, p, q):
    l, r = LDA2D.iterative2DLDA(digits.train_Images, digits.train_Labels, p, q, 28, 28)

    new_train = np.zeros((digits.train_Images.shape[0], p * q))
    for i in range(digits.train_Images.shape[0]):
        new_train[i] = (np.transpose(l) @ digits.train_Images[i].reshape(28, 28) @ r).reshape(p * q)

    new_test = np.zeros((digits.test_Images.shape[0], p * q))
    for i in range(digits.test_Images.shape[0]):
        new_test[i] = (np.transpose(l) @ digits.test_Images[i].reshape(28, 28) @ r).reshape(p * q)

    myLDA = LDA()
    x = center_matrix_SVD(new_train)
    new_new_train = myLDA.fit_transform(new_train - x.centers, digits.train_Labels)
    new_new_test = myLDA.transform(new_test - x.centers)

    labels, nearest = KNN(new_new_train, digits.train_Labels, new_new_test, 10, 'euclidean')
    pickle.dump(labels, open('LDA2DFDA' + str(p) + 'x' + str(q) + '_EU.p', 'wb'))
    #pickle.dump(nearest, open('NLDA2DFDA' + str(p) + 'x' + str(q) + '_EU.p', 'wb'))
    labels, nearest = KNN(new_new_train, digits.train_Labels, new_new_test, 10, 'cityblock')
    pickle.dump(labels, open('LDA2DFDA' + str(p) + 'x' + str(q) + '_CB.p', 'wb'))
    #pickle.dump(nearest, open('NLDA2DFDA' + str(p) + 'x' + str(q) + '_CB.p', 'wb'))
    labels, nearest = KNN(new_new_train, digits.train_Labels, new_new_test, 10, 'cosine')
    pickle.dump(labels, open('LDA2DFDA' + str(p) + 'x' + str(q) + '_CO.p', 'wb'))
def leave_one_out(feature_dict, glob, classifier, title):
    # feature_dict is a dictionary of feature names and a triple of booleans defining
    # which summary metrics to include respectively: (mean, std, measurewise)
    all_features = glob.get_features(feature_dict)
    all_classes = glob.get_feature('class', (True, True, True))
    class_pred, class_real = [], []
    vis.print_stars(newline=True)
    print("Testing " + title + " classification with features:")
    print(list(feature_dict.keys()))
    vis.print_dashes()
    sys.stdout.write("\r0 / %d samples processed (...)" % len(all_features))
    # Note: despite the name, this is LDA, not PCA. Fitting it on all samples
    # before the leave-one-out loop leaks label information into the test folds.
    pca = LinearDiscriminantAnalysis()
    all_features = pca.fit_transform(all_features, all_classes.ravel())
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    for idx in range(len(all_features)):
        train_features = np.delete(all_features, idx, 0)
        train_classes = np.delete(all_classes, idx, 0)
        test_feature = np.transpose(all_features[idx, :]).reshape((1, train_features.shape[1]))
        test_class = np.transpose(all_classes[idx, :])
        predicted_class = classify(train_features, train_classes, test_feature, classifier)
        class_pred.append(predicted_class)
        class_real.append(genre_from_int(test_class))
        t = time.perf_counter() - start
        time_per_iteration = t / (idx + 1)
        remaining = time_per_iteration * (len(all_features) - (idx + 1))
        sys.stdout.write("\r%d / %d samples processed (%02d:%02d:%02d left)" %
                         ((idx + 1), len(all_features),
                          remaining / 3600, (remaining / 60) % 60, remaining % 60))
    return [class_pred, class_real]
def main():
    digits = mnist()  # Creates a class with our mnist images and labels
    if not os.path.exists('Training SVD Data'):  # Create the file if it doesn't exist
        x = center_matrix_SVD(digits.train_Images)  # Creates a class with our svd and associated info
        pickle.dump(x, open('Training SVD Data', 'wb'))
    else:
        x = pickle.load(open('Training SVD Data', 'rb'))  # If we already have the file just load it
    if 1:  # if this is zero skip
        test_Images_Center = np.subtract(digits.test_Images, np.repeat(x.centers, digits.test_Images.shape[0], 0))
        tic()
        myLDA = LDA()  # Create a new instance of the LDA class
        new_train = myLDA.fit_transform(x.PCA[:, :154], digits.train_Labels)  # It will fit based on x.PCA
        # Project the centered test images into PCA space, then apply the fitted LDA
        new_test = myLDA.transform(test_Images_Center @ np.transpose(x.V[:154, :]))
        Knn_labels = local_kmeans_class(new_train, digits.train_Labels, new_test, 10)  # Run kNN on the new data
        toc()
        pickle.dump(Knn_labels, open('Loc_kmeans_fda_lab', 'wb'))
    fda = pickle.load(open('Loc_kmeans_fda_lab', 'rb'))
    labels_Full = pickle.load(open('KNN_Full', 'rb'))
    loc_full = pickle.load(open('Loc_kmeans_Full_lab', 'rb'))
    errors_fda, ind_fda = class_error_rate(np.transpose(fda), digits.test_labels)
    errors_near, ind_near = class_error_rate(labels_Full, digits.test_labels)
    errors_full, ind_full = class_error_rate(np.transpose(loc_full), digits.test_labels)
    labels_50 = pickle.load(open('KNN_50', 'rb'))
    errors_50, ind_50 = class_error_rate(labels_50, digits.test_labels)
    print(errors_full)
    plt.figure()
    plt.plot(np.arange(10) + 1, errors_fda, color='Green', marker='o', markersize=10, label='fda Kmeans')  # plots the 82.5%
    plt.plot(np.arange(10) + 1, errors_near, color='Blue', marker='o', markersize=10, label='kNN')
    plt.plot(np.arange(10) + 1, errors_full, color='Yellow', marker='o', markersize=10, label='Full Kmeans')
    plt.plot(np.arange(10) + 1, errors_50, color='Red', marker='o', markersize=10, label='kNN 50')
    axes = plt.gca()
    axes.set_ylim([0.015, 0.12])
    plt.grid(1)  # Turns the grid on
    plt.title('Plot of Local Kmeans with FDA Error rates')
    plt.legend(loc='upper right')  # Puts a legend on the plot
    plt.show()
    project_back(x, digits)
def dimension_reduce(self, mode='L'):
    print('Reduce Dimensions...')
    print('Start:' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    raw_train = self.train.copy()
    train = self.train.copy()
    train_label = self.train_label['label'].values.copy()
    train_label = train_label.reshape((train_label.shape[0]))
    test = self.test.copy()
    test_label = self.test_label['label'].values.copy()
    test_label = test_label.reshape((test_label.shape[0]))
    flist = train.columns
    if mode.upper() == 'L':
        lda = LinearDiscriminantAnalysis()
        X_new = lda.fit_transform(train.values, train_label)
        self.train = pd.DataFrame(X_new, columns=['DR'])
        self.test = pd.DataFrame(lda.transform(test[flist].values), columns=['DR'])
        tt = lda.coef_[0]
        ind = np.argsort(tt)
        features = raw_train.columns[ind[-100:]]
        feas = pd.DataFrame()
        feas['feature'] = features
        feas['values'] = tt[ind[-100:]]
        return feas
    elif mode.upper() == 'P':
        pca = PCA(n_components=100)
        X_new = pca.fit_transform(train.values, train_label)
        self.train = pd.DataFrame(X_new)
        self.test = pd.DataFrame(pca.transform(test[flist].values))
    print('End:' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
def best_lda_nba(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_nba_data()

    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    lda = LinearDiscriminantAnalysis(n_components=2)
    X_train_transformed = lda.fit_transform(X_train_scl, y_train)
    X_test_transformed = lda.transform(X_test_scl)

    # save
    filename = './' + self.save_dir + '/nba_lda_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_lda_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_lda_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_lda_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
def main():
    digits = mnist()  # Creates a class with our mnist images and labels
    if not os.path.exists('Training SVD Data'):  # Create the file if it doesn't exist
        print("im here")  # Just wanted to check if it was going in here
        x = center_matrix_SVD(digits.train_Images)  # Creates a class with our svd and associated info
        pickle.dump(x, open('Training SVD Data', 'wb'))
    else:
        x = pickle.load(open('Training SVD Data', 'rb'))  # If we already have the file just load it
    if 0:  # if this is zero skip
        test_Images_Center = np.subtract(digits.test_Images, np.repeat(x.centers, digits.test_Images.shape[0], 0))
        tic()
        myLDA = LDA()  # Create a new instance of the LDA class
        new_train = myLDA.fit_transform(x.PCA[:, :154], digits.train_Labels)  # It will fit based on x.PCA
        # Project the centered test images into PCA space, then apply the fitted LDA
        new_test = myLDA.transform(test_Images_Center @ np.transpose(x.V[:154, :]))
        Knn_labels, nearest = KNN(new_train, digits.train_Labels, new_test, 10)  # Run kNN on the new data
        toc()
        pickle.dump(Knn_labels, open('FDAKNN_Lables', 'wb'))
        pickle.dump(nearest, open('FDAKNN_neastest', 'wb'))
    fda = pickle.load(open('FDAKNN_Lables', 'rb'))
    labels_Full = pickle.load(open('KNN_Full', 'rb'))
    labels_50 = pickle.load(open('KNN_50', 'rb'))
    errors_fda, ind_fda = class_error_rate(fda, digits.test_labels)
    errors_near, ind_near = class_error_rate(labels_Full, digits.test_labels)
    errors_50, ind_50 = class_error_rate(labels_50, digits.test_labels)
    plt.figure()
    plt.plot(np.arange(10) + 1, errors_fda, color='Green', marker='o', markersize=10, label='fda')  # plots the 82.5%
    plt.plot(np.arange(10) + 1, errors_near, color='Blue', marker='o', markersize=10, label='kNN')
    plt.plot(np.arange(10) + 1, errors_50, color='Yellow', marker='o', markersize=10, label='kNN 50')
    plt.grid(1)  # Turns the grid on
    plt.title('Plot of Knn with FDA Error rates')
    plt.legend(loc='upper right')  # Puts a legend on the plot
    plt.show()
    print(confusion_matrix(digits.test_labels, labels_Full[5]))
    print(confusion_matrix(digits.test_labels, fda[5]))
    print(confusion_matrix(digits.test_labels, labels_50[5]))
    # remove axis spines
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)

    plt.tight_layout()  # note the call: a bare plt.tight_layout does nothing
    plt.grid()
    plt.show()


plot_pca()

# LDA via scikit-learn
sklearn_lda = LDA(n_components=2)
X_lda_sklearn = sklearn_lda.fit_transform(X, y)


def plot_scikit_lda(X, title):
    ax = plt.subplot(111)
    for label, marker, color in zip(range(1, 4), ('^', 's', 'o'),
                                    ('blue', 'red', 'green')):
        plt.scatter(x=X[:, 0][y == label],
                    y=X[:, 1][y == label] * -1,  # flip the figure
                    marker=marker,
                    color=color,
                    alpha=0.5,
                    label=label_dict[label])
    plt.xlabel('LD1')
for i, alpha in enumerate([0., 10., 1000.]):
    # Fit and transform data using PLDA
    plda = PLDA(alpha=alpha, n_components=2)
    X_plda = plda.fit_transform(X, y)

    # Compute classification accuracy
    acc = plda.score(X, y)

    # Plot transformed data
    plot_transform(X_plda, y, ax[0, i])
    ax[0, i].set_title("PLDA $\\alpha$={:.1f}\nacc={:.3f}".format(alpha, acc))

# For comparison, perform LDA
# Note: This should be the same as PLDA with alpha=0
lda = LinearDiscriminantAnalysis()
X_lda = lda.fit_transform(X, y)
acc = lda.score(X, y)
plot_transform(X_lda, y, ax[1, 0])
ax[1, 0].set_title("LDA\nacc={:.3f}".format(acc))

# For comparison, perform PCA
# Note: This should be the same as PLDA with very large alpha
pca = PCA()
X_pca = pca.fit_transform(X)
plot_transform(X_pca, y, ax[1, 2])
ax[1, 2].set_title("PCA\nacc=N/A")

# Ignore the middle subplot
ax[1, 1].axis('off')

plt.tight_layout()
# In[13]:

clf = KNeighborsClassifier(n_neighbors=7)
clf.fit(train_df[['sepel_len', 'sepel_width', 'pedal_len', 'pedal_width']], train_df['class'])
clf.score(test_df[['sepel_len', 'sepel_width', 'pedal_len', 'pedal_width']], test_df['class'])

# In[14]:

train_df.iloc[:, 0:4].head()

# In[15]:

sklearn_LDA = LDA(n_components=2)
train_r = sklearn_LDA.fit_transform(train_df.iloc[:, 0:4], train_df['class'])

# In[16]:

train_df_r = pd.DataFrame(train_r, columns=['feature1', 'feature2'])

# In[17]:

train_df_r = pd.concat([train_df_r, train_df['class'].reset_index(drop=True)], axis=1)

# In[18]:

train_df_r.head()
sc = StandardScaler()
Xtrain_sc = sc.fit_transform(Xtrain)
Xtest_sc = sc.transform(Xtest)

# Principal components
from sklearn.decomposition import PCA
## since there are only 2 variables, keep 1 component
pca = PCA(n_components=1)
Xtrain_pca = pca.fit_transform(Xtrain_sc)
Xtest_pca = pca.transform(Xtest_sc)

# Linear discriminant analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=1)
Xtrain_lda = lda.fit_transform(Xtrain_sc, ytrain)
Xtest_lda = lda.transform(Xtest_sc)

# Kernel PCA
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=1, kernel='rbf')
Xtrain_kpca = kpca.fit_transform(Xtrain_sc)
Xtest_kpca = kpca.transform(Xtest_sc)

# Logistic regression
from sklearn.linear_model import LogisticRegression
## logistic regression is an iterative algorithm, so random_state keeps the
## results approximately reproducible
logistic = LogisticRegression(random_state=4)
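# A minimal follow-up sketch, assuming the arrays above (Xtrain_pca, Xtrain_lda,
# Xtrain_kpca and the matching test sets) plus ytrain/ytest: fit the same logistic
# regression on each one-component reduction and compare test accuracy.
for name, (tr, te) in {'PCA': (Xtrain_pca, Xtest_pca),
                       'LDA': (Xtrain_lda, Xtest_lda),
                       'KPCA': (Xtrain_kpca, Xtest_kpca)}.items():
    logistic.fit(tr, ytrain)
    print(name, 'test accuracy:', logistic.score(te, ytest))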
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import kutuphane

giris, cikis, CustomerID = kutuphane.dosya_oku('data/Credit_Card_Applications.csv')
#kisi_bilgisi['Country'] = kisi_bilgisi['Country'].replace([":", ','], "").astype(int)

scaler = StandardScaler()
X = scaler.fit_transform(giris)

pca = PCA(n_components=50)
pca_x = pca.fit_transform(X)
accuracy, f1_skor = kutuphane.basari_hesaplaCV(pca_x, cikis, CustomerID)
print("PCA accuracy = " + str(accuracy))

lda = LDA(n_components=2)
lda_x = lda.fit_transform(X, cikis)
accuracy, f1_skor = kutuphane.basari_hesapla(lda_x, cikis, CustomerID)
print("LDA accuracy = " + str(accuracy))
print(train_y.shape, test_y.shape)

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

le = LabelEncoder()
train_y = le.fit_transform(train_y).reshape(-1, 1)
le2 = LabelEncoder()
test_y = le2.fit_transform(test_y).reshape(-1, 1)

# ---------------------------------------------------------------------------------
# DIMENSIONALITY REDUCTION USING LDA FOR VISUALIZATION
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

lda = LDA(n_components=2)
train_x = lda.fit_transform(train_x, train_y.ravel())  # ravel: sklearn expects a 1-D label array
test_x = lda.transform(test_x)
# -----------------------------------------------------------------------------------

# pdb.set_trace()

# COMPARING MODELS
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.svm import SVC

models = []  # models will contain tuples whose first element is a name and second element is a model
class Assignment(tk.Frame):
    def Apply(self):
        self.label_imgtype.destroy()
        plt.close('all')
        self.original_image, self.result_image, self.binary_image, self.filtered_image = \
            feature_extractor_GUI.extract_features_prediction(self.filename)
        self.original_image = cv2.cvtColor(self.original_image, cv2.COLOR_BGR2RGB)
        feature_extractor_GUI.DEV_drawGui(self.original_image, self.result_image, self.binary_image)
        plt.figure(2)
        plt.plot(self.filtered_image)
        plt.xlabel('Angles (deg)')
        plt.ylabel('Frequency')
        plt.title('Histogram of Filtered Image with Angle Calculation')
        plt.show()
        self.label_imgtype = tk.Label(root, text="Features Extracted Successfully")
        self.label_imgtype.config(font=("Times New Roman", 14))
        self.label_imgtype.grid(row=5, columnspan=2)
        self.feature_extraction_flag = True

    def browse(self):
        self.label_imgtype.destroy()
        self.filename = filedialog.askopenfilename()
        self.img = cv2.imread(self.filename, 1)
        self.b, self.g, self.r = cv2.split(self.img)
        self.img1 = cv2.merge((self.r, self.g, self.b))
        self.img2 = Image.fromarray(self.img1)
        self.img = self.img2.resize((512, 512))
        self.canvas.image = ImageTk.PhotoImage(self.img)
        self.canvas.create_image(0, 0, image=self.canvas.image, anchor='nw')
        self.label_imgtype = tk.Label(root, text="The Image Loaded, Features not Extracted")
        self.label_imgtype.config(font=("Times New Roman", 14))
        self.label_imgtype.grid(row=5, columnspan=2)

    def classify(self):
        # Check if feature extraction has been done; if not, run the function
        if self.feature_extraction_flag is False:
            self.Apply()
        # Check if the model has been trained; if not, train the model
        self.result_label.destroy()
        if self.train_flag is False:
            self.model_train()
        filename = 'features_test.csv'
        data, predict_features, _ = data_file_reader.file_reader(filename, 'test')
        lda_test_set = self.lda.transform(predict_features)
        prediction = self.clf.predict(lda_test_set)
        if prediction == 0:
            self.result_label = tk.Label(root, text="The Image Contains a Natural Scene", wraplength=200)
            print('The Image Contains a Natural Scene')
        else:
            self.result_label = tk.Label(root, text="The Image Contains Man Made Objects in the Scene", wraplength=200)
            print('The Image Contains Man Made Objects in the Scene')
        self.result_label.config(font=("Times New Roman", 14))
        self.result_label.place(relx=0.7, rely=0.45, anchor='sw')

    def model_train(self):
        self.label_training_state.destroy()
        filename = 'features_train.csv'
        data, features, labels = data_file_reader.file_reader(filename, 'train')
        self.svc = SVC(kernel='linear', C=1)
        self.rf = RandomForestClassifier(n_estimators=50, random_state=1)
        self.knn = KNeighborsClassifier(n_neighbors=3)
        self.mv = VotingClassifier(estimators=[('rf', self.rf), ('knn', self.knn), ('svc', self.svc)],
                                   voting='hard')
        # n_components is capped at n_classes - 1 (1 for this binary task),
        # so let sklearn pick it rather than requesting 200 components.
        self.lda = LDA()
        lda_train_set = self.lda.fit_transform(features, np.ravel(labels))
        if self.comboExample.get() == "Majority Voting":
            self.clf = self.mv.fit(lda_train_set, np.ravel(labels))
            classifier_label = "Model Trained Successfully on Majority Voting"
        elif self.comboExample.get() == "SVC":
            self.clf = self.svc.fit(lda_train_set, np.ravel(labels))
            classifier_label = "Model Trained Successfully on SVC"
        elif self.comboExample.get() == "KNN":
            self.clf = self.knn.fit(lda_train_set, np.ravel(labels))
            classifier_label = "Model Trained Successfully on KNN"
        else:
            self.clf = self.rf.fit(lda_train_set, np.ravel(labels))
            classifier_label = "Model Trained Successfully on Random Forest"
        self.label_training_state = tk.Label(root, text=classifier_label,
                                             wraplength=200)
        self.label_training_state.config(font=("Times New Roman", 12))
        self.label_training_state.grid(row=6, column=8)
        self.train_flag = True

    def __init__(self, root):
        tk.Frame.__init__(self, root)
        self.train_flag = False
        self.feature_extraction_flag = False
        # BROWSE
        self.btn_browse = tk.Button(root, text="Browse", command=self.browse)
        self.btn_browse.grid(row=0, column=0)
        # APPLY
        self.btn_feature_extract = tk.Button(root, text="Extract Features", command=self.Apply)
        self.btn_feature_extract.grid(row=0, column=1)
        # Classify Image
        self.btn_classify = tk.Button(root, text="Classify Image", command=self.classify)
        self.btn_classify.grid(row=5, column=5)
        # Train Model
        self.model_train_btn = tk.Button(root, text="Train Model", command=self.model_train)
        self.model_train_btn.grid(row=5, column=8)
        # CANVAS
        self.canvas = tk.Canvas(root, width=800, height=800)
        self.canvas.grid(row=10, columns=10)
        self.label_imgtype = tk.Label(root, text="No Image Loaded")
        self.label_imgtype.config(font=("Times New Roman", 14))
        self.label_imgtype.grid(row=5, columnspan=2)
        # Labels
        self.label_training_state = tk.Label(root, text="Not Trained")
        self.label_training_state.grid(row=6, column=8)
        self.result_label = tk.Label(root, text="No Classification")
        self.result_label.config(font=("Times New Roman", 14))
        self.result_label.place(relx=0.7, rely=0.4, anchor='sw')
        self.course_label = tk.Label(root, text="Pattern Recognition (LOTI.05.046)")
        self.course_label.config(font=("Times New Roman", 16))
        self.course_label.place(relx=0.34, rely=0.92, anchor='sw')
        self.title_label = tk.Label(root, text="Man made object detection in natural scenes")
        self.title_label.config(font=("Times New Roman", 16))
        self.title_label.place(relx=0.3, rely=0.95, anchor='sw')
        self.classifier_label = tk.Label(root, text="Choose Classifier")
        self.classifier_label.grid(row=0, column=8)
        self.comboExample = ttk.Combobox(root, values=["SVC", "Random Forest", "KNN", "Majority Voting"])
        self.comboExample.grid(column=8, row=1)
        self.comboExample.current(3)
        self.logo = cv2.imread('tartu_logo.jpg', 1)
        self.b, self.g, self.r = cv2.split(self.logo)
        self.logo1 = cv2.merge((self.r, self.g, self.b))
        self.logo2 = Image.fromarray(self.logo1)
        self.logo = self.logo2.resize((180, 180))
        self.canvas.image_logo = ImageTk.PhotoImage(self.logo)
        self.canvas.create_image(0, 540, image=self.canvas.image_logo, anchor='nw')
sc_x = StandardScaler()
sc_y = StandardScaler()

# Scale X
X_train = sc_x.fit_transform(X_train)
X_test = sc_x.transform(X_test)

# Scale y

###################### 3- Training ######################
# LDA
n_components = 2
lda = LDA(n_components=n_components)
_X_train = lda.fit_transform(X_train, y_train)
_X_test = lda.transform(X_test)  # transform only: refitting on the test set would leak its labels

# Logistic Regression
classifier = LogisticRegression()
classifier.fit(_X_train, y_train)

###################### 3- Testing ######################
y_pred = classifier.predict(_X_test)
cm = confusion_matrix(y_test, y_pred)

###################### 3- Visualization ######################
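# A more compact variant of the same flow (a sketch, assuming the names above): a
# Pipeline chains the scaler, the LDA reduction and the classifier, fitting only on
# the training data and applying transform-only logic to the test set automatically.
from sklearn.pipeline import make_pipeline

pipe = make_pipeline(StandardScaler(), LDA(n_components=n_components), LogisticRegression())
pipe.fit(X_train, y_train)
print(confusion_matrix(y_test, pipe.predict(X_test)))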
class LDA(CtrlNode):
    """Linear Discriminant Analysis, uses sklearn"""
    nodeName = "LDA"
    uiTemplate = [('train_data', 'list_widget',
                   {'selection_mode': QtWidgets.QAbstractItemView.ExtendedSelection,
                    'toolTip': 'Column containing the training data'}),
                  ('train_labels', 'combo',
                   {'toolTip': 'Column containing training labels'}),
                  ('solver', 'combo', {'items': ['svd', 'lsqr', 'eigen']}),
                  ('shrinkage', 'combo', {'items': ['None', 'auto', 'value']}),
                  ('shrinkage_val', 'doubleSpin', {'min': 0.0, 'max': 1.0, 'step': 0.1, 'value': 0.5}),
                  ('n_components', 'intSpin', {'min': 2, 'max': 1000, 'step': 1, 'value': 2}),
                  ('tol', 'intSpin', {'min': -50, 'max': 0, 'step': 1, 'value': -4}),
                  ('score', 'lineEdit', {}),
                  ('predict_on', 'list_widget',
                   {'selection_mode': QtWidgets.QAbstractItemView.ExtendedSelection,
                    'toolTip': 'Data column of the input "predict" Transmission\n'
                               'that is used for predicting from the model'}),
                  ('Apply', 'check', {'applyBox': True, 'checked': False})
                  ]

    def __init__(self, name, **kwargs):
        CtrlNode.__init__(self, name,
                          terminals={'train': {'io': 'in'},
                                     'predict': {'io': 'in'},
                                     'T': {'io': 'out'},
                                     'coef': {'io': 'out'},
                                     'means': {'io': 'out'},
                                     'predicted': {'io': 'out'}
                                     },
                          **kwargs)
        self.ctrls['score'].setReadOnly(True)

    def process(self, **kwargs):
        return self.processData(**kwargs)

    def processData(self, train: Transmission, predict: Transmission):
        self.t = train.copy()  #: Transmission instance containing the training data with the labels

        if predict is not None:
            self.to_predict = predict.copy()  #: Transmission instance containing the data to predict after fitting on the training data

        dcols, ccols, ucols = organize_dataframe_columns(self.t.df.columns)
        self.ctrls['train_data'].setItems(dcols)
        self.ctrls['train_labels'].setItems(ccols)

        if predict is not None:
            pdcols, ccols, ucols = organize_dataframe_columns(self.to_predict.df.columns)
            self.ctrls['predict_on'].setItems(pdcols)

        if not self.apply_checked():
            return

        train_columns = self.ctrls['train_data'].getSelectedItems()
        labels = self.ctrls['train_labels'].currentText()

        solver = self.ctrls['solver'].currentText()

        shrinkage = self.ctrls['shrinkage'].currentText()
        if shrinkage == 'value':
            shrinkage = self.ctrls['shrinkage_val'].value()
        elif shrinkage == 'None':
            shrinkage = None

        n_components = self.ctrls['n_components'].value()
        tol = 10 ** self.ctrls['tol'].value()

        store_covariance = True if solver == 'svd' else False

        params = {'train_data': train_columns,
                  'train_labels': labels,
                  'solver': solver,
                  'shrinkage': shrinkage,
                  'n_components': n_components,
                  'tol': tol,
                  'store_covariance': store_covariance
                  }

        kwargs = params.copy()
        kwargs.pop('train_data')
        kwargs.pop('train_labels')
        self.lda = LinearDiscriminantAnalysis(**kwargs)

        # Make an array of all the data from the selected columns
        self.X = np.hstack([np.vstack(self.t.df[train_column]) for train_column in train_columns])
        self.y = self.t.df[labels]

        self.X_ = self.lda.fit_transform(self.X, self.y)

        self.t.df['_LDA_TRANSFORM'] = self.X_.tolist()
        self.t.df['_LDA_TRANSFORM'] = self.t.df['_LDA_TRANSFORM'].apply(np.array)

        params.update({'score': self.lda.score(self.X, self.y),
                       'classes': self.lda.classes_.tolist()
                       })

        self.ctrls['score'].setText(f"{params['score']:.4f}")

        self.t.history_trace.add_operation('all', 'lda', params)

        self.t.df['_LDA_DFUNC'] = self.lda.decision_function(self.X).tolist()

        coef_df = pd.DataFrame({'classes': self.lda.classes_,
                                '_COEF': self.lda.coef_.tolist()})
        t_coef = Transmission(df=coef_df, history_trace=self.t.history_trace)

        means_df = pd.DataFrame({'classes': self.lda.classes_,
                                 '_MEANS': self.lda.means_.tolist()})
        t_means = Transmission(df=means_df, history_trace=self.t.history_trace)

        out = {'T': self.t, 'coef': t_coef, 'means': t_means, 'predicted': None}

        # Predict using the trained model
        predict_columns = self.ctrls['predict_on'].getSelectedItems()

        if not predict_columns:
            return out

        if predict_columns != train_columns:
            # QMessageBox.warning requires a parent widget as its first argument
            QtWidgets.QMessageBox.warning(None, 'Predict and Train columns do not match',
                                          'The selected train and predict columns are different')

        predict_data = np.hstack([np.vstack(self.to_predict.df[predict_column]) for predict_column in predict_columns])
        self.to_predict.df['LDA_PREDICTED_LABELS'] = self.lda.predict(predict_data)
        self.to_predict.df['_LDA_TRANSFORM'] = self.lda.transform(predict_data).tolist()
        self.to_predict.df['_LDA_TRANSFORM'] = self.to_predict.df['_LDA_TRANSFORM'].apply(np.array)

        params_predict = params.copy()
        params_predict.update({'predict_columns': predict_columns})

        self.to_predict.history_trace.add_operation('all', 'lda-predict', params_predict)

        out.update({'predicted': self.to_predict})

        return out
def lda(X, y, *, solver='svd', shrinkage=None, n_components=None):
    """Linear discriminant analysis.

    This function reduces the dimensionality of the input by projecting
    it to the most discriminative directions.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features_pre)
        Feature matrix.

    y : ndarray of shape (n_samples,)
        Response variables.

    solver : string, default='svd'
        'svd' : Singular value decomposition
        'lsqr' : Least squares solution (note: does not support transform,
                 so it cannot be used with this function)
        'eigen' : Eigenvalue decomposition

    shrinkage : string, float, or None, default=None
        Shrinkage parameter (only usable with the 'lsqr' and 'eigen' solvers).
        None : no shrinkage
        'auto' : automatic shrinkage using the Ledoit-Wolf lemma
        float between 0 and 1 : fixed shrinkage parameter

    n_components : int or None, default=None
        Number of components for dimensionality reduction. This parameter
        cannot be larger than min(n_features, n_classes - 1).

    Returns
    -------
    arr : ndarray of shape (n_samples, n_features_post)
        Array containing the LDA-transformed features.

    Examples
    --------
    >>> import numpy as np
    >>> from protlearn.features import aac, aaindex1, ngram
    >>> from protlearn.dimreduction import lda
    >>> seqs = ['ARKLY', 'EERKPGL', 'PGPGEERNLY']
    >>> labels = [1., 0., 0.]
    >>> comp, _ = aac(seqs)
    >>> aaind, _ = aaindex1(seqs)
    >>> ng, _ = ngram(seqs)
    >>> features = np.concatenate([comp, aaind, ng], axis=1)
    >>> features.shape
    (3, 575)
    >>> reduced = lda(features, labels, n_components=1)
    >>> reduced.shape
    (3, 1)
    """
    mdl = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage,
                                     n_components=n_components)
    arr = mdl.fit_transform(X, y)
    return arr
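# A hedged follow-up sketch reusing the docstring example above: shrinkage requires
# the 'lsqr' or 'eigen' solver, and since 'lsqr' cannot transform, 'eigen' is the
# only solver that combines shrinkage with this function.
reduced_shrunk = lda(features, labels, solver='eigen', shrinkage='auto', n_components=1)
print(reduced_shrunk.shape)  # (3, 1)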
    y, test_size=0.30, random_state=20, stratify=y)

# Standardize features
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Linear Discriminant Analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=6)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)  # transform with the fitted LDA; do not refit on test data

# Plot 2-D LDA
markers = ('s', 'x', 'o', '^', 'v', 'P', '*')
#plt.scatter(X_train_lda[:, 0], X_train_lda[:, 1], c=y_train, cmap=plt.cm.Paired, marker=markers[0])
#plt.xlabel('LDA 1')
#plt.ylabel('LDA 2')

# Declare empty arrays for accuracy and number of neighbours
accuracy = np.zeros((6, 1))
accuracy_val = np.zeros((6, 1))
neighbours = np.linspace(2, 7, 6)
for x in neighbours:
from sklearn.decomposition import PCA
pca = PCA(17)
fit = pca.fit(dataX, dataY)
train2 = pca.transform(dataX)
acuu(train2, dataY)

import warnings
warnings.filterwarnings("ignore")

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=17)  # note: LDA allows at most min(n_features, n_classes - 1) components
fit = lda.fit_transform(dataX, dataY)
train3 = lda.transform(dataX)
# fit = lda.fit(X=dataX, y=dataY)
# train2 = fit.fit_transform(dataX)
print(train3.shape)
acuu(train3, dataY)

#--------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
test = SelectKBest(score_func=chi2, k=11)  # k is the number of features
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.preprocessing import Normalizer
'''
Feature dimensionality reduction: PCA and LDA
'''
import pandas as pd

train = pd.read_csv(r"G:\比赛分享\data\alltrain.csv")
test = pd.read_csv(r"G:\比赛分享\data\alltest.csv")
y = train['label']
del train['label']
del train['id']
id_a = test['id']
del test['id']

nor = Normalizer()
train = nor.fit_transform(train)
test = nor.transform(test)

lda = LDA(n_components=1)  # with (n_classes - 1) = 1 for binary labels, LDA can only produce one feature
train_lda = lda.fit_transform(train, y)
test_lda = lda.transform(test)

pca = PCA(50)
train_pca = pca.fit_transform(train)
test_pca = pca.transform(test)
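# A small self-contained illustration of the (n_classes - 1) cap noted above,
# with made-up arrays: binary labels yield at most one discriminant component.
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

X_demo = np.random.rand(20, 5)
y_demo = np.array([0, 1] * 10)
print(LDA(n_components=1).fit_transform(X_demo, y_demo).shape)  # (20, 1)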
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Applying LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2)
X_train = lda.fit_transform(X_train, y_train)
X_test = lda.transform(X_test)

# Training the Logistic Regression model on the Training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)
def lda_project(spike_times, spike_clusters, event_times, event_groups, pre_time=0, post_time=0.5,
                cross_validation='kfold', num_splits=5, prob_left=None, custom_validation=None):
    """
    Use linear discriminant analysis to project population vectors to the line that best separates
    the two groups. When cross-validation is used, the LDA projection is fitted on the training
    data, after which the test data is projected onto it.

    Parameters
    ----------
    spike_times : 1D array
        spike times (in seconds)
    spike_clusters : 1D array
        cluster ids corresponding to each event in `spikes`
    event_times : 1D array
        times (in seconds) of the events from the two groups
    event_groups : 1D array
        group identities of the events, can be any number of groups, accepts integers and strings
    cross_validation : string
        which cross-validation method to use, options are:
            'none'           No cross-validation
            'kfold'          K-fold cross-validation
            'leave-one-out'  Leave out the trial that is being decoded
            'block'          Leave out the block the to-be-decoded trial is in
            'custom'         Any custom cross-validation provided by the user
    num_splits : integer
        ** only for 'kfold' cross-validation **
        Number of splits to use for k-fold cross validation. A value of 5 means that the decoder
        will be trained on 4/5th of the data and used to predict the remaining 1/5th. This process
        is repeated five times so that all data has been used as both training and test set.
    prob_left : 1D array
        ** only for 'block' cross-validation **
        the probability of the stimulus appearing on the left for each trial in event_times
    custom_validation : generator
        ** only for 'custom' cross-validation **
        a generator object with the splits to be used for cross validation using this format:
            (
                (split1_train_idxs, split1_test_idxs),
                (split2_train_idxs, split2_test_idxs),
                (split3_train_idxs, split3_test_idxs),
             ...)
    n_neurons : int
        Group size of number of neurons to be sub-selected

    Returns
    -------
    lda_projection : 1D array
        the position along the LDA projection axis for the population vector of each trial
    """
    # Check input
    assert cross_validation in ['none', 'kfold', 'leave-one-out', 'block', 'custom']
    assert event_times.shape[0] == event_groups.shape[0]
    if cross_validation == 'block':
        assert event_times.shape[0] == prob_left.shape[0]
    if cross_validation == 'custom':
        assert isinstance(custom_validation, types.GeneratorType)

    # Get matrix of all neuronal responses
    times = np.column_stack(((event_times - pre_time), (event_times + post_time)))
    pop_vector, cluster_ids = get_spike_counts_in_bins(spike_times, spike_clusters, times)
    pop_vector = pop_vector.T

    # Initialize
    lda = LinearDiscriminantAnalysis()
    lda_projection = np.zeros(event_groups.shape)

    if cross_validation == 'none':
        # Find the best LDA projection on all data and transform those data
        lda_projection = lda.fit_transform(pop_vector, event_groups)
    else:
        # Perform cross-validation
        if cross_validation == 'leave-one-out':
            cv = LeaveOneOut().split(pop_vector)
        elif cross_validation == 'kfold':
            cv = KFold(n_splits=num_splits).split(pop_vector)
        elif cross_validation == 'block':
            block_lengths = [sum(1 for i in g) for k, g in groupby(prob_left)]
            blocks = np.repeat(np.arange(len(block_lengths)), block_lengths)
            cv = LeaveOneGroupOut().split(pop_vector, groups=blocks)
        elif cross_validation == 'custom':
            cv = custom_validation

        # Loop over the splits into train and test
        for train_index, test_index in cv:
            # Find the LDA projection on the training data
            lda.fit(pop_vector[train_index], [event_groups[j] for j in train_index])

            # Project the held-out test data onto this projection
            lda_projection[test_index] = lda.transform(pop_vector[test_index]).T[0]

    return lda_projection
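# A hedged usage sketch for the 'custom' cross-validation mode described above,
# assuming spike_times, spike_clusters, event_times and event_groups already exist;
# my_splits and its half/half index split are illustrative only.
import numpy as np

def my_splits(n_trials):
    # yields (train_idxs, test_idxs) pairs in the documented format
    half = n_trials // 2
    yield (np.arange(half), np.arange(half, n_trials))
    yield (np.arange(half, n_trials), np.arange(half))

projection = lda_project(spike_times, spike_clusters, event_times, event_groups,
                         cross_validation='custom',
                         custom_validation=my_splits(event_times.shape[0]))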
# Perform Leave One Out validation for the LDA - Decision Tree Classifier
total_score = 0
for train_index, test_index in LOO.split(feature):
    train_features, test_features = feature[train_index], feature[test_index]
    train_labels, test_labels = labels[train_index], labels[test_index]
    lda = LDA()
    lda = lda.fit(train_features, train_labels.ravel())
    lda_train_set = lda.transform(train_features)
    lda_test_set = lda.transform(test_features)
    clf_lda = d3.fit(lda_train_set, train_labels)
    prediction_lda = clf_lda.predict(lda_test_set)
    total_score += accuracy_score(test_labels, prediction_lda)
mean_score = (total_score / number_of_iterations)
score = mean_score
print("LDA Scores + leave one cross_validation:", score)

# Perform Cross Validation for 10 folds for the LDA - Decision Tree Classifier
# Note: fitting the LDA on the full data set before cross_val_score leaks label
# information into the folds; see the pipeline sketch below for a clean variant.
lda = LDA()
lda_features = lda.fit_transform(feature, labels.ravel())
scores = cross_val_score(d3, lda_features, labels, cv=10)
scores = scores.mean()
print("LDA Scores + 10 fold cross_validation:", scores)
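# A leakage-free alternative sketch, assuming feature, labels and the d3 classifier
# from above: wrapping LDA and the decision tree in a Pipeline refits the LDA inside
# every fold instead of fitting it once on the full data set.
from sklearn.pipeline import Pipeline

pipe = Pipeline([('lda', LDA()), ('tree', d3)])
scores_pipe = cross_val_score(pipe, feature, labels.ravel(), cv=10)
print("Pipeline LDA + 10 fold cross_validation:", scores_pipe.mean())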
# ---------
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
X = scaler.fit_transform(X)
# ---------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1 / 3, random_state=16, shuffle=True)
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)
print("=" * 25)
# ---------
# Applying the LDA model
LDAModel = LDA(n_components=2, solver='svd')
X = LDAModel.fit_transform(X, y)  # note: fitting on all of X, not just the training split
# X_train = LDAModel.fit_transform(X_train, y_train)
# X_test = LDAModel.transform(X_test)
print(X.shape)
print("=" * 10)
# ---------
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
X = scaler.fit_transform(X)
# ---------
# ----------------------------------------------------
# Calculating Details
# The scores below use LDA as a classifier on the original-dimensional splits,
# which were taken before the fit_transform above.
print('LDAModel Train Score is : ', LDAModel.score(X_train, y_train))
print('LDAModel Test Score is : ', LDAModel.score(X_test, y_test))
print("=" * 10)

# LDAModel Train Score is :  0.98
# LDAModel Test Score is :  0.98
def run(train_pyramid_descriptors, D, test_pyramid_descriptors, feat_des_options):
    train_images_filenames = cPickle.load(open('train_images_filenames.dat', 'rb'))
    test_images_filenames = cPickle.load(open('test_images_filenames.dat', 'rb'))
    train_labels = cPickle.load(open('train_labels.dat', 'rb'))
    test_labels = cPickle.load(open('test_labels.dat', 'rb'))

    k = feat_des_options['k']
    codebook = MiniBatchKMeans(n_clusters=k,
                               verbose=False,
                               batch_size=k * 20,
                               compute_labels=False,
                               reassignment_ratio=10**-4,
                               random_state=42)
    codebook.fit(D)

    visual_words_pyramid = np.zeros((len(train_pyramid_descriptors),
                                     k * len(train_pyramid_descriptors[0])),
                                    dtype=np.float32)
    for i in range(len(train_pyramid_descriptors)):
        visual_words_pyramid[i, :] = spatial_pyramid_histograms(train_pyramid_descriptors[i], codebook, k)

    knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1, metric='euclidean')
    knn.fit(visual_words_pyramid, train_labels)

    # logreg = LogisticRegression(random_state=0, max_iter=300).fit(visual_words_pyramid, train_labels)
    # scores = cross_validate(logreg, visual_words_pyramid, train_labels,
    #                         scoring=['precision_macro', 'recall_macro', 'f1_macro'],
    #                         cv=5, return_estimator=True)
    scores = cross_validate(
        knn,
        visual_words_pyramid,
        train_labels,
        scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
        cv=8,
        return_estimator=True)
    cross_val_accuracy = scores['test_accuracy'].mean()
    cross_val_precision = scores['test_precision_macro'].mean()
    cross_val_recall = scores['test_recall_macro'].mean()
    cross_val_f1 = scores['test_f1_macro'].mean()
    # print("%0.2f precision with a std dev of %0.2f" % (cross_val_precision, scores['test_precision_macro'].std()))
    # print("%0.2f recall with a std dev of %0.2f" % (cross_val_recall, scores['test_recall_macro'].std()))
    # print("%0.2f F1-score with a std dev of %0.2f" % (cross_val_f1, scores['test_f1_macro'].std()))

    visual_words_test = np.zeros((len(test_images_filenames), visual_words_pyramid.shape[1]),
                                 dtype=np.float32)
    for i in range(len(test_images_filenames)):
        visual_words_test[i, :] = spatial_pyramid_histograms(test_pyramid_descriptors[i], codebook, k)

    test_accuracy = 100 * knn.score(visual_words_test, test_labels)
    # print("Test accuracy: %0.2f" % (test_accuracy))
    test_prediction = knn.predict(visual_words_test)
    # test_prediction = logreg.predict(visual_words_test)
    test_precision, test_recall, test_fscore, _ = precision_recall_fscore_support(
        test_labels, test_prediction, average='macro')
    # print("%0.2f precision" % (test_precision))
    # print("%0.2f recall" % (test_recall))
    # print("%0.2f F1-score" % (test_fscore))

    # pca = PCA(n_components=64)
    pca = PCA(n_components=feat_des_options['pca_perc'], svd_solver='full')
    VWpca = pca.fit_transform(visual_words_pyramid)
    knnpca = KNeighborsClassifier(n_neighbors=5, n_jobs=-1, metric='euclidean')
    knnpca.fit(VWpca, train_labels)
    vwtestpca = pca.transform(visual_words_test)
    pca_test_accuracy = 100 * knnpca.score(vwtestpca, test_labels)
    # print("PCA Test accuracy: %0.2f" % (pca_test_accuracy))

    # Note: cross_validate clones and refits the estimator, so these scores are
    # computed on the unreduced visual words, not on the PCA projection.
    scores_pca = cross_validate(
        knnpca,
        visual_words_pyramid,
        train_labels,
        scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
        cv=8,
        return_estimator=True)
    cross_val_accuracy_pca = scores_pca['test_accuracy'].mean()
    cross_val_precision_pca = scores_pca['test_precision_macro'].mean()
    cross_val_recall_pca = scores_pca['test_recall_macro'].mean()
    cross_val_f1_pca = scores_pca['test_f1_macro'].mean()

    lda = LinearDiscriminantAnalysis(n_components=7)
    VWlda = lda.fit_transform(visual_words_pyramid, train_labels)
    knnlda = KNeighborsClassifier(n_neighbors=5, n_jobs=-1, metric='euclidean')
    knnlda.fit(VWlda, train_labels)
    vwtestlda = lda.transform(visual_words_test)
    lda_test_accuracy = 100 * knnlda.score(vwtestlda, test_labels)
    # print("LDA Test accuracy: %0.2f" % (lda_test_accuracy))

    return [
        cross_val_accuracy, cross_val_precision, cross_val_recall, cross_val_f1,
        test_precision, test_recall, test_fscore, test_accuracy,
        pca_test_accuracy, cross_val_accuracy_pca, cross_val_precision_pca,
        cross_val_recall_pca, cross_val_f1_pca, lda_test_accuracy
    ]
dataset = pd.read_csv('../../data/Wine.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)  # reuse the training-set scaling; do not refit on the test set

# Dimensionality Reduction with LDA, n_components -> number of dimensions we want to get to
lda = LinearDiscriminantAnalysis(n_components=2)
X_train = lda.fit_transform(X_train, y_train)  # y_train is necessary since it is a supervised algorithm
X_test = lda.transform(X_test)

# Model
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", cm)
print("Accuracy Score:", accuracy_score(y_test, y_pred))
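# A small follow-on sketch, assuming the variables above: visualize the two
# discriminants LDA produced for the three Wine classes.
import numpy as np
import matplotlib.pyplot as plt

for cls in np.unique(y_train):
    plt.scatter(X_train[y_train == cls, 0], X_train[y_train == cls, 1], label=cls)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend()
plt.show()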
         color='blue', linewidth=4)
plt.xlabel('Number of components')
plt.ylabel('Cumulative explained variance')
plt.show()

# Applying Kernel PCA
# Please turn off when applying PCA
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=32, kernel='rbf')
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

# Applying LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=35)  # note: LDA allows at most min(n_features, n_classes - 1) components
X_train = lda.fit_transform(X_train, Y_train)
X_test = lda.transform(X_test)  # transform with the fitted LDA; refitting on the test set would leak its labels

# Fitting SVM to the Training Set
from sklearn.svm import SVC
classifier = SVC(kernel='rbf', random_state=0)  # kernel can be changed to linear for linear SVM
classifier.fit(X_train, Y_train)

# Fitting Decision Tree to the Training Set
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
classifier.fit(X_train, Y_train)

# Predicting the Test Set Results
#[email protected]
"""
LDA: make the two classes as far apart as possible in the projected space.
"""
print(__doc__)
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

mean = [0, 0]               # mean
cov = [[1, 0.9], [0.9, 1]]  # covariance
x, y = np.random.multivariate_normal(mean, cov, 1000).T
x = np.reshape(x, [-1, 1])
y = np.reshape(y, [-1, 1])
X = np.concatenate([x, y], axis=1)
label = np.zeros_like(x[:, 0])
label[x[:, 0] > 0] = 1

# Note: despite the variable name, this is LDA, not PCA.
pca = LinearDiscriminantAnalysis()
X_pca = pca.fit_transform(X, label)

fig = plt.figure()
ax = fig.add_subplot(211)
ax.scatter(X[:, 0], X[:, 1], c=label)
ax.axis("equal")
ax = fig.add_subplot(212)
ax.scatter(X_pca, np.zeros_like(X_pca), c=label)
ax.axis("equal")
plt.show()
#y_test = y_test[np.newaxis]
print("Shape of Train set features (X_train) : ", X_train.shape)
print("Shape of Train set labels (y_train)   : ", y_train.shape)
print("Shape of Test set features (X_test)   : ", X_test.shape)
print("Shape of Test set labels (y_test)     : ", y_test.shape)

# ### 5. APPLYING LDA TO TEST AND TRAIN

# In[5]:

### LDA
lda = LinearDiscriminantAnalysis()
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)
print("Shape of Feature Train set Before LDA: ", X_train.shape)
print("Shape of Feature Train set After LDA : ", X_train_lda.shape)
print("Shape of Feature Test set Before LDA : ", X_test.shape)
print("Shape of Feature Test set After LDA  : ", X_test_lda.shape)

# ### 5.1 Applying LDA to Linear SVM

# In[6]:

###SVM - Linear
def show_scatter(ax, title, A, A_axis, mask, features, args):
    '''show the scatter plot (mainly the A matrix)'''
    if args.factor is not None:
        cmap = matplotlib.cm.get_cmap('rainbow')
        # display index of each point in gray style
        # for i, (x, y) in enumerate(A):
        #     ax.text(x, y, str(i + 1),
        #             color='gray',
        #             fontsize=4,
        #             alpha=0.4,
        #             horizontalalignment='center',
        #             verticalalignment='center')

        # scatter with markers/colors
        C, Cnames, M, Mnames, y = parse_color_marker(args.factor, args.color, args.marker)
        if A.shape[1] > 2:
            from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
            lda = LinearDiscriminantAnalysis(n_components=2)
            A = lda.fit_transform(A, y)
            print(lda.explained_variance_ratio_, lda.explained_variance_ratio_.sum())
        for marker in set(M):
            idx = np.array([_m == marker for _m in M])
            ax.scatter(A[idx, 0], A[idx, 1], c=C[idx], marker=marker, cmap=cmap, alpha=.5, s=7)

        # generate the legend
        handles = []
        for c, name in Cnames:
            handles.append(mpatches.Patch(color=cmap(c), label=name))
        for m, name in Mnames:
            handles.append(mlines.Line2D([], [], c='k', lw=0.5, marker=m, label=name))
        #ax.legend(handles=handles, loc='best', prop={'size': 6})
    else:
        cmap = matplotlib.cm.get_cmap('Spectral')
        for i, (x, y) in enumerate(A):
            if not mask[i]:
                ax.text(x, y, str(i + 1),
                        color=cmap(i / A.shape[0]),
                        fontsize=4,
                        fontweight='black',
                        alpha=0.8,
                        horizontalalignment='center',
                        verticalalignment='center')
        ax.scatter(A[mask, 0], A[mask, 1], color='black', s=8, alpha=0.8)

    # show the axis
    # note: `axis` and `axis_curves` are assumed to come from the enclosing scope
    if A_axis is not None:
        cmap = matplotlib.cm.get_cmap('ocean')
        score_a = []
        for i, a in enumerate(axis):
            _xy = axis_curves[i * 20:(i + 1) * 20] - A.mean(0)
            score_a.append((a, np.linalg.norm(_xy)))
        score_a.sort(key=lambda _: _[1], reverse=True)
        a_list = [score_a[i][0] for i in range(7)]
        c = 0
        for i, a in enumerate(axis):
            if not (a in a_list):
                continue
            color = c / (len(a_list) + 1)
            c += 1
            _x = axis_curves[i * 20:(i + 1) * 20, 0]
            _y = axis_curves[i * 20:(i + 1) * 20, 1]
            ax.plot(_x, _y, c=cmap(color))
            ax.arrow(_x[-2], _y[-2], _x[-1] - _x[-2], _y[-1] - _y[-2],
                     color=cmap(color), head_width=.3)
            ax.text(_x[-1], _y[-1], features[a], fontsize=7, color=cmap(color), alpha=.7)
            print(features[a])

    # set the plotting axis
    xmin = A[:, 0].min()
    xmax = A[:, 0].max()
    margin = 0.05 * (xmax - xmin)
    xmin -= margin
    xmax += margin
    ax.set_xlim(xmin, xmax)
    ax.set_xticks([xmin, xmax])
    ymin = A[:, 1].min()
    ymax = A[:, 1].max()
    margin = 0.05 * (ymax - ymin)
    ymin -= margin
    ymax += margin
    ax.set_ylim(ymin, ymax)
    ax.set_yticks([ymin, ymax])
    ax.set_xlabel(r'\#comp1', labelpad=-10)
    ax.set_ylabel(r'\#comp2', labelpad=-25)
    ax.set_title(title)
def main():
    attrs, classes = prepare_ds('cov_data.csv')

    for cls, color in zip(range(1, 4), ('red', 'green', 'blue')):
        attr_one = attrs[:, 0][classes == cls]
        attr_two = attrs[:, 1][classes == cls]
        p = pearsonr(attr_one, attr_two)
        plt.scatter(x=attr_one, y=attr_two, marker='o', color=color,
                    label='cls: {:}, pearsonr={:.2f}'.format(cls, p[0]))
    plt.title('Pearson correlation')
    plt.xlabel('Elevation, m')
    plt.ylabel('Slope, num')
    plt.legend(loc='upper right')
    plt.show()

    data_train, data_test, class_train, class_test = train_test_split(
        attrs, classes, test_size=.3, random_state=123,
    )

    lda = LDA(n_components=2)
    lda_transform = lda.fit_transform(data_train, class_train)

    plt.figure(figsize=(10, 8))
    for cls, color in zip(range(1, 4), ('red', 'green', 'blue')):
        attr_one = lda_transform[:, 0][class_train == cls]
        attr_two = lda_transform[:, 1][class_train == cls]
        plt.scatter(x=attr_one, y=attr_two, marker='o', color=color,
                    label='cls: {:}'.format(cls))
    plt.xlabel('vec 1')
    plt.ylabel('vec 2')
    plt.legend()
    plt.show()

    lda_clf = LDA()
    lda_clf.fit(data_train, class_train)
    pred_train_lda = lda_clf.predict(data_train)
    print('Classification accuracy on the training set (LDA): {:.2%}'.format(
        metrics.accuracy_score(class_train, pred_train_lda)))
    pred_test_lda = lda_clf.predict(data_test)
    print('Classification accuracy on the test set (LDA): {:.2%}'.format(
        metrics.accuracy_score(class_test, pred_test_lda)))

    qda_clf = QuadraticDiscriminantAnalysis()
    qda_clf.fit(data_train, class_train)
    pred_train_qda = qda_clf.predict(data_train)
    print('Classification accuracy on the training set (QDA): {:.2%}'.format(
        metrics.accuracy_score(class_train, pred_train_qda)))
    pred_test_qda = qda_clf.predict(data_test)
    print('Classification accuracy on the test set (QDA): {:.2%}'.format(
        metrics.accuracy_score(class_test, pred_test_qda)))
def discriminatePlot(X, y, cVal, titleStr='', figdir='.', Xcolname=None):
    # Frederic's robust wrapper for discriminant analysis. Performs LDA, QDA and RF after error
    # checking, generates nice plots and returns cross-validated performance, stderr and baseline.
    # X         np array, n rows x p parameters
    # y         group labels, n rows
    # cVal      rgb color code for each data point - should be the same for data belonging to the same group
    # titleStr  title for plots
    # figdir    directory name (folder name) for eps figures
    # Xcolname  np.array or list of strings with column names for printout display
    # returns: ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses

    # Global Parameters
    CVFOLDS = 10
    MINCOUNT = 10
    MINCOUNTTRAINING = 5
    # figdir = '/Users/frederictheunissen/Documents/Data/Julie/Acoustical Analysis/Figures Voice'

    # Initialize variables and clean up data
    classes, classesCount = np.unique(y, return_counts=True)  # Classes to be discriminated should be same as ldaMod.classes_
    goodIndClasses = np.array([n >= MINCOUNT for n in classesCount])
    goodInd = np.array([b in classes[goodIndClasses] for b in y])
    yGood = y[goodInd]
    XGood = X[goodInd]
    cValGood = cVal[goodInd]
    classes, classesCount = np.unique(yGood, return_counts=True)
    nClasses = classes.size  # Number of classes or groups

    # Do we have enough data?
    if (nClasses < 2):
        print('Error in ldaPlot: Insufficient classes with minimum data (%d) for discrimination analysis' % (MINCOUNT))
        return -1, -1, -1, -1, -1, -1, -1
    cvFolds = min(min(classesCount), CVFOLDS)
    if (cvFolds < CVFOLDS):
        print('Warning in ldaPlot: Cross-validation performed with %d folds (instead of %d)' % (cvFolds, CVFOLDS))

    # Data size and color values
    nD = XGood.shape[1]  # number of features in X
    nX = XGood.shape[0]  # number of data points in X
    cClasses = []        # Color code for each class
    for cl in classes:
        icl = (yGood == cl).nonzero()[0][0]
        cClasses.append(np.append(cValGood[icl], 1.0))
    cClasses = np.asarray(cClasses)

    # Use a uniform prior
    myPrior = np.ones(nClasses) * (1.0 / nClasses)

    # Perform a PCA for dimensionality reduction so that the covariance matrix can be fitted.
    nDmax = int(np.fix(np.sqrt(nX / 5)))
    if nDmax < nD:
        print('Warning: Insufficient data for', nD, 'parameters. PCA projection to', nDmax, 'dimensions.')
    nDmax = min(nD, nDmax)
    pca = PCA(n_components=nDmax)
    Xr = pca.fit_transform(XGood)
    print('Variance explained is %.2f%%' % (sum(pca.explained_variance_ratio_) * 100.0))

    # Initialise classifiers
    ldaMod = LDA(n_components=min(nDmax, nClasses - 1), priors=myPrior, shrinkage=None, solver='svd')
    qdaMod = QDA(priors=myPrior)
    rfMod = RF()  # by default assumes equal weights

    # Perform CVFOLDS-fold cross-validation to get the performance of the classifiers.
    ldaScores = np.zeros(cvFolds)
    qdaScores = np.zeros(cvFolds)
    rfScores = np.zeros(cvFolds)
    # StratifiedKFold moved to sklearn.model_selection; the old
    # cross_validation.StratifiedKFold(yGood, cvFolds) call no longer exists.
    skf = StratifiedKFold(n_splits=cvFolds).split(Xr, yGood)
    iskf = 0
    for train, test in skf:
        # Enforce the MINCOUNT in each class for training
        trainClasses, trainCount = np.unique(yGood[train], return_counts=True)
        goodIndClasses = np.array([n >= MINCOUNTTRAINING for n in trainCount])
        goodIndTrain = np.array([b in trainClasses[goodIndClasses] for b in yGood[train]])

        # Specify the training data set, the number of groups and priors
        yTrain = yGood[train[goodIndTrain]]
        XrTrain = Xr[train[goodIndTrain]]
        trainClasses, trainCount = np.unique(yTrain, return_counts=True)
        ntrainClasses = trainClasses.size

        # Skip this cross-validation fold because of insufficient data
        if ntrainClasses < 2:
            continue
        goodInd = np.array([b in trainClasses for b in yGood[test]])
        if (goodInd.size == 0):
            continue

        # Fit the data
        trainPriors = np.ones(ntrainClasses) * (1.0 / ntrainClasses)
        ldaMod.priors = trainPriors
        qdaMod.priors = trainPriors
        ldaMod.fit(XrTrain, yTrain)
        qdaMod.fit(XrTrain, yTrain)
        rfMod.fit(XrTrain, yTrain)
        ldaScores[iskf] = ldaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        qdaScores[iskf] = qdaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        rfScores[iskf] = rfMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        iskf += 1

    if (iskf != cvFolds):
        cvFolds = iskf
        ldaScores.reshape(cvFolds)
        qdaScores.reshape(cvFolds)
        rfScores.reshape(cvFolds)

    # Refit with all the data for the plots
    ldaMod.priors = myPrior
    qdaMod.priors = myPrior
    Xrr = ldaMod.fit_transform(Xr, yGood)
    # Check labels
    for a, b in zip(classes, ldaMod.classes_):
        if a != b:
            print('Error in ldaPlot: labels do not match')

    # Print the five largest coefficients of the first 3 DFA components
    MAXCOMP = 3    # Maximum number of DFA components
    MAXWEIGHT = 5  # Maximum number of weights printed for each component
    ncomp = min(MAXCOMP, nClasses)
    nweight = min(MAXWEIGHT, nD)
    weights = np.dot(ldaMod.coef_[0:ncomp, :], pca.components_)
    print('LDA Weights:')
    for ic in range(ncomp):
        idmax = np.argsort(np.abs(weights[ic, :]))[::-1]
        print('DFA %d: ' % ic, end='')
        for iw in range(nweight):
            if Xcolname is None:  # was: type(Xcolname) == None, which is never true
                colstr = 'C%d' % idmax[iw]
            else:
                colstr = Xcolname[idmax[iw]]
            print('%s %.3f; ' % (colstr, float(weights[ic, idmax[iw]])), end='')
        print()

    # Obtain fits in this rotated space for display purposes
    ldaMod.fit(Xrr, yGood)
    qdaMod.fit(Xrr, yGood)
    rfMod.fit(Xrr, yGood)
    XrrMean = Xrr.mean(0)

    # Make a mesh for plotting
    x1, x2 = np.meshgrid(np.arange(-6.0, 6.0, 0.1), np.arange(-6.0, 6.0, 0.1))
    xm1 = np.reshape(x1, -1)
    xm2 = np.reshape(x2, -1)
    nxm = np.size(xm1)
    Xm = np.zeros((nxm, Xrr.shape[1]))
    Xm[:, 0] = xm1
    if Xrr.shape[1] > 1:
        Xm[:, 1] = xm2
    for ix in range(2, Xrr.shape[1]):
        Xm[:, ix] = np.squeeze(np.ones((nxm, 1))) * XrrMean[ix]
    XmcLDA = np.zeros((nxm, 4))  # RGBA values for color for LDA
    XmcQDA = np.zeros((nxm, 4))  # RGBA values for color for QDA
    XmcRF = np.zeros((nxm, 4))   # RGBA values for color for RF

    # Predict values on mesh for plotting based on the first two DFs
    yPredLDA = ldaMod.predict_proba(Xm)
    yPredQDA = qdaMod.predict_proba(Xm)
    yPredRF = rfMod.predict_proba(Xm)

    # Transform the predictions into color codes
    maxLDA = yPredLDA.max()
    for ix in range(nxm):
        cWeight = yPredLDA[ix, :]  # Prob for all classes
        cWinner = ((cWeight == cWeight.max()).astype('float'))  # Winner takes all
        # XmcLDA[ix, :] = np.dot(cWeight, cClasses) / nClasses
        XmcLDA[ix, :] = np.dot(cWinner, cClasses)
        XmcLDA[ix, 3] = cWeight.max() / maxLDA

    # Plot the surface of probability
    plt.figure(facecolor='white', figsize=(10, 3))
plt.subplot(131) Zplot = XmcLDA.reshape(np.shape(x1)[0], np.shape(x1)[1], 4) plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto') if nClasses > 2: plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1) else: plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0, c=cValGood, s=40, zorder=1) plt.title('%s: LDA pC %.0f %%' % (titleStr, (ldaScores.mean() * 100.0))) plt.axis('square') plt.xlim((-6, 6)) plt.ylim((-6, 6)) plt.xlabel('DFA 1') plt.ylabel('DFA 2') # Transform the predictions in color codes maxQDA = yPredQDA.max() for ix in range(nxm): cWeight = yPredQDA[ix, :] # Prob for all classes cWinner = ( (cWeight == cWeight.max()).astype('float')) # Winner takes all # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses XmcQDA[ix, :] = np.dot(cWinner, cClasses) XmcQDA[ix, 3] = cWeight.max() / maxQDA # Plot the surface of probability plt.subplot(132) Zplot = XmcQDA.reshape(np.shape(x1)[0], np.shape(x1)[1], 4) plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto') if nClasses > 2: plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1) else: plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0, c=cValGood, s=40, zorder=1) plt.title('%s: QDA pC %.0f %%' % (titleStr, (qdaScores.mean() * 100.0))) plt.xlabel('DFA 1') plt.ylabel('DFA 2') plt.axis('square') plt.xlim((-6, 6)) plt.ylim((-6, 6)) plt.savefig('%s/%s.eps' % (figdir, titleStr)) # Transform the predictions in color codes maxRF = yPredRF.max() for ix in range(nxm): cWeight = yPredRF[ix, :] # Prob for all classes cWinner = ( (cWeight == cWeight.max()).astype('float')) # Winner takes all # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses # Weighted colors does not work XmcRF[ix, :] = np.dot(cWinner, cClasses) XmcRF[ix, 3] = cWeight.max() / maxRF # Plot the surface of probability plt.subplot(133) Zplot = XmcRF.reshape(np.shape(x1)[0], np.shape(x1)[1], 4) plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto') if nClasses > 2: plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1) else: plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0, c=cValGood, s=40, zorder=1) plt.title('%s: RF pC %.0f %%' % (titleStr, (rfScores.mean() * 100.0))) plt.xlabel('DFA 1') plt.ylabel('DFA 2') plt.axis('square') plt.xlim((-6, 6)) plt.ylim((-6, 6)) plt.show() # Results ldaScore = ldaScores.mean() * 100.0 qdaScore = qdaScores.mean() * 100.0 rfScore = rfScores.mean() * 100.0 ldaScoreSE = ldaScores.std() * 100.0 qdaScoreSE = qdaScores.std() * 100.0 rfScoreSE = rfScores.std() * 100.0 print("Number of classes %d. Chance level %.2f %%" % (nClasses, 100.0 / nClasses)) print("%s LDA: %.2f (+/- %0.2f) %%" % (titleStr, ldaScore, ldaScoreSE)) print("%s QDA: %.2f (+/- %0.2f) %%" % (titleStr, qdaScore, qdaScoreSE)) print("%s RF: %.2f (+/- %0.2f) %%" % (titleStr, rfScore, rfScoreSE)) return ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses
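# A minimal usage sketch for discriminatePlot, assuming the imports above (np, PCA,
# LDA, QDA, RF, plt) are in scope. The synthetic data below is illustrative only;
# real calls would pass acoustic features, group labels and one rgb row per point.
rng = np.random.RandomState(0)
nPerClass = 30
X_demo = np.vstack([rng.randn(nPerClass, 4) + 2.0, rng.randn(nPerClass, 4) - 2.0])
y_demo = np.array(['A'] * nPerClass + ['B'] * nPerClass)
cVal_demo = np.array([[1.0, 0.0, 0.0]] * nPerClass + [[0.0, 0.0, 1.0]] * nPerClass)
ldaScore, ldaSE, qdaScore, qdaSE, rfScore, rfSE, nClasses = discriminatePlot(
    X_demo, y_demo, cVal_demo, titleStr='demo', figdir='.')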
features[:, 13] = labelencoder.fit_transform(features[:, 13])

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
features = scaler.fit_transform(features)

from sklearn.model_selection import train_test_split
f_train, f_test, t_train, t_test = train_test_split(features, target, test_size=0.15, random_state=0)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis(n_components=2)  # two linear discriminants (not principal components)
f_train = lda.fit_transform(f_train, t_train)  # LDA is supervised, so fit_transform needs the class labels
f_test = lda.transform(f_test)

from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=40, criterion='entropy', random_state=0)
classifier.fit(f_train, t_train)
predictions = classifier.predict(f_test)

from sklearn.metrics import confusion_matrix, accuracy_score
accuracy = accuracy_score(t_test, predictions)  # accuracy_score returns accuracy, not precision
matrix = confusion_matrix(t_test, predictions)

# Baseline helps evaluate the models; computed as most_common / rest
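# Note: the scaler above is fit on the full feature matrix before the split, which
# leaks test-set statistics into training. A minimal leakage-free sketch using a
# sklearn Pipeline; 'raw_features' is a hypothetical name for the unscaled matrix,
# everything else mirrors the snippet above.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier

f_tr, f_te, t_tr, t_te = train_test_split(raw_features, target, test_size=0.15, random_state=0)
pipe = Pipeline([
    ('scale', StandardScaler()),                          # fit on the training split only
    ('lda', LinearDiscriminantAnalysis(n_components=2)),
    ('rf', RandomForestClassifier(n_estimators=40, criterion='entropy', random_state=0)),
])
pipe.fit(f_tr, t_tr)
print('pipeline accuracy:', pipe.score(f_te, t_te))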
# (tail of a ggplot call whose opening line precedes this excerpt)
         color='Word')) + geom_line() +
 labs(title='Word Usage Change over Time in First Presidency and the 12'))

youth = (dfwords.groupby(dfwords['Date'].map(lambda x: x.year))
         .mean()[['young men', 'young women']]
         .unstack().reset_index())
youth.columns = ['Word', 'Date', 'Mean TF-IDF Score']
(ggplot(youth, aes(x='Date', y='Mean TF-IDF Score', color='Word'))
 + geom_line()
 + labs(title='Word Usage Change over Time in First Presidency and the 12'))

pca = PCA(n_components=3)
pca_df = pca.fit_transform(tfidf_X_train.toarray())  # densify: these estimators do not accept sparse input,
lda = LinearDiscriminantAnalysis(n_components=3)     # and .toarray() avoids the np.matrix returned by .todense()
lda_df = lda.fit_transform(tfidf_X_train.toarray(), y_train)

principalDf = pd.DataFrame(data=pca_df, columns=['pc1', 'pc2', 'pc3'])
principalDf['Speaker_num'] = y_train
recent_Oaks = list(np.where([
    X_train_all.Date[i] > datetime.datetime(2020, 1, 1)
    and X_train_all.Speaker[i] == 'Dallin H. Oaks'
    for i in X_train_all.index
])[0])
principalDf['Speaker'] = [to_speaker_dict[y_val] for y_val in y_train]
principalDf.loc[recent_Oaks, 'Speaker'] = '2020 Dallin H. Oaks'
principalDf.loc[recent_Oaks, 'Speaker_num'] = 15

linearDF = pd.DataFrame(data=lda_df, columns=['lda1', 'lda2', 'lda3'])
linearDF['Speaker_num'] = y_train
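# Both projections can be sanity-checked via their explained-variance ratios.
# A minimal sketch, assuming pca and lda were fit as above (both estimators expose
# explained_variance_ratio_ in recent scikit-learn):
print('PCA explained variance:', pca.explained_variance_ratio_)
print('LDA explained variance:', lda.explained_variance_ratio_)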
y = dataset.iloc[:, 13].values
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=0)

# Scaling
scaler = StandardScaler()
xtrain = scaler.fit_transform(xtrain)
xtest = scaler.transform(xtest)  # transform only: the scaler must not be refit on the test set

# Dimensionality reduction
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=2)  # aliasing the import as LDA avoids shadowing it with the instance
xtrain = lda.fit_transform(xtrain, ytrain)
xtest = lda.transform(xtest)

from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(xtrain, ytrain)
classifiedvalue = classifier.predict(xtest)

from sklearn.metrics import confusion_matrix
confusionmatrix = confusion_matrix(ytest, classifiedvalue)

from matplotlib.colors import ListedColormap

# Test set visualisation (see the sketch below)
xset, yset = xtest, ytest
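# The visualisation this setup usually feeds: a decision surface over the two
# discriminants plus the projected test points. A sketch, assuming numeric class
# labels and at most three classes (the colors are illustrative):
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

x1, x2 = np.meshgrid(np.arange(xset[:, 0].min() - 1, xset[:, 0].max() + 1, 0.01),
                     np.arange(xset[:, 1].min() - 1, xset[:, 1].max() + 1, 0.01))
cmap = ListedColormap(('red', 'green', 'blue'))
Z = classifier.predict(np.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape)
plt.contourf(x1, x2, Z, alpha=0.4, cmap=cmap)  # predicted class regions as background
for i, cls in enumerate(np.unique(yset)):
    plt.scatter(xset[yset == cls, 0], xset[yset == cls, 1], color=cmap(i), label=cls)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend()
plt.show()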
y = dat['Class']      # Split off classifications
X = dat.loc[:, '0':]  # Split off features (.ix was removed from pandas; .loc does the same label slice)
X_norm = (X - X.min()) / (X.max() - X.min())  # min-max normalisation
# print(cols)
# print(X_norm)
# print(y)

# PCA plotting
# plot_method = sklearnPCA(n_components=2)  # 2-dimensional PCA

# LDA plotting
plot_method = LDA(n_components=2)  # 2-dimensional LDA
transformed = pd.DataFrame(plot_method.fit_transform(X_norm, y))

plt.scatter(transformed[y == 0][0], transformed[y == 0][1], label='0=neutral', c='black')
plt.scatter(transformed[y == 1][0], transformed[y == 1][1], label='1=anger', c='red')
plt.scatter(transformed[y == 3][0], transformed[y == 3][1], label='3=disgust', c='orange')
plt.scatter(transformed[y == 4][0], transformed[y == 4][1],
## PCA
print("Computing PCA projection")
t0 = time()
X_pca = decomposition.TruncatedSVD(n_components=3).fit_transform(X_test)
plot_embedding_2d(X_pca[:, 0:2], y_test, "PCA 2D")
plot_embedding_3d(X_pca, y_test, "PCA 3D (time %.2fs)" % (time() - t0))

#%%
# LDA
print("Computing LDA projection")
X2 = X_test.copy()
X2.flat[::X_test.shape[1] + 1] += 0.01  # jitter the (flattened) diagonal so the covariance estimate is not singular
t0 = time()
lda = LinearDiscriminantAnalysis(n_components=3)
X_lda = lda.fit_transform(X2, y_test)
plot_embedding_2d(X_lda[:, 0:2], y_test, "LDA 2D")
plot_embedding_3d(X_lda, y_test, "LDA 3D (time %.2fs)" % (time() - t0))

# MDS
print("Computing MDS embedding")
clf = manifold.MDS(n_components=3, n_init=1, max_iter=100)
t0 = time()
X_mds = clf.fit_transform(X_test)
print("Done. Stress: %f" % clf.stress_)
plot_embedding_2d(X_mds, y_test, "MDS (time %.2fs)" % (time() - t0))
plot_embedding_3d(X_mds, y_test, "MDS (time %.2fs)" % (time() - t0))

lable = trainlable  # 'trainlable' is defined earlier in the original script
c = (lable == 4)
c2 = (lable == 9)
print('Between-class scatter matrix: %sx%s' % (S_B.shape[0], S_B.shape[1]))
eigen_vals, eigen_vecs = np.linalg.eig(np.linalg.inv(S_W).dot(S_B))
eigen_pairs = [(np.abs(eigen_vals[i]), eigen_vecs[:, i])  # pair each |eigenvalue| with its eigenvector
               for i in range(len(eigen_vals))]
eigen_pairs = sorted(eigen_pairs, key=lambda k: k[0], reverse=True)
print('Eigenvalues in decreasing order:\n')
for ev in eigen_pairs:
    print(ev[0])
'''

# LDA in sklearn
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)

lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)
plot_decision_regions(X_train_lda, y_train, classifier=lr)
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()

# On the test set:
X_test_lda = lda.transform(X_test_std)
plot_decision_regions(X_test_lda, y_test, classifier=lr)
plt.xlabel('LD 1')
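# If the commented-out manual computation above is re-enabled, its usual next step
# is to stack the two leading eigenvectors into a projection matrix W and project
# the standardized data onto it; a sketch that mirrors lda.fit_transform:
w = np.hstack((eigen_pairs[0][1][:, np.newaxis].real,
               eigen_pairs[1][1][:, np.newaxis].real))  # d x 2 projection matrix
X_train_lda_manual = X_train_std.dot(w)  # same 2-D discriminant space, up to sign/scale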
def lda(self):
    lda = LDA(n_components=1)  # a single discriminant axis; LDA allows at most n_classes - 1 components
    self.xlda_train = lda.fit_transform(self.x_train, self.y_train)
    self.xlda_test = lda.transform(self.x_test)
    return self.xlda_test, self.xlda_train
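# Hypothetical usage of the method above, assuming a wrapper object that exposes
# x_train / x_test / y_train / y_test attributes (the class name is illustrative):
reducer = SomeModelWrapper()           # hypothetical class holding the train/test splits
xlda_test, xlda_train = reducer.lda()  # note the (test, train) return order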