def dim_reductor(n, X):
    # Embed X into n dimensions with five methods and collect the results in a
    # dict keyed by method name. LLE here appears to be the functional form
    # (sklearn.manifold.locally_linear_embedding), which returns an
    # (embedding, reconstruction_error) tuple, hence the [0].
    reductor = dict(PCA=PCA(n_components=n).fit(X).transform(X),
                    KPCA=KernelPCA(n_components=n, kernel='rbf').fit_transform(X),
                    ISOMAP=Isomap(n_neighbors=10, n_components=n).fit(X).transform(X),
                    MDS=MDS(n_components=n).fit_transform(X),
                    LLE=LLE(X, n_neighbors=10, n_components=n)[0])
    return reductor
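# A minimal, hypothetical driver for dim_reductor, showing the imports the
# snippet above seems to assume (an assumption, not the original setup); the
# S-curve data is purely illustrative.
import numpy as np
from sklearn.datasets import make_s_curve
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import Isomap, MDS
from sklearn.manifold import locally_linear_embedding as LLE

X, _ = make_s_curve(n_samples=500, random_state=0)
embeddings = dim_reductor(2, X)
for name, Y in embeddings.items():
    print(name, Y.shape)  # each entry is an (n_samples, 2) embedding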
def lle(space):
    # hyperopt objective: embed the atlas vertices with LLE and score the
    # embedding by the average distance between ROI centers of mass
    # (negated, since hyperopt minimizes the loss).
    n_neighbors = int(space['n_neighbors'])
    method = space['method']
    vertices, colors = get_all_vertices_dk_atlas_w_colors()
    print(space)
    lle = LLE(n_neighbors=n_neighbors, n_components=2, method=method,
              neighbors_algorithm='auto')
    lle_xy = lle.fit_transform(vertices)
    centers = get_centers_of_rois_xy(lle_xy)
    avg_distance = avg_distance_between_center_of_masses(centers)
    model_name = 'lle_{}_{}'.format(method, avg_distance)
    result = {'loss': -avg_distance, 'space': space, 'status': STATUS_OK}
    save_json_result(model_name, result)
    save_2d_roi_map(lle_xy, colors, centers, model_name)
    return result
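# A sketch of how the objective above might be handed to hyperopt's fmin;
# the search-space bounds below are assumptions, not the original ones.
from hyperopt import fmin, tpe, hp, Trials

space = {
    'n_neighbors': hp.quniform('n_neighbors', 5, 30, 1),
    'method': hp.choice('method', ['standard', 'modified']),
}
trials = Trials()
best = fmin(fn=lle, space=space, algo=tpe.suggest, max_evals=50, trials=trials)
print(best)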
def dim_reduct_plot(file):
    """
    Read all the points of matteonormb.obj, reduce them to 2D after
    denoising, and plot each embedding.
    :param file: path to the .obj file
    :return:
    """
    mesh = meshio.read(file)
    points = mesh.points
    pca_data = prim_com_analy(points, 2)
    # pca = PCA(n_components=2)
    # X_pca = pca.fit_transform(points)
    mds_data = mult_dim_scaling(points, 2)
    # mds = MDS(n_components=2)
    # X_mds = mds.fit_transform(points)
    lle_data = LLE(n_components=2, n_neighbors=8).fit_transform(points)
    iso_data = ISOMap(n_components=2, n_neighbors=10).fit_transform(points)
    plt.subplot(221)
    plt.title('PCA')
    plt.scatter(pca_data[:, 0], pca_data[:, 1], c='blue', marker='.')
    plt.subplot(222)
    plt.title('MDS')
    plt.scatter(mds_data[:, 0], mds_data[:, 1], c='red', marker='.')
    plt.subplot(223)
    plt.title('LLE')
    plt.scatter(lle_data[:, 0], lle_data[:, 1], c='yellow', marker='.')
    plt.subplot(224)
    plt.title('ISOMAP')
    plt.scatter(iso_data[:, 0], iso_data[:, 1], c='green', marker='.')
    plt.show()
def nn_check(ppd):
    # sweep neighbor counts for modified LLE and check embedding validity
    for i in range(8, 26):
        lle = LLE(n_components=3, n_neighbors=i, method='modified',
                  modified_tol=1e-12)
        XT = lle.fit_transform(ppd)
        print('running')
        validity(XT, i)
        print('done')
def draw(reduction_method):
    if reduction_method == "PCA":
        method = PCA(n_components=3)
    elif reduction_method == "LLE":
        method = LLE(n_components=3, n_neighbors=5, eigen_solver="auto")
    elif reduction_method == "Isomap":
        method = Isomap(n_components=3, n_neighbors=5, eigen_solver="auto")
    elif reduction_method == "MDS":
        method = MDS(n_components=3)
    print()
    print(reduction_method + ' is being plotted')
    fitted_method = method.fit_transform(x)
    data_frame_of_method = pd.DataFrame(
        data=fitted_method,
        columns=['component 1', 'component 2', 'component 3'])
    # print(principalDf.head())
    # print(data_frame[['SKC']].head())
    print(int(time.time() - start), 'seconds')
    finalDf = pd.concat([data_frame_of_method, data_frame[['SKC']]], axis=1)
    # print('========================')
    # print(finalDf.head())
    # print('========================')
    fig = plot.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel('X', fontsize=14)
    ax.set_ylabel('Y', fontsize=14)
    ax.set_zlabel('Z', fontsize=14)
    ax.set_title('3 Components ' + reduction_method, fontsize=20)
    targets = ['BKN', 'SCT', 'CLR', 'OVC']
    colors = ['r', 'g', 'b', 'k']
    for target, color in zip(targets, colors):
        indices_to_keep = finalDf['SKC'] == target
        ax.scatter(finalDf.loc[indices_to_keep, 'component 1'],
                   finalDf.loc[indices_to_keep, 'component 2'],
                   finalDf.loc[indices_to_keep, 'component 3'],
                   c=color, s=1)
    ax.legend(targets)
    ax.grid(True)
    plot.show()
def main():
    # ----- settings:
    dataset = 'MNIST'  # --> 'Facial' or 'MNIST' or 'Breast_cancer'
    embedding_method = 'Isomap'
    n_components = 5
    split_in_cross_validation_again = False
    load_dataset_again = False
    subset_of_MNIST = True
    pick_subset_of_MNIST_again = False
    MNIST_subset_cardinality_training = 10000  # picking from first samples of 60,000 samples
    MNIST_subset_cardinality_testing = 5000  # picking from first samples of 10,000 samples
    # ----- paths:
    if dataset == 'Facial':
        path_dataset = './input/att_database/'
        path_dataset_save = './input/pickle_dataset/Facial/'
    elif dataset == 'MNIST':
        path_dataset = './input/mnist/'
        path_dataset_save = './input/pickle_dataset/MNIST/'
    elif dataset == 'Breast_cancer':
        path_dataset = './input/Breast_cancer_dataset/wdbc_data.txt'
        path_dataset_save = './input/pickle_dataset/MNIST/'
    # ----- loading dataset:
    print('Reading dataset...')
    if dataset == 'MNIST':
        if load_dataset_again:
            training_data = list(read_MNIST_dataset(dataset="training", path=path_dataset))
            testing_data = list(read_MNIST_dataset(dataset="testing", path=path_dataset))
            number_of_training_samples = len(training_data)
            dimension_of_data = 28 * 28
            X_train = np.empty((0, dimension_of_data))
            y_train = np.empty((0, 1))
            for sample_index in range(number_of_training_samples):
                if np.mod(sample_index, 1) == 0:
                    print('sample ' + str(sample_index) + ' from ' +
                          str(number_of_training_samples) + ' samples...')
                label, pixels = training_data[sample_index]
                pixels_reshaped = np.reshape(pixels, (1, 28 * 28))
                X_train = np.vstack([X_train, pixels_reshaped])
                y_train = np.vstack([y_train, label])
            y_train = y_train.ravel()
            number_of_testing_samples = len(testing_data)
            dimension_of_data = 28 * 28
            X_test = np.empty((0, dimension_of_data))
            y_test = np.empty((0, 1))
            for sample_index in range(number_of_testing_samples):
                if np.mod(sample_index, 1) == 0:
                    print('sample ' + str(sample_index) + ' from ' +
                          str(number_of_testing_samples) + ' samples...')
                label, pixels = testing_data[sample_index]
                pixels_reshaped = np.reshape(pixels, (1, 28 * 28))
                X_test = np.vstack([X_test, pixels_reshaped])
                y_test = np.vstack([y_test, label])
            y_test = y_test.ravel()
            save_variable(X_train, 'X_train', path_to_save=path_dataset_save)
            save_variable(y_train, 'y_train', path_to_save=path_dataset_save)
            save_variable(X_test, 'X_test', path_to_save=path_dataset_save)
            save_variable(y_test, 'y_test', path_to_save=path_dataset_save)
        else:
            file = open(path_dataset_save + 'X_train.pckl', 'rb')
            X_train = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y_train.pckl', 'rb')
            y_train = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'X_test.pckl', 'rb')
            X_test = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y_test.pckl', 'rb')
            y_test = pickle.load(file)
            file.close()
        if subset_of_MNIST:
            if pick_subset_of_MNIST_again:
                X_train_picked = X_train[0:MNIST_subset_cardinality_training, :]
                X_test_picked = X_test[0:MNIST_subset_cardinality_testing, :]
                y_train_picked = y_train[0:MNIST_subset_cardinality_training]
                y_test_picked = y_test[0:MNIST_subset_cardinality_testing]
                save_variable(X_train_picked, 'X_train_picked', path_to_save=path_dataset_save)
                save_variable(X_test_picked, 'X_test_picked', path_to_save=path_dataset_save)
                save_variable(y_train_picked, 'y_train_picked', path_to_save=path_dataset_save)
                save_variable(y_test_picked, 'y_test_picked', path_to_save=path_dataset_save)
            else:
                file = open(path_dataset_save + 'X_train_picked.pckl', 'rb')
                X_train_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'X_test_picked.pckl', 'rb')
                X_test_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'y_train_picked.pckl', 'rb')
                y_train_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'y_test_picked.pckl', 'rb')
                y_test_picked = pickle.load(file)
                file.close()
            X_train = X_train_picked
            X_test = X_test_picked
            y_train = y_train_picked
            y_test = y_test_picked
        image_shape = (28, 28)
    elif dataset == 'Facial':
        if load_dataset_again:
            X, y, image_shape = read_image_dataset(dataset_path=path_dataset, imagesType='.jpg')
            save_variable(variable=X, name_of_variable='X', path_to_save=path_dataset_save)
            save_variable(variable=y, name_of_variable='y', path_to_save=path_dataset_save)
            save_variable(variable=image_shape, name_of_variable='image_shape', path_to_save=path_dataset_save)
        else:
            file = open(path_dataset_save + 'X.pckl', 'rb')
            X = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y.pckl', 'rb')
            y = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'image_shape.pckl', 'rb')
            image_shape = pickle.load(file)
            file.close()
    elif dataset == 'Breast_cancer':
        # read text file using pandas dataFrame:
        # https://stackoverflow.com/questions/21546739/load-data-from-txt-with-pandas
        data = pd.read_csv(path_dataset, sep=",", header=None)
        labels_of_classes = ['M', 'B']
        X, y = read_BreastCancer_dataset(data=data, labels_of_classes=labels_of_classes)
        # ---> otherwise MDS has error -->
        # https://stackoverflow.com/questions/16990996/multidimensional-scaling-fitting-in-numpy-pandas-and-sklearn-valueerror
        X = X.astype(np.float64)
    # --- cross validation:
    path_to_save = './input/split_data/'
    portion_of_test_in_dataset = 0.3
    number_of_folds = 10
    if split_in_cross_validation_again:
        train_indices_in_folds, test_indices_in_folds, \
            X_train_in_folds, X_test_in_folds, y_train_in_folds, y_test_in_folds = \
            cross_validation(X=X, y=y, n_splits=number_of_folds,
                             test_size=portion_of_test_in_dataset)
        save_variable(train_indices_in_folds, 'train_indices_in_folds', path_to_save=path_to_save)
        save_variable(test_indices_in_folds, 'test_indices_in_folds', path_to_save=path_to_save)
        save_variable(X_train_in_folds, 'X_train_in_folds', path_to_save=path_to_save)
        save_variable(X_test_in_folds, 'X_test_in_folds', path_to_save=path_to_save)
        save_variable(y_train_in_folds, 'y_train_in_folds', path_to_save=path_to_save)
        save_variable(y_test_in_folds, 'y_test_in_folds', path_to_save=path_to_save)
        for fold_index in range(number_of_folds):
            save_np_array_to_txt(np.asarray(train_indices_in_folds[fold_index]),
                                 'train_indices_in_fold' + str(fold_index),
                                 path_to_save=path_to_save)
            save_np_array_to_txt(np.asarray(test_indices_in_folds[fold_index]),
                                 'test_indices_in_folds' + str(fold_index),
                                 path_to_save=path_to_save)
    else:
        file = open(path_to_save + 'train_indices_in_folds.pckl', 'rb')
        train_indices_in_folds = pickle.load(file)
        file.close()
        file = open(path_to_save + 'test_indices_in_folds.pckl', 'rb')
        test_indices_in_folds = pickle.load(file)
        file.close()
        file = open(path_to_save + 'X_train_in_folds.pckl', 'rb')
        X_train_in_folds = pickle.load(file)
        file.close()
        file = open(path_to_save + 'X_test_in_folds.pckl', 'rb')
        X_test_in_folds = pickle.load(file)
        file.close()
        file = open(path_to_save + 'y_train_in_folds.pckl', 'rb')
        y_train_in_folds = pickle.load(file)
        file.close()
        file = open(path_to_save + 'y_test_in_folds.pckl', 'rb')
        y_test_in_folds = pickle.load(file)
        file.close()
    print(X_train.shape)
    print(X_test.shape)
    # ----- embedding:
    print('Embedding...')
    # commented-out visualization (X_projected and ax are not defined here):
    # if dataset == 'MNIST':
    #     plot_components(X_projected=X_projected,
    #                     images=X.reshape((-1, image_shape[0], image_shape[1])),
    #                     ax=ax, image_scale=0.6, markersize=10,
    #                     thumb_frac=0.05, cmap='gray_r')
    if embedding_method == 'LLE':
        clf = LLE(n_neighbors=5, n_components=n_components, method='standard')
        clf.fit(X=X_train)
        X_train_projected = clf.transform(X=X_train)
        X_test_projected = clf.transform(X=X_test)
    elif embedding_method == 'Isomap':
        clf = Isomap(n_neighbors=5, n_components=n_components)
        clf.fit(X=X_train)
        X_train_projected = clf.transform(X=X_train)
        X_test_projected = clf.transform(X=X_test)
    elif embedding_method == 'MDS':
        clf = MDS(n_components=n_components)
        X_projected = clf.fit_transform(X=np.vstack([X_train, X_test]))
        X_train_projected = X_projected[:X_train.shape[0], :]
        X_test_projected = X_projected[X_train.shape[0]:, :]
    elif embedding_method == 'PCA':
        clf = PCA(n_components=n_components)
        clf.fit(X=X_train)
        X_train_projected = clf.transform(X=X_train)
        X_test_projected = clf.transform(X=X_test)
    elif embedding_method == 'KernelPCA':
        clf = KernelPCA(n_components=n_components, kernel='rbf')
        clf.fit(X=X_train)
        X_train_projected = clf.transform(X=X_train)
        X_test_projected = clf.transform(X=X_test)
    elif embedding_method == 'LaplacianEigenmap':
        clf = LaplacianEigenmap(n_neighbors=5, n_components=n_components)
        X_projected = clf.fit_transform(X=np.vstack([X_train, X_test]))
        X_train_projected = X_projected[:X_train.shape[0], :]
        X_test_projected = X_projected[X_train.shape[0]:, :]
    elif embedding_method == 'LDA':
        clf = LDA(n_components=n_components)
        clf.fit(X=X_train, y=y_train)
        X_train_projected = clf.transform(X=X_train)
        X_test_projected = clf.transform(X=X_test)
    elif embedding_method == 'SPCA':
        clf = SPCA(n_components=n_components)
        clf.fit(X=X_train, y=y_train)
        X_train_projected = clf.transform(X=X_train)
        X_test_projected = clf.transform(X=X_test)
    elif embedding_method == 'TSNE':
        clf = TSNE(n_components=min(3, n_components))
        # print(type(list(y_train)))
        X_projected = clf.fit_transform(X=np.vstack([X_train, X_test]),
                                        y=np.asarray(list(y_train) + list(y_test)))
        X_train_projected = X_projected[:X_train.shape[0], :]
        X_test_projected = X_projected[X_train.shape[0]:, :]
    elif embedding_method == 'ML':
        clf = ML(n_components=n_components)
        clf.fit(X=X_train, y=y_train)
        X_train_projected = clf.transform(X=X_train)
        X_test_projected = clf.transform(X=X_test)
    elif embedding_method == 'Kernel_FLDA':
        clf = Kernel_FLDA(n_components=n_components, kernel='linear')
        clf.fit(X=X_train, y=y_train)
        X_train_projected = clf.transform(X=X_train)
        X_test_projected = clf.transform(X=X_test)
    elif embedding_method == 'No_embedding':
        X_train_projected = X_train
        X_test_projected = X_test
    # --- classification:
    print('Classification...')
    # clf = KNN(n_neighbors=1)
    clf = NB()
    clf.fit(X=X_train_projected, y=y_train)
    y_pred = clf.predict(X=X_test_projected)
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    error = 1 - accuracy_score(y_true=y_test, y_pred=y_pred)
    # --- saving results:
    save_variable(accuracy, 'accuracy', path_to_save='./output/MNIST/')
    save_np_array_to_txt(np.asarray(accuracy), 'accuracy', path_to_save='./output/MNIST/')
    save_variable(error, 'error', path_to_save='./output/MNIST/')
    save_np_array_to_txt(np.asarray(error), 'error', path_to_save='./output/MNIST/')
    # --- report results:
    print(' ')
    print('Accuracy: ', accuracy * 100)
    print(' ')
    print('Error: ', error * 100)
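# The embed-then-classify pattern above maps directly onto an sklearn
# Pipeline; a minimal sketch with Isomap + Gaussian naive Bayes on the
# digits data (illustrative only, not the script's own datasets).
from sklearn.datasets import load_digits
from sklearn.manifold import Isomap
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline

digits = load_digits()
X_tr, X_te, y_tr, y_te = train_test_split(digits.data, digits.target,
                                          random_state=0)
pipe = make_pipeline(Isomap(n_neighbors=5, n_components=5), GaussianNB())
pipe.fit(X_tr, y_tr)
print('accuracy:', accuracy_score(y_te, pipe.predict(X_te)))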
def doLLE(data):
    lle = LLE(n_components=12)
    lle_data = lle.fit_transform(data)
    return lle_data
args = get_args()
args.sub_question = [int(i) for i in args.sub_question]
X_train, Y_train, X_test, Y_test = get_data()
if 1 in args.sub_question:
    pca = PCA(n_components=2)
    X_PCA = pca.fit_transform(X_train)
    show_data(X_PCA, Y_train, 'PCA')
    isomap = Isomap(n_components=2)
    X_Isomap = isomap.fit_transform(X_train)
    show_data(X_Isomap, Y_train, 'Isomap')
    lle = LLE(n_components=2)
    X_LLE = lle.fit_transform(X_train)
    show_data(X_LLE, Y_train, 'LLE')
    tsne = TSNE(n_components=2)
    X_TSNE = tsne.fit_transform(X_train)
    show_data(X_TSNE, Y_train, 'tSNE')
if 2 in args.sub_question:
    f_nums = [1, 10, 20, 50, 100, 300]
    for num in f_nums:
        pca = PCA(n_components=num)
        X_PCA = pca.fit_transform(np.concatenate([X_train, X_test]))
        X_PCA_train = X_PCA[:X_train.shape[0]]
        X_PCA_test = X_PCA[X_train.shape[0]:]
# xdata = data3D.transpose()[1]
# ydata = data3D.transpose()[2]
p3D = plt.axes(projection='3d')
xdata, ydata, zdata = tuple(data3D.transpose())
p3D.scatter3D(xdata, ydata, zdata, c=zdata, cmap='Reds')
plt.show()

pca = PCA(copy=True, n_components=2)
data2D_pca = pca.fit_transform(data3D)
plt.scatter(*tuple(data2D_pca.transpose()), c="red")
plt.title("sklearn PCA")
plt.show()

data2D_pca_mine = myPCA.fit(data3D, dim_goal=2)
plt.scatter(*tuple(data2D_pca_mine.transpose()), c="pink")
plt.title("my PCA")
plt.show()

data3D = _data3D.copy()
# print(data3D)
mds = MDS(n_components=2)
data2D_mds = mds.fit_transform(data3D)
# print(data2D_mds)
plt.scatter(*tuple(data2D_mds.transpose()), c="blue")
plt.title("sklearn MDS")
plt.show()

data3D = _data3D.copy()
lle = LLE(n_neighbors=7, n_components=2)
data2D_lle = lle.fit_transform(data3D)
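# myPCA above is a project-local helper that is not shown; a plausible
# minimal stand-in with the same fit(data, dim_goal=...) call shape
# (an assumption, not the original implementation):
import numpy as np

class myPCA:
    @staticmethod
    def fit(data, dim_goal=2):
        # Center the data, then project onto the top dim_goal right
        # singular vectors (the principal directions).
        centered = data - data.mean(axis=0)
        _, _, Vt = np.linalg.svd(centered, full_matrices=False)
        return centered @ Vt[:dim_goal].T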
# count += 1
# print(count)
# fig.tight_layout()

# =============================================================================
# Dim Reduction
# =============================================================================
# ################### Reduce dimensions ########################
off_f_red = LLE(n_neighbors=50,
                n_components=3).fit_transform(np.transpose(all_off_f))
# off_f_red = LLE(n_neighbors=50, n_components=3).fit_transform(np.transpose(off_firing[0]))
# off_f_red = TSNE(n_components=3).fit_transform(np.transpose(all_off_f))

# 3D plot for single trajectory
for i in range(4):
    fig = plt.figure()
    ax = Axes3D(fig)
    trial_len = int((tot_time - window_size) / step_size) - 1
    ran_inds = np.arange((trial_len * i), (trial_len * (i + 1)))
    this_cmap = Colormap('hsv')
    p = ax.scatter(off_f_red[ran_inds, 0],
                   off_f_red[ran_inds, 1],
                   off_f_red[ran_inds, 2],
                   c=np.linspace(1, 255, len(ran_inds)),
                   cmap='hsv')
model = MDS(n_components=2, random_state=2)
outS = model.fit_transform(XS)
plt.scatter(outS[:, 0], outS[:, 1], **colorize)
plt.axis('equal')
show()
# we lost the y axis instead of unwrapping

# Nonlinear manifolds: Locally Linear Embedding
# preserve only the distances of nearby points;
# we can use this (LLE) to unwrap our data
from sklearn.manifold import LocallyLinearEmbedding as LLE
model = LLE(n_neighbors=100, n_components=2, eigen_solver='dense')
out = model.fit_transform(XS)
fig, ax = plt.subplots()
ax.scatter(out[:, 0], out[:, 1], **colorize)
ax.set_ylim(0.15, -0.15)
show()
# pretty close to the original

print("isomaps")
# example: isomaps on faces
# get the data
from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(min_faces_per_person=30)  # (2370, 2914) shape
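# The "preserve only nearby distances" idea above can be sanity-checked on
# the standard S-curve; a self-contained sketch (independent of XS/colorize):
import matplotlib.pyplot as plt
from sklearn.datasets import make_s_curve
from sklearn.manifold import LocallyLinearEmbedding

X_curve, t = make_s_curve(n_samples=1000, random_state=0)
unrolled = LocallyLinearEmbedding(n_neighbors=12, n_components=2,
                                  method='modified').fit_transform(X_curve)
plt.scatter(unrolled[:, 0], unrolled[:, 1], c=t, cmap='viridis', s=5)
plt.title('modified LLE unrolling the S-curve')
plt.show()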
def apply_dr(input_file, output_folder, dataset_name="MNIST", dr_name="PCA",
             perplexity=None, n_neighbors=None, min_dist=None,
             max_samples=5000, size=None, c=None):
    fn = "{dataset_name}{size}{c}{dr_name}{perp}{neigh}{mindist}".format(
        dataset_name=dataset_name,
        size="_size" + str(size) if size is not None else "",
        c="_c" + str(c) if c is not None else "",
        dr_name="_" + dr_name,
        perp="_p" + str(perplexity) if perplexity is not None else "",
        neigh="_n" + str(n_neighbors) if n_neighbors is not None else "",
        mindist="_d" + str(min_dist) if min_dist is not None else "",
    )
    if os.path.exists(output_folder + fn + ".csv"):
        print("---------Skipping: {}{}-----------".format(input_file, fn))
        return
    try:
        df = pd.read_csv(input_file)
        print("---------Starting: {} - {}-----------".format(input_file, fn))
    except Exception:
        print("{} - does not exist".format(fn))
        return
    y = df["labels"]
    X = df.iloc[:, :-2]
    if df.shape[0] > max_samples:
        # stratified subsample down to max_samples points
        X_train, features, y_train, labels = train_test_split(
            X, y, test_size=max_samples, random_state=42, stratify=y)
    else:
        features = X
        labels = y
    idx = list(features.index)
    filename = df.loc[idx, "filename"]
    # apply dr
    if dr_name == "CPCA":
        dr = CPCA(n_components=2)
    elif dr_name == "PCA":
        dr = PCA(n_components=2)
    elif dr_name == "TSNE":
        dr = TSNE(n_components=2, perplexity=perplexity, verbose=0)
    elif dr_name == "ISM":
        dr = Isomap(n_components=2, n_neighbors=n_neighbors)
    elif dr_name == "LLE":
        dr = LLE(n_components=2, n_neighbors=n_neighbors)
    elif dr_name == "SE":
        dr = SE(n_components=2, n_neighbors=n_neighbors)
    elif dr_name == "UMAP":
        dr = umap.UMAP(n_components=2, n_neighbors=n_neighbors,
                       verbose=False, min_dist=min_dist)
    elif dr_name == "GRP":
        dr = GRP(n_components=2)
    elif dr_name == "MDS":
        dr = MDS(n_components=2)
    try:
        dr_data = dr.fit_transform(features)
    except Exception:
        return
    dr_data = pd.DataFrame(
        dr_data, columns=["{}_1".format(dr_name), "{}_2".format(dr_name)])
    dr_data.index = idx
    # save stuff
    if labels is not None:
        dr_data["labels"] = list(labels)
    dr_data["filename"] = list(filename)
    # fig, ax = plt.subplots()
    # sns.scatterplot(dr_data['{}_1'.format(dr_name)], dr_data['{}_2'.format(dr_name)],
    #                 hue=dr_data['labels'], ax=ax)
    # plt.savefig(dataset_name + '/figures/1_' + fn + '.pdf')
    # plt.close('all')
    dr_data.to_csv(output_folder + fn + ".csv", index=False)
    print("---------Finished: {}{}-----------".format(dataset_name, fn))
    return
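# A hypothetical invocation of apply_dr; the paths below are placeholders,
# and the input CSV is expected to hold feature columns followed by
# 'labels' and 'filename' as its last two columns.
apply_dr(input_file='./features/MNIST_features.csv',
         output_folder='./dr_output/',
         dataset_name='MNIST', dr_name='UMAP',
         n_neighbors=15, min_dist=0.1, max_samples=5000)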
def SLLE():
    le = LLE(n_components=3, n_neighbors=14)
    slle = le.fit_transform(ppd)
    km_slle2 = Kmeans_2D(slle, "KM Clustering on 2D Standard LLE.html", 8)
    km_slle3 = Kmeans_3D(slle, "KM Clustering on 3D Standard LLE.html", 8)
km_lmaps2 = Kmeans_2D(lmaps, "KM Clustering on 2D Laplacian Eigenmaps.html", 8)
km_lmaps3 = Kmeans_3D(lmaps, "KM Clustering on 3D Laplacian Eigenmaps", 8)

# In[20]:

SLLE()
Iso_map()
Laplacian_eigenmap()

# In[21]:

# to get the best parameters for dimensionality reduction and clustering
nn_check(ppd)

# In[22]:

# Modified LLE
lle = LLE(n_components=5, n_neighbors=8, method='modified', modified_tol=1e-12)
middle = lle.fit_transform(ppd)  # passing adata.X gives different clustering results

# In[23]:

lle = LLE(n_components=3, n_neighbors=11, method='modified', modified_tol=1e-12)
reduced_lle = lle.fit_transform(ppd)

# In[24]:

km_mds2 = Kmeans_2D(reduced_lle, "KM Clustering on 2D Modified LLE.html", 7)
km_mds3 = Kmeans_3D(reduced_lle, "KM Clustering on 3D Modified LLE.html", 7)

# In[25]:

# call ICA function
df = df.dropna(axis='columns')
X = df.iloc[:, 5:]
y = df.iloc[:, 0:5]

# Scale
from sklearn import preprocessing
X = preprocessing.scale(X)

# Locally Linear Embedding, 2 components
from sklearn.manifold import LocallyLinearEmbedding as LLE
n_components = 2
neighbors = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
for neighbor in neighbors:
    # Produces a dataset for each neighbor count
    embedding = LLE(n_neighbors=neighbor, n_components=n_components,
                    eigen_solver='dense', reg=0.001)
    columns = ["LLE_{}".format(j + 1) for j in range(n_components)]
    X_transformed = pd.DataFrame(embedding.fit_transform(X), columns=columns)
    pc_df = pd.concat([y, X_transformed], axis=1, sort=False)
    pc_df.to_csv('./Data/Reduced DataFrames/LLE/LLE-{}N.csv'.format(neighbor),
                 header=True, index=False)
    print("Round Done: {}".format(neighbor))

# 100 components
from sklearn.manifold import LocallyLinearEmbedding as LLE
n_components = 100
neighbors = 13
def main():
    # load ORL or load Yale
    xTrain_, yTrain, xTest_, yTest = loadORLImages(u'./att_faces', 5)
    # xTrain_, yTrain, xTest_, yTest = loadYaleImages()

    # WT+PCA+SVM
    # WT
    xTrain = np.array(wavelet_transform(xTrain_))
    xTest = np.array(wavelet_transform(xTest_))
    # Yale dataset wavelet
    # xTrain = np.array(wavelet_transform(xTrain_, 100, 100))
    # xTest = np.array(wavelet_transform(xTest_, 100, 100))
    # PCA
    data = np.float32(np.mat(xTrain))
    pca = PCA(n_components=50)
    pca.fit(data)
    xTrain = pca.transform(data)
    print('PCA explained variance ratio: %s' % sum(pca.explained_variance_ratio_))
    xTest = pca.transform(np.float32(np.mat(xTest)))
    # SVM
    score = SVM_GridSearch(xTrain, yTrain, xTest, yTest)
    print('WT+PCA+SVM accuracy: %s' % score)

    # PCA+SVM
    # PCA
    data = np.float32(np.mat(xTrain_))
    pca = PCA(n_components=50)
    pca.fit(data)
    xTrain = pca.transform(data)
    print('PCA explained variance ratio: %s' % sum(pca.explained_variance_ratio_))
    xTest = pca.transform(np.float32(np.mat(xTest_)))
    # SVM
    score = SVM_GridSearch(xTrain, yTrain, xTest, yTest)
    print('PCA+SVM accuracy: %s' % score)

    # LDA+SVM
    # %% LDA directly
    # clf = LDA()
    # clf.fit(xTrain_, yTrain)
    # yPredict = clf.predict(xTest_)
    # print(np.where(yPredict != np.array(yTest)))
    # print(u'LDA recognition rate: %.2f%%' % ((yPredict == np.array(yTest)).mean() * 100))
    # use for feature extraction
    clf = LDA(n_components=50)
    clf.fit(xTrain_, yTrain)
    xTrain = clf.transform(xTrain_)  # xTrain is the dimensionality-reduced data
    xTest = clf.transform(xTest_)
    # print('LDA class centers:', clf.means_)
    print('LDA classification accuracy:', clf.score(xTest_, yTest))  # score is the classification accuracy
    # SVM
    score = SVM_GridSearch(xTrain, yTrain, xTest, yTest)
    print('LDA+SVM accuracy: %s' % score)

    # LLE+SVM
    from sklearn.manifold import LocallyLinearEmbedding as LLE
    lle = LLE(n_neighbors=30, n_components=50, method='standard')
    lle.fit(xTrain_)
    xTrain = lle.transform(xTrain_)
    xTest = lle.transform(xTest_)
    # trans_data, err = lle.fit_transform(xTrain_)
    # print("LLE Done. Reconstruction error: %g" % err)
    # SVM
    score = SVM_GridSearch(xTrain, yTrain, xTest, yTest)
    print('LLE+SVM accuracy: %s' % score)
def main(args):
    outputdir = os.path.dirname(args.vectors)
    # winidx_path = os.path.join(outputdir,
    #                            'cos-distance_' + os.path.basename(args.weights))
    point_path = os.path.splitext(args.vectors)[0] + \
        '_{0}_{1}d-points_it{2}_s{3}.txt'.format(
            args.algorithm, args.components, args.iteration, args.samples)
    fig_path = os.path.splitext(args.vectors)[0] + \
        '_{0}_it{1}_s{2}.eps'.format(args.algorithm, args.iteration, args.samples)
    print('loading val...')
    val = utils.io.load_image_list(args.val)
    categories = utils.io.load_categories(args.categories)
    v = np.load(args.vectors)
    N = v.shape[0]
    d = v.shape[1]
    C = len(categories)
    NperC = N // C
    samples_per_c = args.samples
    random_order = np.random.permutation(NperC)
    selected_vectors = []
    selected_images = []
    Ys = []
    for i in range(C):
        selected_vectors.extend(
            [v[i * NperC + ii] for ii in random_order[:samples_per_c]])
        selected_images.extend(
            [val[i * NperC + ii][0] for ii in random_order[:samples_per_c]])
        Ys.extend(
            [val[i * NperC + ii][1] for ii in random_order[:samples_per_c]])
    # print(selected_vectors)
    # print(Ys)
    if args.algorithm == 'tsne':
        model = utils.TSNE(n_components=args.components, n_iter=args.iteration,
                           n_iter_without_progress=args.preprocessdim,
                           angle=args.angle, metric=args.metric)
    elif args.algorithm == 'mds':
        model = MDS(n_components=args.components, n_jobs=-1)
    elif args.algorithm == 'lle':
        model = LLE(n_components=args.components, n_neighbors=args.neighbors,
                    n_jobs=-1)
    elif args.algorithm == 'isomap':
        model = Isomap(n_components=args.components, n_neighbors=args.neighbors,
                       n_jobs=-1)
    elif args.algorithm == 'pca':
        model = PCA(n_components=args.components)
    # X = model.fit_transform(v[:23*10])
    print('fitting...')
    X = model.fit_transform(np.array(selected_vectors))
    Y = np.asarray([x[1] for x in val])
    if args.algorithm == 'pca':
        pca = PCA(n_components=100)
        pca.fit(np.array(selected_vectors))
        E = pca.explained_variance_ratio_
        print("explained", E)
        print("cumsum E", np.cumsum(E))
    print('drawing...')
    markers = ['o', 'x', 'v', '+']
    if args.components == 2:
        plt.figure(2, figsize=(8, 6))
        plt.clf()
        # plt.scatter(X[:, 0], X[:, 1], c=Y[:23*10], cmap=plt.cm.jet)
        # plt.scatter(X[:, 0], X[:, 1], c=np.array(Ys), cmap=plt.cm.jet, label=categories)
        for i in range(C):
            plt.scatter(X[samples_per_c * i:samples_per_c * (i + 1), 0],
                        X[samples_per_c * i:samples_per_c * (i + 1), 1],
                        marker=markers[i % len(markers)], s=10,
                        color=plt.cm.jet(float(i) / (C - 1)),
                        label=categories[i])
        plt.xlabel(args.algorithm + '1')
        plt.ylabel(args.algorithm + '2')
        plt.legend(fontsize=10.25, scatterpoints=1,
                   bbox_to_anchor=(1.05, 1.01), loc='upper left')
        plt.subplots_adjust(right=0.7)
        # plt.show()
        plt.savefig(fig_path)
    elif args.components == 3:
        from mpl_toolkits.mplot3d import Axes3D
        fig = plt.figure()
        ax = Axes3D(fig)
        ax.set_xlabel("X-axis")
        ax.set_ylabel("Y-axis")
        ax.set_zlabel("Z-axis")
        for i in range(C):
            ax.scatter(X[samples_per_c * i:samples_per_c * (i + 1), 0],
                       X[samples_per_c * i:samples_per_c * (i + 1), 1],
                       X[samples_per_c * i:samples_per_c * (i + 1), 2],
                       marker=markers[i % len(markers)], s=10,
                       c=plt.cm.jet(float(i) / (C - 1)),
                       label=categories[i])
        plt.show()
    print(model.get_params())
    # save points
    with open(point_path, 'w') as fp:
        for path, t, p in zip(selected_images, Ys, X):
            fp.write("{0}\t{1}\t{2}\n".format(path, t, '\t'.join(map(str, p))))