def finalData(algo, lb, dm):
    """Reduce the loaded dataset to ``dm`` dimensions and return it as a
    downloadable data-URI CSV string.

    Params:
        algo: reduction algorithm name ('PCA', 'Linear Embading', 'Isomap',
              'MDS', 'SpectralEmbedding'; anything else falls back to t-SNE).
        lb:   label column name, kept alongside the reduced features.
        dm:   target number of dimensions.
    """
    Page3_Util.a.maindata.make_XY(lb)
    # BUG FIX: removed the dead `pca = LocallyLinearEmbedding(...)` default
    # that was constructed unconditionally before this chain -- every branch
    # (including the else) assigns `pca`, so it was wasted work.
    if algo == 'PCA':
        pca = PCA(n_components=dm)
    elif algo == 'Linear Embading':  # label kept verbatim: callers pass this exact string
        pca = LocallyLinearEmbedding(n_components=dm)
    elif algo == 'Isomap':
        pca = Isomap(n_components=dm)
    elif algo == 'MDS':
        pca = MDS(n_components=dm)
    elif algo == 'SpectralEmbedding':
        pca = SE(n_components=dm)
    else:
        # t-SNE fallback: embed into one fewer dimension when dm equals the
        # feature count (original behavior).
        if dm == Page3_Util.a.maindata.N_features:
            dm = dm - 1
        pca = TSNE(n_components=dm)
    principalComponents = pca.fit_transform(Page3_Util.a.maindata.X)
    principalDf = pd.DataFrame(data=principalComponents,
                               columns=["D{}".format(i) for i in range(dm)])
    finalDf = pd.concat([principalDf, Page3_Util.a.maindata.df[[lb]]], axis=1)
    csv_string = finalDf.to_csv(index=False, encoding='utf-8')
    csv_string = "data:text/csv;charset=utf-8," + urllib.parse.quote(csv_string)
    return csv_string
def initial_embed(self, reduce, d):
    """Install ``self.embed``: a callable mapping data to a d-dimensional
    embedding with the requested reducer, or the identity for 'none'.

    reduce: one of 'isomap', 'ltsa', 'mds', 'lle', 'se', 'pca', 'none'
            (case-insensitive).
    d:      target embedding dimensionality.
    """
    reduce = reduce.lower()
    assert reduce in ['isomap', 'ltsa', 'mds', 'lle', 'se', 'pca', 'none']
    if reduce == 'none':
        # No reduction requested: pass data through untouched.
        self.embed = lambda x: x
        return
    if reduce == 'isomap':
        from sklearn.manifold import Isomap
        embed = Isomap(n_components=d)
    elif reduce == 'ltsa':
        from sklearn.manifold import LocallyLinearEmbedding
        embed = LocallyLinearEmbedding(n_components=d,
                                       n_neighbors=5, method='ltsa')
    elif reduce == 'mds':
        from sklearn.manifold import MDS
        embed = MDS(n_components=d, metric=False)
    elif reduce == 'lle':
        from sklearn.manifold import LocallyLinearEmbedding
        embed = LocallyLinearEmbedding(n_components=d,
                                       n_neighbors=5, eigen_solver='dense')
    elif reduce == 'se':
        from sklearn.manifold import SpectralEmbedding
        embed = SpectralEmbedding(n_components=d)
    else:  # 'pca'
        from sklearn.decomposition import PCA
        embed = PCA(n_components=d)
    self.embed = lambda x: embed.fit_transform(x)
def get_dim_reds_scikit(pct_features):
    """Build the roster of scikit-learn dimensionality-reduction estimators.

    Each estimator keeps ``pct_features`` of the module-level
    ``num_features`` dimensions, never fewer than 1 component.
    """
    n_components = max(int(pct_features * num_features), 1)
    return [
        LinearDiscriminantAnalysis(n_components=n_components),
        TruncatedSVD(n_components=n_components),
        #SparseCoder(n_components=n_components),
        DictionaryLearning(n_components=n_components),
        FactorAnalysis(n_components=n_components),
        SparsePCA(n_components=n_components),
        NMF(n_components=n_components),
        PCA(n_components=n_components),
        # BUG FIX: RandomizedPCA was removed from scikit-learn (>=0.20);
        # the documented replacement is PCA with the randomized SVD solver.
        PCA(n_components=n_components, svd_solver='randomized'),
        KernelPCA(kernel="linear", n_components=n_components),
        KernelPCA(kernel="poly", n_components=n_components),
        KernelPCA(kernel="rbf", n_components=n_components),
        KernelPCA(kernel="sigmoid", n_components=n_components),
        KernelPCA(kernel="cosine", n_components=n_components),
        Isomap(n_components=n_components),
        LocallyLinearEmbedding(n_components=n_components,
                               eigen_solver='auto', method='standard'),
        LocallyLinearEmbedding(n_neighbors=n_components,
                               n_components=n_components,
                               eigen_solver='auto', method='modified'),
        LocallyLinearEmbedding(n_neighbors=n_components,
                               n_components=n_components,
                               eigen_solver='auto', method='ltsa'),
        SpectralEmbedding(n_components=n_components)
    ]
def lle(numComponents, neighbors=5, hessian=False):
    """Build a LocallyLinearEmbedding estimator.

    numComponents: target embedding dimension.
    neighbors:     neighborhood size (tunable if there is time).
    hessian:       when True, use the 'hessian' LLE variant.
    """
    extra = {'method': 'hessian'} if hessian else {}
    return LocallyLinearEmbedding(n_neighbors=neighbors,
                                  n_components=numComponents,
                                  **extra)
def dr_lle(x_train_org, SCALING, N_COMPONENTS):
    """Embed ``x_train_org`` with LLE (optionally standard-scaled first)
    and return the reduced array."""
    if SCALING:
        print("scaling lle...")
        reducer = make_pipeline(
            StandardScaler(),
            LocallyLinearEmbedding(n_components=N_COMPONENTS))
    else:
        print("non scaling lle...")
        reducer = LocallyLinearEmbedding(n_components=N_COMPONENTS)
    lle_result = reducer.fit_transform(x_train_org)
    print("lle_result.shape:", lle_result.shape)
    return lle_result
def dim_reduction_comparison(dataset, n_comp):
    """Benchmark several dimensionality reducers on ``dataset``.

    For each of FA / KernelPCA / Isomap / LLE / PCA, reduces the scaled
    train/test split to ``n_comp`` components, runs ``comparison`` and
    prints mean/std of the resulting accuracies and p-values.
    """
    from sklearn.decomposition import FactorAnalysis, PCA, KernelPCA
    from sklearn.manifold import Isomap, LocallyLinearEmbedding
    from sklearn import preprocessing
    from tqdm import tqdm
    N, y_train, T, y_test = import_pickled_data(dataset)
    name_1 = ["FA", "KPCA", "Isomap", "LLE", "PCA"]
    dims = [
        FactorAnalysis(n_components=n_comp, tol=0.01,
                       copy=True, max_iter=1000, noise_variance_init=None,
                       svd_method='randomized', iterated_power=3,
                       random_state=0),
        KernelPCA(n_components=n_comp, kernel='linear', gamma=None, degree=3,
                  coef0=1, kernel_params=None, alpha=1.0,
                  fit_inverse_transform=False, eigen_solver='auto', tol=0,
                  max_iter=None, remove_zero_eig=False, random_state=None,
                  copy_X=True, n_jobs=1),
        Isomap(n_neighbors=5, n_components=n_comp, eigen_solver='auto',
               tol=0, max_iter=None, path_method='auto',
               neighbors_algorithm='auto', n_jobs=1),
        LocallyLinearEmbedding(n_neighbors=5, n_components=n_comp, reg=0.001,
                               eigen_solver='auto', tol=1e-06, max_iter=100,
                               method='standard', hessian_tol=0.0001,
                               modified_tol=1e-12,
                               neighbors_algorithm='auto',
                               random_state=None, n_jobs=1),
        PCA(n_components=n_comp, copy=True, whiten=False, svd_solver='auto',
            tol=0.0, iterated_power='auto', random_state=None)
    ]
    # Standardize using statistics of the training set only.
    scaler = preprocessing.StandardScaler(with_mean=True,
                                          with_std=True).fit(N)
    X_train = scaler.transform(N)
    X_test = scaler.transform(T)
    epoch = 1
    for n, clf in zip(name_1, dims):
        scores = np.zeros((epoch, 9))
        p_value = np.zeros((epoch, 9))
        print("DR is", n)
        # BUG FIX: xrange is Python 2 only; this function otherwise targets
        # Python 3 (print() calls), so use range.
        for i in tqdm(range(epoch)):
            Train = clf.fit_transform(X_train)
            Test = clf.transform(X_test)
            names, scores[i, :], p_value[i, :] = comparison(
                Train, y_train, Test, y_test)
        print("names", names)
        mean = np.round(scores.mean(axis=0), 5)
        std = np.round(scores.std(axis=0), 5)
        s = ' '
        for i, element in enumerate(mean):
            s = s + ",(" + str(element) + ',' + str(std[i]) + ')'
        print("Accuracy", s)
        mean = np.round(p_value.mean(axis=0), 5)
        std = np.round(p_value.std(axis=0), 5)
        s = ' '
        for i, element in enumerate(mean):
            s = s + ",(" + str(element) + ',' + str(std[i]) + ')'
        print("p-value", s)
def createLocallyLinearEmbeddingLearning(params):
    """Create a LocallyLinearEmbedding learner configured from ``params``.

    Recognized keys (all optional; sklearn defaults apply otherwise):
        params['n_neighbors']         = N
        params['eigen_solver']        = ['auto' | 'arpack' | 'dense']
        params['method']              = ('standard', 'hessian', 'modified' or 'ltsa')
        params['neighbors_algorithm'] = ['auto' | 'brute' | 'kd_tree' | 'ball_tree']
    """
    # BUG FIX: the documented params were accepted but silently ignored;
    # forward the recognized keys to the estimator.
    supported = ('n_neighbors', 'eigen_solver', 'method',
                 'neighbors_algorithm')
    kwargs = {k: params[k] for k in supported if params and k in params}
    mfd = LocallyLinearEmbedding(**kwargs)
    return mfd
def main():
    """Embed the digits dataset into 2-D with several manifold learners
    (plus PCA) and plot each embedding coloured by class."""
    dataset = datasets.load_digits()
    X = dataset.data
    y = dataset.target
    plt.figure(figsize=(12, 8))
    # The main manifold-learning algorithms (plus PCA for reference).
    manifolders = {
        'PCA': PCA(),
        'MDS': MDS(),
        'Isomap': Isomap(),
        'LLE': LocallyLinearEmbedding(),
        'Laplacian Eigenmaps': SpectralEmbedding(),
        't-SNE': TSNE(),
    }
    for idx, (name, manifolder) in enumerate(manifolders.items()):
        plt.subplot(2, 3, idx + 1)
        # Reduce the training data to two dimensions with this learner.
        X_transformed = manifolder.fit_transform(X)
        # Scatter-plot the reduced data, one colour per label.
        for label in np.unique(y):
            plt.title(name)
            plt.scatter(X_transformed[y == label, 0],
                        X_transformed[y == label, 1])
    plt.show()
def LLE_colored(binned_data, trial_range, n_comps, n_neigh, behav_var):
    """3-D scatter of an LLE embedding of binned spikes, coloured by a
    behavioural variable.

    trial_range: two-element sequence with the first and last trial of the
    concatenated range to embed.
    """
    # Locate the first and last bin indices of the requested trial range.
    trial_ids = list(binned_data['trial_number'])
    first = trial_ids.index(trial_range[0])
    last = len(trial_ids) - 1 - trial_ids[::-1].index(trial_range[1])
    # Load neural data and reduce dimensions.
    X = binned_data['summed_spike_amps'][:, first:last].T
    Y = LocallyLinearEmbedding(n_components=n_comps,
                               n_neighbors=n_neigh).fit_transform(X)
    # Colour the embedding with the chosen experimental parameter.
    x, y, z = np.split(Y, 3, axis=1)
    fig = plt.figure()
    ax = Axes3D(fig)
    p = ax.scatter(x, y, z, s=40, alpha=0.25,
                   c=abs(binned_data[behav_var][first:last]), cmap='jet')
    fig.colorbar(p)
    plt.title("LLE Guido's RSC --> CA1 --> midbrain recording vs %s" % behav_var,
              fontsize=40)
    plt.show()
def plotLocallyLinearEmbedding(X, y, filenames=None, dim=3, num=-1,
                               savefile='se.npy', logdir='plots'):
    """Project features with LLE and log the embedding to TensorBoard.

    Params:
        X: {ndarray(n_samples, n_features)}
        y: {ndarray(n_samples)}
        filenames: optional image paths used as thumbnails.
        dim: target embedding dimension.
        num: if not -1, randomly subsample this many points first.
        logdir: {str} TensorBoard log directory.
    Returns:
        The (possibly subsampled) embedded X and matching y.
    """
    print("dir: ", logdir)
    if num != -1:
        print("Randomly choose...")
        index = np.random.choice(list(range(X.shape[0])), num, replace=False)
        filenames = np.array(filenames)[index] if filenames is not None else None
        X = X[index]
        y = y[index]
    print("LLE...")
    X = LocallyLinearEmbedding(n_neighbors=50,
                               n_components=dim).fit_transform(X)
    if filenames is None:
        images = None
    else:
        # Fall back to '<path>/1.jpg' when a given path is not a file.
        filenames = [p if os.path.isfile(p) else '{}/{}'.format(p, '1.jpg')
                     for p in filenames]
        loaded = [cv2.imread(p, cv2.IMREAD_COLOR) for p in filenames]
        # HWC -> CHW byte tensor for add_embedding's label_img argument.
        images = torch.ByteTensor(
            np.array([np.transpose(img, axes=[2, 0, 1]) for img in loaded]))
    print("Ploting...")
    with SummaryWriter(logdir) as writer:
        writer.add_embedding(mat=X, metadata=y, label_img=images)
    print("------ Done ------")
    return X, y
def applyLlleWithStandardisation(data, n_components=None):
    """Standardise ``data`` (zero mean, unit variance per feature) and
    return its LLE embedding with ``n_components`` dimensions."""
    standardised = preprocessing.scale(data)
    embedder = LocallyLinearEmbedding(n_components=n_components,
                                      eigen_solver="auto")
    return embedder.fit_transform(standardised)
def fit(self, X, y):
    """Fit an LLE manifold on X, cluster the embedding with KMeans, and
    learn a cluster->target mapping via the Hungarian algorithm.

    y is used only to align cluster ids with ground-truth labels.
    """
    # Create a manifold on the training data.
    self.model = LocallyLinearEmbedding(
        method=self.method,
        n_neighbors=self.n_neighbors,
        n_components=self.n_components,
        reg=self.reg,
        eigen_solver=self.eigen_solver,
        random_state=self.random_state).fit(X, y)
    embedded = self.model.transform(X)
    # Determine centroids for the embedded points.
    self.centroids = KMeans(n_clusters=self.n_clusters,
                            random_state=self.random_state).fit(embedded)
    # Every point is assigned to a certain cluster.
    labels = self.centroids.predict(embedded)
    # Assign each centroid to the correct cluster by solving the
    # assignment problem over the confusion-matrix cost.
    confusion_m = confusion_matrix(y, labels)
    cost_m = make_cost_matrix(confusion_m)
    target_cluster = Munkres().compute(cost_m)  # (target, cluster) pairs
    # Save the cluster -> target mapping for predictions.
    self.mapping = {cluster: target
                    for target, cluster in dict(target_cluster).items()}
def LLE_plot(data):
    """Plot LLE (Local Linear Embedding) projections of each digit class
    for n_neighbors = 1..49, saving one figure per neighbourhood size."""
    print("Computing LLE embedding")
    t1 = time()
    for n in range(1, 50):
        plt.figure(figsize=(16, 9))
        n_neighbors = n
        print("n_neighbors = %d" % n_neighbors)
        for i in range(10):
            condition = data['label'] == i
            subset_data = data[condition]
            # BUG FIX: pass n_neighbors by keyword -- scikit-learn >= 1.0
            # no longer accepts positional estimator-constructor arguments.
            clf = LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                         n_components=2,
                                         method='standard',
                                         eigen_solver='dense')
            t0 = time()
            X_lle = clf.fit_transform(subset_data)
            print("Done. Reconstruction error: %g" % clf.reconstruction_error_)
            print("Locally Linear Embedding of the digits (time %.2fs)"
                  % (time() - t0))
            plt.scatter(X_lle[:, 0], X_lle[:, 1], cmap=plt.cm.hot, s=2,
                        label='digit %d' % i)
        plt.ylim([-0.1, 0.1])
        plt.xlim([-0.2, 0.2])
        plt.legend()
        plt.grid()
        plt.savefig("./img/n-neighbor=%d.png" % n_neighbors, dpi=300)
    print("totally consumed time : (%.2fs)" % (time() - t1))
def get_embedding(X, anc, Xtest, d, method='SVD'):
    """Embed the train/anchor/test matrices into d dimensions.

    method: 'PCA', 'ICA' or 'LLE' fit the corresponding sklearn
    transformer on X and transform all three matrices; any other value
    projects them onto the top-d right singular vectors of X.
    Returns (Y, Yanc, Ytest).
    """
    if method == 'PCA':
        reducer = PCA(d, svd_solver='full')
    elif method == 'ICA':
        reducer = FastICA(d)
    elif method == 'LLE':
        reducer = LocallyLinearEmbedding(n_neighbors=15, n_components=d)
    else:
        # Plain SVD: project everything onto the leading right singular
        # vectors of X.
        U1, S1, V1 = sc.linalg.svd(X, lapack_driver='gesvd')
        proj = V1[:d, :]
        return np.dot(X, proj.T), np.dot(anc, proj.T), np.dot(Xtest, proj.T)
    Y = reducer.fit_transform(X)
    Yanc = reducer.transform(anc)
    Ytest = reducer.transform(Xtest)
    return Y, Yanc, Ytest
def get_lower_dimensional_projection(cluster_data, algorithm='tsne',
                                     projection_dim=2):
    """Project ``cluster_data`` down to ``projection_dim`` dimensions.

    algorithm (case-insensitive): 'tsne', 'pca', 'mds'; any other value
    falls back to LLE. All reducers use random_state=42.
    """
    algo = algorithm.lower()
    if algo == 'tsne':
        reducer = TSNE(n_components=projection_dim, random_state=42)
    elif algo == 'pca':
        reducer = PCA(n_components=projection_dim, random_state=42,
                      copy=False)
    elif algo == 'mds':
        reducer = MDS(n_components=projection_dim, random_state=42)
    else:
        reducer = LocallyLinearEmbedding(n_components=projection_dim,
                                         random_state=42)
    return reducer.fit_transform(cluster_data)
def plot_LLE(self, df, idx_target, palette='tab10', filename_out=None):
    """Embed the features of ``df`` into 2-D with LLE and scatter-plot
    them coloured by the target column at ``idx_target``."""
    X, Y = tools_DF.df_to_XY(df, idx_target)
    embedded = LocallyLinearEmbedding(n_components=2).fit_transform(X)
    stacked = numpy.concatenate((Y.reshape(-1, 1), embedded), axis=1)
    df = pd.DataFrame(stacked, columns=['LLE', 'x0', 'x1'])
    df = df.astype({'LLE': 'int32'})
    self.plot_2D_features_v3(df, remove_legend=True, palette=palette,
                             filename_out=filename_out)
    return
def lle(n, nay, x):
    """Run LLE on ``x`` with ``n`` components and ``nay`` neighbours,
    print the elapsed time, and return the embedding."""
    print('Running LLE')
    start = time()
    embedded = LocallyLinearEmbedding(n_components=n,
                                      n_neighbors=nay).fit_transform(x)
    print("done in %0.3fs" % (time() - start))
    print("(see results below)")
    return embedded
def decompose(dimred, dim, nneigh):
    """Embed the module-level matrix ``An`` into ``dim`` dimensions.

    dimred: 'MDS' | 'ISOMAP' | 'LLE' | 'TSNE' | 'UMAP' | 'PCA'
    nneigh: neighbourhood size for the neighbour-based methods.
    Raises ValueError for an unrecognized method name.
    """
    if dimred == 'MDS':  # slowest!
        embedding = MDS(n_components=dim, n_init=__inits, max_iter=__iters,
                        n_jobs=-1, dissimilarity=__dis)
    elif dimred == 'ISOMAP':  # slow
        embedding = Isomap(n_neighbors=nneigh, n_components=dim, n_jobs=-1)
    elif dimred == 'LLE':  # slow-acceptable
        embedding = LocallyLinearEmbedding(n_neighbors=nneigh,
                                           n_components=dim, n_jobs=-1)
    elif dimred == 'TSNE':  # acceptable
        embedding = TSNE(n_components=dim, n_iter=__iters,
                         metric='precomputed', learning_rate=__lrate,
                         perplexity=__perplexity)
    elif dimred == 'UMAP':  # fast
        embedding = umap.UMAP(n_neighbors=nneigh, n_components=dim,
                              min_dist=0.1)
    elif dimred == 'PCA':  # fastest!
        embedding = PCA(n_components=dim)
    else:
        raise ValueError('dimension reduction method not recognized')
    return embedding.fit_transform(An)
def IrisMatchingBootstrap(train_features, train_classes, test_features,
                          test_classes, times, thresholds):
    """Bootstrap iris matching on LLE-reduced features.

    Fits a 200-component LLE once on the training features, then repeats
    ``times`` rounds of sampling a test set, matching, and computing ROC
    curves. Returns (FMR curves, FNMR curves, mean CRR, upper and lower
    95% CRR bounds).
    """
    total_fmrs = []
    total_fnmrs = []
    total_crr = np.zeros(times)
    # Fit the reducer once on training data and reuse it for both sets.
    reducer = LocallyLinearEmbedding(n_neighbors=201, n_components=200)
    reducer.fit(train_features)
    train_redfeatures = reducer.transform(train_features)
    test_redfeatures = reducer.transform(test_features)
    for t in range(times):
        tests_features, tests_classes = selectTestSample(test_redfeatures,
                                                         test_classes)
        crr, distm, distn = IrisMatching(train_redfeatures, train_classes,
                                         tests_features, tests_classes, 3)
        fmrs, fnmrs = calcROC(distm, distn, thresholds)
        total_fmrs.append(fmrs)
        total_fnmrs.append(fnmrs)
        total_crr[t] = crr
    total_fmrs = np.array(total_fmrs)
    total_fnmrs = np.array(total_fnmrs)
    crr_mean = np.mean(total_crr)
    crr_std = np.std(total_crr)
    # 95% confidence interval, capped at 1.0 on the upper side.
    crr_u = min(crr_mean + crr_std * 1.96, 1)
    crr_l = crr_mean - crr_std * 1.96
    return total_fmrs, total_fnmrs, crr_mean, crr_u, crr_l
def function(self, data):
    """Return the LLE embedding of ``data`` configured from the
    instance's neighbour/component counts and extra keyword args."""
    # pylint: disable=not-a-mapping
    reducer = LocallyLinearEmbedding(n_neighbors=self.n_neighbors,
                                     n_components=self.n_components,
                                     **self.kwargs)
    return reducer.fit_transform(data)
def evaluate_embeddings(D, labels):
    """Benchmark KMeans clustering quality on t-SNE / Isomap / LLE
    embeddings of ``D`` for target dimensionalities 2..6.

    Each (estimator, dimension, embedding) combination is scored via
    ``bench_k_means`` against the ground-truth ``labels``.
    """
    estimators = [KMeans(init='k-means++', n_clusters=5, n_init=10)]
    #,AgglomerativeClustering(n_clusters=5),AgglomerativeClustering(n_clusters=5,linkage='average')]
    est_names = ['KMeans']
    #,'wardAgglomerativeClustering','avgAgglomerativeClustering']
    for e in range(len(estimators)):
        # BUG FIX: converted Python 2 print statements to print() calls,
        # consistent with the rest of this module (Python 3).
        print('!!----------------------------------!!')
        print(est_names[e])
        estim = estimators[e]
        for i in range(2, 6 + 1):
            print('--------------------------------------')
            print('#dim = ' + str(i))
            model_t = TSNE(n_components=i, learning_rate=100, perplexity=10,
                           method='exact')
            x = model_t.fit_transform(D)
            bench_k_means(estim, name="tsne", data=x, labels=labels)
            model_i = Isomap(n_components=i)
            x = model_i.fit_transform(D)
            bench_k_means(estim, name="isomap", data=x, labels=labels)
            model_l = LocallyLinearEmbedding(n_components=i)
            x = model_l.fit_transform(D)
            bench_k_means(estim, name="lle", data=x, labels=labels)
def IrisMatchingRed(train_features, train_classes, test_features,
                    test_classes, n):
    """Dimensionality-reduced iris matching with 1-NN classifiers.

    n < 108        : reduce with LDA to n components.
    108 <= n < 323 : reduce with LLE (n+1 neighbours) to n components.
    otherwise      : use the raw features unchanged.
    Returns the correct-recognition rates under the L1, L2 and cosine
    metrics, in that order.
    """
    train_redfeatures = train_features.copy()
    test_redfeatures = test_features.copy()
    total = float(len(test_classes))
    if n < 108:
        reducer = LinearDiscriminantAnalysis(n_components=n)
        reducer.fit(train_features, train_classes)
        train_redfeatures = reducer.transform(train_features)
        test_redfeatures = reducer.transform(test_features)
    elif n < 323:
        reducer = LocallyLinearEmbedding(n_neighbors=n + 1, n_components=n)
        reducer.fit(train_features)
        train_redfeatures = reducer.transform(train_features)
        test_redfeatures = reducer.transform(test_features)

    def _crr(metric):
        # Correct-recognition rate of a 1-NN classifier with this metric.
        knn = KNeighborsClassifier(n_neighbors=1, metric=metric)
        knn.fit(train_redfeatures, train_classes)
        predicted = knn.predict(test_redfeatures)
        return float(np.sum(predicted == test_classes)) / total

    return _crr('l1'), _crr('l2'), _crr('cosine')
def data_lle_preprocessing(data, feature_columns):
    """Drop NA rows, standard-scale the feature columns, then overwrite
    the first len(feature_columns)-1 of them with a 4-component LLE
    embedding of all feature columns.

    Returns the transformed frame and the reduced column list.
    """
    data = data.dropna()
    scaler = preprocessing.StandardScaler()
    data[feature_columns] = scaler.fit_transform(data[feature_columns])
    embedder = LocallyLinearEmbedding(n_components=4)
    data[feature_columns[:-1]] = embedder.fit_transform(data[feature_columns])
    return data, feature_columns[:-1]
class DimRedConfig: ''' Contain configs that do not change in DimRedTool for easy control and change ''' # all dimension reduction methods that can be used in dependence of params dict_methods = OrderedDict({ 'Random Projection': SparseRandomProjection(), 'PCA': PCA(), 'Isomap': Isomap(), 'MDS': MDS(n_init=1, max_iter=100), 'LLE': LocallyLinearEmbedding(method='standard'), 'MLLE': LocallyLinearEmbedding(method='modified'), 'HLLE': LocallyLinearEmbedding(method='hessian'), 'LTSA': LocallyLinearEmbedding(method='ltsa'), 'Random Trees': (RandomTreesEmbedding(n_estimators=200, max_depth=5), TruncatedSVD()), 'Spectral': SpectralEmbedding(eigen_solver='arpack'), 'TSNE': TSNE(init='pca'), 'NCA': NeighborhoodComponentsAnalysis(init='random'), }) all_methods = dict_methods.keys()
def get_metastable_connections_from_gmm(
        data, gmm, connection_estimation_method='max_path_distance_diff',
        min_paths=3, distance='euclidean', low_dimension_distances=True,
        as_graph=False):
    """Derive connections between the metastable states of a fitted GMM.

    For path-based estimation methods, pairwise distances are computed
    either on a 2-D LLE embedding of ``data`` (low_dimension_distances)
    or on the raw data, and passed on as a weighted graph.

    Returns whatever ``get_metastable_connections`` returns.
    """
    means = gmm.means_
    memberships = gmm.predict(data)
    if connection_estimation_method in ['max_path_distance_diff',
                                        'connecting_paths', 'mst']:
        if low_dimension_distances:
            # BUG FIX: removed an unused PCA(n_components=2) instance that
            # was constructed here and never used.
            lle = LocallyLinearEmbedding(
                n_components=2, n_neighbors=int(0.8 * data.shape[0]))
            distance_matrix = squareform(
                pdist(lle.fit_transform(data), distance))
        else:
            distance_matrix = squareform(pdist(data, distance))
        weighted_graph = nx.Graph(distance_matrix)
    else:
        weighted_graph = None
    # BUG FIX: forward the caller's min_paths instead of the hard-coded 3
    # that silently ignored the parameter.
    return get_metastable_connections(data, means, memberships,
                                      method=connection_estimation_method,
                                      weighted_graph=weighted_graph,
                                      min_paths=min_paths,
                                      as_graph=as_graph)
def test_n_componets_from_reducer():
    """The homogenization model should report the n_components of the
    dimension reducer it was constructed with."""
    from pymks import MKSHomogenizationModel, DiscreteIndicatorBasis
    from sklearn.manifold import LocallyLinearEmbedding
    expected_components = 7
    reducer = LocallyLinearEmbedding(n_components=expected_components)
    dbasis = DiscreteIndicatorBasis(n_states=3, domain=[0, 2])
    model = MKSHomogenizationModel(dimension_reducer=reducer, basis=dbasis)
    assert model.n_components == expected_components
def Train_model(X_data, params, model):
    """Apply the selected dimensionality-reduction method to the
    high-dimensional data and return the normalized two-dimensional
    values together with the elapsed computation time.

    model:  'PCA' | 'MDS' | 'LLE' | 'ISOMAP' | 'TSNE' | 'UMAP'
    params: [LLE n_neighbors, UMAP n_neighbors, UMAP min_dist]
    Raises ValueError for an unknown model name.
    """
    t0 = time()
    if model == 'PCA':
        X_reduced_data = PCA(n_components=2).fit_transform(X_data)
    elif model == 'MDS':
        X_reduced_data = MDS(n_jobs=-1).fit_transform(X_data)
    elif model == 'LLE':
        X_reduced_data = LocallyLinearEmbedding(
            method='modified', n_neighbors=params[0],
            n_jobs=-1).fit_transform(X_data)
    elif model == 'ISOMAP':
        X_reduced_data = Isomap().fit_transform(X_data)
    elif model == 'TSNE':
        X_reduced_data = TSNE(n_components=2,
                              metric='sqeuclidean').fit_transform(X_data)
    elif model == 'UMAP':
        X_reduced_data = umap.UMAP(
            n_neighbors=params[1], min_dist=params[2],
            metric='correlation').fit_transform(X_data)
    else:
        # BUG FIX: an unknown model previously fell through and raised a
        # confusing NameError on X_reduced_data below; fail fast instead.
        raise ValueError("unknown model: %r" % (model,))
    t1 = time()
    deltatime = t1 - t0
    X_scaled = MinMax_normalization(X_reduced_data)
    return X_scaled, deltatime
def spectral_embedding(train, val, method, plot_folder, neighbors, classes,
                       dimensions=2):
    """Project train/val data to ``dimensions`` dims with the chosen
    embedding, plot ground truth for both splits, and return the
    projected copies.

    Transform-capable methods (Isomap, the LLE family) are fitted on the
    training split only; TSNE and laplacian_eigenmaps are fitted
    independently on each split since they lack transform().
    """
    projected_train = train.copy()
    projected_val = val.copy()
    # The three LLE variants differ only in the `method` argument.
    lle_variants = {"LLE": "standard",
                    "modified_LLE": "modified",
                    "hessian_LLE": "hessian"}
    if method == "Isomap":
        embedding = Isomap(n_neighbors=neighbors,
                           n_components=dimensions).fit(train['data'])
        projected_train['data'] = embedding.transform(train['data'])
        projected_val['data'] = embedding.transform(val['data'])
    elif method == "TSNE":
        embedding = TSNE(n_components=dimensions)
        projected_train['data'] = embedding.fit_transform(train['data'])
        projected_val['data'] = embedding.fit_transform(val['data'])
    elif method in lle_variants:
        embedding = LocallyLinearEmbedding(
            n_neighbors=neighbors, n_components=dimensions,
            method=lle_variants[method]).fit(train['data'])
        projected_train['data'] = embedding.transform(train['data'])
        projected_val['data'] = embedding.transform(val['data'])
    elif method == "laplacian_eigenmaps":
        embedding = SpectralEmbedding(n_components=dimensions)
        projected_train['data'] = embedding.fit_transform(train['data'])
        projected_val['data'] = embedding.fit_transform(val['data'])
    visualize_groundTruth(projected_train, method + "_training",
                          plot_folder, classes)
    visualize_groundTruth(projected_val, method + "_validation",
                          plot_folder, classes)
    return projected_train, projected_val
def wrap_lle(x, required_d, neighbors):
    """Reduce ``x`` to ``required_d`` dimensions with LLE and save the
    embedding as an .npy file so later runs can load it instead of
    refitting."""
    reducer = LocallyLinearEmbedding(n_components=required_d,
                                     n_neighbors=neighbors)
    reducer.fit(x)
    embedded = reducer.embedding_
    # Cache the embedding for reuse on the next call.
    np.save('LLE/np_x_LLE_' + str(required_d) + str(neighbors), embedded)
    return embedded
def score_lle(x_train, y_train, x_test, y_test):
    """Embed train/test with LLE (5 neighbours, 4 components), fit a
    Gaussian naive Bayes on the embedded training set and return its
    accuracy on the embedded test set."""
    lle = LocallyLinearEmbedding(n_neighbors=5, n_components=4)
    x_train = lle.fit_transform(x_train)
    # BUG FIX: the test set was previously re-fitted with fit_transform,
    # yielding an embedding inconsistent with the training one; project it
    # through the already-fitted model instead.
    x_test = lle.transform(x_test)
    nb = GaussianNB()
    nb.fit(x_train, y_train)
    y_pred = nb.predict(x_test)
    return accuracy_score(y_pred, y_test)