class SemiSupervisedGradientBoosting:
    def __init__(self, max_depth=3, n_estimators=10, learning_rate=0.1,
                 min_samples_leaf=4, n_neighbors=5, n_components=2):
        # Project-local gradient-boosting implementation plus an LLE transformer.
        self.GB = GradientBoosting.GradientBoosting(max_depth, n_estimators,
                                                    learning_rate, min_samples_leaf)
        self.Transformator = LocallyLinearEmbedding(n_neighbors, n_components)

    def fit_predict(self, Xl, y, Xu):
        print('start collapsing space')
        delimiter = Xl.shape[0]
        # Embed labeled and unlabeled points together, then split them again.
        X_all = np.vstack((Xl, Xu))
        X_all = self.Transformator.fit_transform(X_all)
        X_l_t = X_all[:delimiter]
        X_u_t = X_all[delimiter:]
        del X_all
        print('start computing similarity')
        Sim = GradientBoosting.Simalirity(X_l_t, X_u_t)
        print('finished computing similarity')
        del X_l_t, X_u_t
        print('collapsed space successfully')
        return self.GB.fit_predict(Xl, y, Xu, Sim)

    def predict(self, X):
        return self.GB.predict(X)

    def score(self, X, y):
        return self.GB.score(X, y)
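# Hypothetical usage sketch (not from the source): Xl/y are labeled data, Xu an
# unlabeled pool, and GradientBoosting is the project-local module used above,
# so this only runs inside that project.
# model = SemiSupervisedGradientBoosting(n_neighbors=10, n_components=2)
# predictions = model.fit_predict(Xl, y, Xu)
# accuracy = model.score(X_test, y_test)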
def pseudotimes_from_embedding(data_array, n_neighbors=None):
    if n_neighbors is None:
        n_neighbors = int(data_array.shape[0] * 0.5)
    embedding = LocallyLinearEmbedding(n_components=1, n_neighbors=n_neighbors)
    # Denoise with a rank-2 SVD reconstruction before embedding.
    u, s, v = np.linalg.svd(data_array, full_matrices=True)
    l = 2
    denoised_data_array = np.dot(u[:, :l], np.dot(np.diag(s[:l]), v[:l, :]))
    pseudotimes = embedding.fit_transform(denoised_data_array)
    # Rescale the 1-D embedding to [0, 1] so it can be read as pseudotime.
    pseudotimes -= pseudotimes.min()
    pseudotimes /= pseudotimes.max()
    return pseudotimes
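# A minimal usage sketch (synthetic placeholder data; np and the
# LocallyLinearEmbedding import are assumed from the surrounding module).
rng = np.random.RandomState(0)
pt = pseudotimes_from_embedding(rng.rand(50, 10), n_neighbors=10)
print(pt.shape, pt.min(), pt.max())  # expect (50, 1), rescaled to [0, 1]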
def get_metastable_connections_from_gmm(data, gmm,
                                        connection_estimation_method='max_path_distance_diff',
                                        min_paths=3, distance='euclidean',
                                        low_dimension_distances=True, as_graph=False):
    means = gmm.means_
    memberships = gmm.predict(data)
    if connection_estimation_method in ['max_path_distance_diff',
                                        'connecting_paths', 'mst']:
        if low_dimension_distances:
            # Compute pairwise distances in a 2-D LLE embedding of the data.
            lle = LocallyLinearEmbedding(n_components=2,
                                         n_neighbors=int(0.8 * data.shape[0]))
            distance_matrix = squareform(pdist(lle.fit_transform(data), distance))
        else:
            distance_matrix = squareform(pdist(data, distance))
        weighted_graph = nx.Graph(distance_matrix)
    else:
        weighted_graph = None
    return get_metastable_connections(data, means, memberships,
                                      method=connection_estimation_method,
                                      weighted_graph=weighted_graph,
                                      min_paths=min_paths, as_graph=as_graph)
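# Hypothetical usage sketch: gmm is a fitted mixture model exposing means_ and
# predict() (e.g. sklearn's GaussianMixture), data the array it was fitted on.
# from sklearn.mixture import GaussianMixture
# gmm = GaussianMixture(n_components=5).fit(data)
# connections = get_metastable_connections_from_gmm(data, gmm, as_graph=True)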
def preprocess(x_train: np.ndarray, y_train: np.ndarray,
               x_test: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Preprocesses data.

    :param x_train: the training data.
    :param y_train: the training labels.
    :param x_test: the test data.
    :return: Preprocessed x_train and x_test.
    """
    logger.log('Preprocessing...')
    # Scale data.
    logger.log('\tScaling data with params:')
    scaler = MinMaxScaler()
    logger.log('\t{}'.format(scaler.get_params()))
    x_train = scaler.fit_transform(x_train.astype(float))
    x_test = scaler.transform(x_test.astype(float))
    # Apply LLE.
    logger.log('\tApplying LLE with params:')
    embedding = LocallyLinearEmbedding(n_neighbors=100, n_jobs=-1, random_state=0)
    embedding_params = embedding.get_params()
    logger.log('\t' + str(embedding_params))
    x_train = embedding.fit_transform(x_train)
    x_test = embedding.transform(x_test)
    # Plot the embedding result.
    if PLOTTING_MODE != 'none':
        plotter.subfolder = 'graphs/LLE'
        plotter.filename = 'embedding'
        plotter.xlabel = 'first feature'
        plotter.ylabel = 'second feature'
        plotter.title = 'LLE'
        plotter.scatter(x_train, y_train,
                        class_labels=helpers.datasets.get_voice_name)
    return x_train, x_test
def run_LLE(self, n_neighbors, low_dim_size):
    """ Run the LLE algorithm.

    Parameters
    ----------
    self : object
        EC_SCOP_Evaluate object setup for this analysis
    n_neighbors : int
        number of neighbors used for the LLE run
    low_dim_size : int
        resulting number of dimensions after LLE

    Returns
    -------
    None
    """
    print("Run Locally Linear Embeddings")
    lle = LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                 n_components=low_dim_size,
                                 method='modified')
    self.X_low = lle.fit_transform(self.get_x().values)
    print("Done. Reconstruction error: {:.3f}".format(lle.reconstruction_error_))
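# Hedged call sketch: `evaluator` stands in for an EC_SCOP_Evaluate instance
# built elsewhere; note that method='modified' requires
# n_neighbors >= n_components.
# evaluator.run_LLE(n_neighbors=10, low_dim_size=2)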
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.datasets import make_s_curve
from sklearn.manifold import LocallyLinearEmbedding, TSNE

n_points = 1000
X, color = make_s_curve(n_points, random_state=0)
n_neighbors = 10
n_components = 2

fig = plt.figure(figsize=(12, 4))
ax = fig.add_subplot(131, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.viridis)
ax.view_init(4, -72)
ax.set_title('Original Data')

lle = LocallyLinearEmbedding(n_neighbors=n_neighbors,
                             n_components=n_components, method='standard')
Y1 = lle.fit_transform(X)
ax = fig.add_subplot(132)
ax.scatter(Y1[:, 0], Y1[:, 1], c=color, cmap=plt.cm.viridis, alpha=0.8)
ax.set_title('LLE')

tsne = TSNE(n_components=n_components, init='pca')
Y2 = tsne.fit_transform(X)
ax = fig.add_subplot(133)
ax.scatter(Y2[:, 0], Y2[:, 1], c=color, cmap=plt.cm.viridis, alpha=0.8)
ax.set_title('t-SNE')
plt.show()
parser.add_argument("--n-components", type=int, default=2) parser.add_argument("--n-neighbors", type=int, default=4) args = parser.parse_args() X, y = load_raw() name = f"components_{args.n_components}_neighbors_{args.n_neighbors}" data_save_folder = f"./data/LLE/{name}" fig_save_folder = f"./fig/LLE" makedirs(data_save_folder) makedirs(fig_save_folder) lle = LocallyLinearEmbedding(n_neighbors=args.n_neighbors, n_components=args.n_components) X_decomposed = lle.fit_transform(X) np.save(osp.join(data_save_folder, "feature.npy"), X_decomposed) np.save(osp.join(data_save_folder, "label.npy"), y) if args.n_components == 2: x_min, x_max = X_decomposed.min(0), X_decomposed.max(0) X_normalized = (X_decomposed - x_min) / (x_max - x_min) plt.figure(figsize=(8, 8)) for i in range(X_normalized.shape[0]): plt.text(X_normalized[i, 0], X_normalized[i, 1], str(y[i]), color=plt.cm.Set3(y[i] % 12), fontdict={ 'weight': 'bold', 'size': 9
df = pd.read_csv('../../Documents/ece657a/data/DataB.csv')
df = df.astype(float)
target = df['gnd']
data = df.values[:, 1: len(df.columns) - 1]

threes_df = df.loc[df['gnd'] == 3]
threes_data = threes_df.values[:, 1: len(df.columns) - 1]
threes_data = (threes_data - threes_data.min()) / \
    (threes_data.max() - threes_data.min())

n_neighbors = 5
n_components = 4

# 1. Apply LLE
lle = LocallyLinearEmbedding(n_neighbors=n_neighbors, n_components=n_components)
lle_data = lle.fit_transform(threes_data)
lle_df = pd.DataFrame(lle_data)
plot_three("LLE", lle_df, 0, 1, threes_df, 0.45)

# 2. Apply ISOMAP
iso = Isomap(n_neighbors=n_neighbors, n_components=n_components)
iso_data = iso.fit_transform(threes_data)
iso_df = pd.DataFrame(iso_data)
plot_three("Isomap", iso_df, 0, 1, threes_df, 0.45)

# 3. Use the naive Bayes classifier to classify the dataset based on the
#    projected 4-dimensional representations from LLE and ISOMAP.
df_data = df.values[:, 1: len(df.columns) - 1]
test_size = 0.3
X_train, _, _ = one_hot_dataframe(X_raw, [
    'job', 'marital', 'education', 'default', 'housing', 'loan',
    'contact', 'month', 'poutcome'
], replace=True)
y_train = [1 if i == 'yes' else 0 for i in df.y]

# Collect 2-D projections from several reduction methods.
reductions = []
pca = PCA(n_components=2)
reductions.append(pca.fit_transform(X_train, y_train))
lda = LDA(n_components=2)
reductions.append(lda.fit_transform(X_train, y_train))
isomap = Isomap(n_components=2)
reductions.append(isomap.fit_transform(X_train, y_train))
lle = LocallyLinearEmbedding(n_components=2, method='standard')
reductions.append(lle.fit_transform(X_train, y_train))

for reduced_X in reductions:
    plt.figure()
    red_x = []
    red_y = []
    blue_x = []
    blue_y = []
    green_x = []
    green_y = []
    for i in range(len(reduced_X)):
        if y_train[i] == 0:
            red_x.append(reduced_X[i][0])
            red_y.append(reduced_X[i][1])
        elif y_train[i] == 1:
            blue_x.append(reduced_X[i][0])
            blue_y.append(reduced_X[i][1])
elif sys.argv[1] == '-isomap':
    trimmedImages = []
    for i in range(len(images)):
        images[i] = np.reshape(images[i], (-1))
        images[i] = images[i][:minSize]
        trimmedImages.append(images[i])
    isomap = Isomap(n_components=136)
    reducedImages = isomap.fit_transform(trimmedImages)
elif sys.argv[1] == '-lle':
    trimmedImages = []
    for i in range(len(images)):
        images[i] = np.reshape(images[i], (-1))
        images[i] = images[i][:minSize]
        trimmedImages.append(images[i])
    lle = LocallyLinearEmbedding(n_components=136)
    reducedImages = lle.fit_transform(trimmedImages)

# Do cross-fold validation
kf = KFold(len(images), n_folds=2)
minAreas = {}
maxAreas = {}
avgAreas = {}
totals = {}
for train_index, test_index in kf:
    xTrain = reducedImages[train_index]
    yTrain = labels[train_index]
    clf = OneVsRestClassifier(LinearSVC(), 4)
    clf.fit(xTrain, yTrain)
    xTest = reducedImages[test_index]
    yTest = labels[test_index]
    areas = eval(classes, clf, xTest, yTest)
def get_LLE_image(data):
    LLE = LocallyLinearEmbedding(n_components=2, n_neighbors=10)
    X_LLE = LLE.fit_transform(data)
    return X_LLE
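# Usage sketch (illustrative only): project the scikit-learn digits to 2-D.
from sklearn.datasets import load_digits
digits_X, _ = load_digits(return_X_y=True)
print(get_LLE_image(digits_X[:200]).shape)  # (200, 2)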
def lle(X=None, W=None, num_vecs=None, k=None):
    # Note: W is accepted for API symmetry but not used by the embedding.
    embedder = LocallyLinearEmbedding(n_neighbors=k, n_components=num_vecs)
    return embedder.fit_transform(X)
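# Minimal usage sketch with synthetic placeholder data.
import numpy as np
emb = lle(X=np.random.rand(100, 5), num_vecs=2, k=10)
print(emb.shape)  # (100, 2)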
plt.show()
# Use iso_model.transform(x_test) to fit the isomap from the training set onto the test set

'''
-------------------------------------------------------------------------------
-------------------------------Modified LLE------------------------------------
-------------------------------------------------------------------------------
'''
# Apply modified LLE, keeping n components < the number of original features.
# method = 'standard' for LLE, 'hessian' for HLLE, or 'modified' for modified LLE
mlle_model = LocallyLinearEmbedding(n_neighbors=5, n_components=2,
                                    method='modified', random_state=seed)
mlle_model.fit_transform(x_std)
print(mlle_model.get_params())
mlle_dim = mlle_model.embedding_
print(mlle_dim.shape)  # There should be 2 latent variables represented

# Plot the first 2 extracted features and the observation class
plt.figure(figsize=(10, 5))
plt.xlabel('Latent Variable 1 (explains most variance)')
plt.ylabel('Latent Variable 2 (explains second most variance)')
plt.title('Modified LLE 2-Dimension Plot with Observation Class, 5 neighbors')
plt.scatter(mlle_dim[:, 0], mlle_dim[:, 1], c=y)
plt.colorbar()
plt.show()

# Try a different number of neighbors
mlle_model = LocallyLinearEmbedding(n_neighbors=15, n_components=2,
                                    method='modified', random_state=seed)
import numpy
from sklearn import svm
from sklearn.model_selection import KFold
from sklearn.utils import shuffle
from sklearn.naive_bayes import GaussianNB
from sklearn.manifold import LocallyLinearEmbedding
from data.preprocess import features_preprocess, features_test_preprocess, \
    labels_preprocess, labels_preprocess_num
from data.preprocess_2nd import preprocess_ft_lbls_num
from data.preprocess import ft_lbls_num

scores = []
embedding = LocallyLinearEmbedding(n_components=10)
(features1, labels1) = ft_lbls_num()
(features2, labels2) = preprocess_ft_lbls_num()
features1 = embedding.fit_transform(features1, labels1)
features2 = embedding.fit_transform(features2, labels2)

K = 5
cv = KFold(n_splits=K, shuffle=True)
features = numpy.concatenate((features1, features2))
labels = numpy.concatenate((labels1, labels2))
clf = svm.SVC(kernel='rbf')
for i in range(100):
    features1, labels1 = shuffle(features1, labels1)
    for train, test in cv.split(features1):
def main():
    parser = argparse.ArgumentParser(description='Perform Dimensionality Reduction')
    parser.add_argument('--alg', type=str, default='MLLE',
                        help='Algorithm to reduce dimensionality.')
    parser.add_argument('catalog', type=str,
                        help='Specify the catalog on which to perform DimReduce.')
    args = parser.parse_args()

    #dat = Table.read('catalogs/ZEST_catalog_colors.fits')
    #training_sample = dat[0:10000]
    #testing_sample = dat[10001:20000]
    #zkeys = ['cc', 'aa', 'm20', 'gg']

    base = os.path.basename(args.catalog)
    filename = os.path.splitext(base)[0]
    dat = Table.read(args.catalog)
    mkeys = ['elipt', 'C', 'A_1a', 'G', 'M20']

    #dat.remove_column('color')
    if 'color' not in dat.colnames:
        if 'kaggle' in sample:
            dat = prep_catalog.color_data2(dat, 'gz2class')
        if 'direct' in sample:
            dat = prep_catalog.color_data(dat, 'zclass')
        dat.write(args.catalog, overwrite=True)

    #dat = prep_catalog.adjust_asym(dat, mkeys[2])
    #train, traincols, targets = prep_catalog.whiten_data(dat, mkeys)

    n_neighbors = [10, 12, 15, 20]
    #n_neighbors = [7]
    n_components = 3

    for i, n_neigh in enumerate(n_neighbors):
        if args.alg in ['MLLE', 'LLE', 'LTSA', 'HLLE']:
            if args.alg == 'MLLE':
                method = 'modified'
            elif args.alg == 'LLE':
                method = 'standard'
            elif args.alg == 'LTSA':
                method = 'ltsa'
            elif args.alg == 'HLLE':
                method = 'hessian'

            #replace_panoptes(dat)
            #sample = 'directbig_panoptes'

            X, y = prep_catalog.whiten_data(dat, mkeys)
            (dat1, dat2), (thing1, thing2) = split_samples(dat, dat, [0.75, 0.35],
                                                           random_state=0)
            (X_train, X_test), (y_train, y_test) = split_samples(X, y, [0.75, 0.35],
                                                                 random_state=0)
            y_train = simplify_classlabels(y_train)
            y_test = simplify_classlabels(y_test)

            #filename = 'modified_7_directbig_new'
            X_train = X
            y_train = simplify_classlabels(y)

            #sample = 'direct_zcut'
            #Y_train, Y_test = open_previous_LLE(filename)
            #cut = np.where(X1['REDSHIFT'] <= 0.05)
            #X1_cut = X1[cut]
            #QC_plots(X1_cut)
            #Y_train = np.array(Y_train)[cut]
            #col_train = np.array(col_train)[cut]
            #X = Table(X)
            #cut_out_mixedup_region(X, np.array(Y_train))

            print("performing " + method + " LLE with", n_neigh, "nearest neighbors")
            print("on training sample of", len(X_train), "objects")

            t0 = time()
            A = LLE(n_neigh, n_components, eigen_solver='auto', method=method)
            error = A.fit(X_train).reconstruction_error_
            Y_train = A.fit_transform(X_train)
            Y_test = A.transform(X_test)
            t1 = time()

            metadata = {'method': method, 'N': n_neigh, 'd': n_components,
                        'error': error, 'time': t1 - t0,
                        'sample': filename + '_total'}
            save_dimreduce(dat, Y_train, y_train, metadata, filename + '_total')
            #metadata = {'method': method, 'N': n_neigh, 'd': n_components,
            #            'error': error, 'time': t1-t0, 'sample': filename+'_test'}
            #save_dimreduce(X2, Y_test, y_test, metadata, filename+'_test')

            # plot in 3D
            plot_dimreduce_3D(Y_train, y_train[:, 1], Y_test, y_test[:, 1],
                              method, n_neigh, error, t1 - t0, filename, two=False)

        #====================================================================#
        elif args.alg == 'ISO':
            method = 'IsoMap'
            print("performing IsoMap with", n_neigh, "nearest neighbors")
            print("on training sample of", len(dat), "objects")

            t0 = time()
            A = Isomap(n_neigh, n_components, eigen_solver='dense')
            error = A.fit(train).reconstruction_error()
            Y = A.fit_transform(train)
            #Y2 = A.transform(test)
            t1 = time()

            print("%s: %.2g sec" % (args.alg, t1 - t0))
            print("reconstruction error: ", error)

            print("begin plotting")
            plot_dimreduce(Y, traincols, method, n_neigh, sample, axis=0)
            plot_dimreduce(Y, traincols, method, n_neigh, sample, axis=1)
            plot_dimreduce(Y, traincols, method, n_neigh, sample, axis=2)
            plot_dimreduce_3D(Y, traincols, Y, traincols, method,
                              n_neigh, (t1 - t0), error, sample)

        elif args.alg == 'LDA':
            print("performing LDA")

            X, Xc, y = prep_catalog.whiten_data(dat, mkeys)
            (X_train, X_test), (y_train, y_test) = split_samples(X, y, [0.75, 0.25],
                                                                 random_state=0)

            DRclf = LDA(3, priors=None)
            #DRclf.fit(X_train, y_train)
            DRtrain = DRclf.fit(X_train, y_train).transform(X_train)
            DRtest = DRclf.fit(X_train, y_train).transform(X_test)

            classes = np.unique(y_train)
            colors = np.array(['darkred', 'red', 'lightsalmon',
                               'darkgreen', 'lightgreen', 'lightseagreen',
                               'indigo', 'darkviolet', 'plum'])
            plot_LDA_3D(DRtrain, y_train, classes, colors, sample)
            pdb.set_trace()

            #classifiers = []
            #predictions = []
            #Nparams = np.arange(1, X.shape[1]+1)
            #for nc in Nparams:
            clf = LDA()
            clf.fit(DRtrain, y_train)
            y_pred = clf.predict(DRtest)
            matchesLDA = (y_pred == y_test)
            print(np.sum(matchesLDA))
            pdb.set_trace()

            #------------------------------------------
            from sklearn.neighbors import KNeighborsClassifier
            knc = KNeighborsClassifier(5)
            knc.fit(DRtrain, y_train)
            y_pred = knc.predict(DRtest)
            matchesKNN = (y_pred == y_test)
            print(np.sum(matchesKNN))
            pdb.set_trace()

            #------------------------------------------
            from astroML.classification import GMMBayes
            gmmb = GMMBayes(9)
            gmmb.fit(DRtrain, y_train)
            y_pred = gmmb.predict(DRtest)
            matchesGMMB = (y_pred == y_test)
            print(np.sum(matchesGMMB))
            pdb.set_trace()

            #------------------------------------------
            # plot the results
            fig = plt.figure(figsize=(5, 2.5))
            fig.subplots_adjust(bottom=0.15, top=0.95, hspace=0.0,
                                left=0.1, right=0.95, wspace=0.2)

            # left plot: data and decision boundary
            ax = fig.add_subplot(121)
            pdb.set_trace()
            im = ax.scatter(X[:, 3], X[:, 4], color=Xc, cmap=plt.cm.Spectral,
                            s=4, lw=0)  # alternative: cmap=plt.cm.binary, zorder=2
            im.set_clim(-0.5, 1)
            #im = ax.imshow(Z, origin='lower', aspect='auto',
            #               cmap=plt.cm.binary, zorder=1,
            #               extent=xlim + ylim)
            #im.set_clim(0, 1.5)
            #ax.contour(xx, yy, Z, [0.5], colors='k')
            #ax.set_xlim(xlim)
            #ax.set_ylim(ylim)
            ax.set_xlabel('$G$')
            ax.set_ylabel('$M20$')

            #pred, true = classification_loss(predictions, y_test)
            #completeness, contamination = completeness_contamination(pred, true)
            pdb.set_trace()

            #t0 = time()
            #A = LDA(n_components, priors=None)
            #Y = A.fit_transform(train, targets)
            #Y2 = A.fit(train, targets).transform(train)
            #t1 = time()
            #print("%s: %.2g sec" % (args.alg, t1 - t0))
            predict = A.predict(train)
            #print("Predicted classes:", predict)

            plot_LDA_3D(Y2, targets, classes, colors, sample)
            plot_LDA(Y2, targets, classes, colors, sample, axis=0)
            plot_LDA(Y2, targets, classes, colors, sample, axis=1)
            plot_LDA(Y2, targets, classes, colors, sample, axis=2)
            pdb.set_trace()
#----------------------------------------------------------------------
# Isomap projection
print("Computing Isomap embedding")
t0 = time()
D_iso = Isomap(n_neighbors, n_components=2).fit_transform(D_scaled)
print("Done in time %.2fs " % (time() - t0))

#----------------------------------------------------------------------
# Locally linear embedding
n_neighbors = 35
print("Computing LLE embedding")
clf = LocallyLinearEmbedding(n_neighbors, n_components=2, method='modified')
t0 = time()
D_lle = clf.fit_transform(D_scaled)
print("Done in time %.2fs " % (time() - t0))
print("Reconstruction error: %g" % clf.reconstruction_error_)

#----------------------------------------------------------------------
# kernel PCA
print("Computing kPCA embedding")
kpca = KernelPCA(n_components=2, kernel="rbf", gamma=0.0028942661247167516)
t0 = time()
D_kpca = kpca.fit_transform(D_scaled)
print("Done in time %.2fs " % (time() - t0))

plot_embedding(D_pca, 1, rescale=None, title="PCA projection")
plot_embedding(D_iso, 2, rescale=None, title="Isomap projection")
plot_embedding(D_lle, 3, rescale=None, title="LLE projection",
               legend_loc="lower right")
plot_embedding(D_kpca, 4, rescale=None, title="kPCA projection")
fdata = iso.fit_transform(digits["data"])
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
plt.scatter(fdata[:, 0], fdata[:, 1], zs=fdata[:, 2], c=digits["target"], s=100)
plt.show()

# LLE
from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_neighbors=15, n_components=3, method="modified")
fig = plt.figure()
fdata = lle.fit_transform(digits["data"])
ax = fig.add_subplot(111, projection="3d")
plt.scatter(fdata[:, 0], fdata[:, 1], zs=fdata[:, 2], c=digits["target"], s=100)
plt.show()

# MDS
from sklearn.manifold import MDS
mds = MDS(n_components=3)
fig = plt.figure()
fdata = mds.fit_transform(digits["data"])
ax = fig.add_subplot(111, projection="3d")
    ax.add_artist(ab)
    ax.plot(ica_x[j], ica_y[j], 'ro', markersize=2)
    now_image = np.append(now_image, j)
plt.show()

# for LLE
for i in range(10):
    temp = np.argmax(y_data, axis=1)
    x = x_data[temp == i]
    y = y_data[temp == i]
    lle = LocallyLinearEmbedding(n_components=2)
    data_lle = lle.fit_transform(x)
    lle_x = data_lle[:, 0]
    lle_y = data_lle[:, 1]
    draw_image = np.reshape(x, (len(x), 28, 28))
    now_image = np.array([], dtype='int32')
    fig, ax = plt.subplots(figsize=(10, 9))
    s = 'LLE : ' + str(i)
    plt.title(s)
    plt.xlabel('First Principal Component')
    plt.ylabel('Second Principal Component')
    #plt.axis([-7,9,-7,8])
    ax.plot(lle_x, lle_y, 'b.', markersize=1)
fig = plt.figure(1)
ax = fig.add_subplot(211, projection='3d')
ax.scatter(xs, ys, zs, c=labels)
ax.set_title("Manifold N")
ax_transform = fig.add_subplot(212)
ax_transform.scatter(iso_transformed[:, 0], iso_transformed[:, 1], c=labels)
ax_transform.set_title("Isomap embedded")

# 3
thetas1 = np.arange(0, np.pi, np.pi / 300)
thetas2 = np.arange(0, np.pi, np.pi / 300)
xs = np.hstack((100 * np.sin(thetas1), 100 * np.sin(thetas2)))
zs = np.hstack((300 + 100 * np.cos(thetas1), 100 + 100 * np.cos(thetas2)))
xs = xs + 5 * np.random.randn(600)
zs = zs + 5 * np.random.randn(600)
ys = 10 * np.random.randn(600)

embedding = LocallyLinearEmbedding(n_neighbors=5, reg=0.1)
#embedding = Isomap()
lle_transformed = embedding.fit_transform(np.vstack((xs, ys, zs)).T)

fig = plt.figure(2)
ax = fig.add_subplot(211, projection='3d')
ax.scatter(xs, ys, zs, c=labels)
ax.set_title("Manifold 3")
ax_transform = fig.add_subplot(212)
ax_transform.scatter(lle_transformed[:, 0], lle_transformed[:, 1], c=labels)
ax_transform.set_title("LLE embedded")
plt.show()
def LLE(data, n_components=57):
    embedding = LocallyLinearEmbedding(n_components=n_components)
    X_transformed = embedding.fit_transform(data)
    return X_transformed
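# Usage sketch with synthetic placeholder data; the default of 57 components
# presumes input with at least that many features.
import numpy as np
print(LLE(np.random.rand(120, 64), n_components=2).shape)  # (120, 2)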
'''
train PCA basis based on training.txt and output dimension-reduced
coefficients for both training.txt and testing.txt
'''
from __future__ import division
import sys
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.manifold import LocallyLinearEmbedding
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import random
from colorsys import hsv_to_rgb

final_dim = 30
data = np.genfromtxt("100examples.txt", delimiter=',')

pca = PCA(n_components=final_dim)
isomap = Isomap(n_components=final_dim)
lle = LocallyLinearEmbedding(n_components=final_dim)

data_xformed = lle.fit_transform(data)
np.savetxt("lle_data_30_dims.txt", data_xformed, delimiter=',')
from sklearn.manifold import LocallyLinearEmbedding
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

tetra_freq = np.load('tetrafreq.npy')
phylum_index = np.load('phylumIndex.npy')
phylum_names = np.load('phylumNames.npy')

lle = LocallyLinearEmbedding(n_components=2)
lle_result = lle.fit_transform(tetra_freq)

plt.figure()
for c, i, name in zip("bgrcmykw", list(range(7, -1, -1)), phylum_names):
    plt.scatter(lle_result[phylum_index == i, 0],
                lle_result[phylum_index == i, 1], c=c, label=name)
plt.title('LLE of tetranucleotide')
plt.legend(loc=3, fontsize=10)
plt.savefig('LLE.png')
cells = opts.high // opts.step
lle_gmm_results = np.zeros((cells, opts.iters))

D = scale(X)
n_samples, n_features = D.shape

# chosen by hyperparam search in a separate test.
n_neighbors = 35

# For the specified number of principal components, do the clustering
dimension_list = range(opts.low, opts.high + 1, opts.step)
data_files = []
for i in dimension_list:
    index = (i // opts.step) - 1
    lle = LocallyLinearEmbedding(n_neighbors, n_components=i, method='standard')
    X_lle = lle.fit_transform(D)
    for j in range(0, opts.iters, 1):
        gaussmix = GMM(n_components=true_k, covariance_type='tied',
                       n_init=10, n_iter=1000)
        gaussmix.fit(X_lle)
        gaussmix_labels = gaussmix.predict(X_lle)
        homog = metrics.homogeneity_score(labels[:, 0], gaussmix_labels)
        print("Gaussian mixture homogeneity: %0.3f" % homog)
        test_result = {"Model": "LLE", "Dimension": i, "Homogeneity": homog}
        index = pd.Index([0], name='rows')
        data_files.append(pd.DataFrame(data=test_result, index=index))

# Save the data to a file:
print("...Done")
print("...rbinding DataFrames")
koor_x = ['false', 'true']
koor_y = besar
kelas_res = list(kelas_res)
valp = kelas_res.count(False)
valn = kelas_res.count(True)
new_y = []
new_y.append(valp)
new_y.append(valn)

plt.bar(koor_x, new_y, label='After SMOTE+TOMEK', color='b',
        width=0.3, align='center')
plt.bar(koor_x, koor_y, label='Before SMOTE+TOMEK', color='r',
        width=0.3, align='edge')
plt.xlabel('class')
plt.ylabel('value')
plt.legend()
plt.show()

embedding = LocallyLinearEmbedding(n_components=5, method='ltsa',
                                   eigen_solver='dense')
# alternative: method='hessian', eigen_solver='dense'
X_transformed = embedding.fit_transform(df_resm)
class Model(nn.Module):
    def __init__(self, args):
        super(Model, self).__init__()
        self.temperature = args.temperature
        self.base = resnet12()
        self.nFeat = self.base.nFeat
        self.classifier = nn.Conv2d(self.nFeat, args.num_classes, kernel_size=1)
        self.args = args
        if args.method in {'CBM', 'CBM_LLE'}:
            with open(osp.join(args.save_dir, 'base_proto.pickle'), 'rb') as fo:
                self.base_proto = pickle.load(fo)  # [64 512]
            if args.method == 'CBM_LLE':
                self.LLE = LocallyLinearEmbedding(n_neighbors=args.k,
                                                  n_components=args.dim)
                if args.L2:
                    self.base_proto = F.normalize(self.base_proto, p=2, dim=-1)
                self.base_proto = torch.from_numpy(
                    self.LLE.fit_transform(self.base_proto.cpu().numpy())).cuda()
            self.base_proto = self.base_proto.unsqueeze(0)
            if self.args.similarityOnBase == 'cosine':
                self.base_proto = F.normalize(self.base_proto, p=2, dim=-1)

    def test(self, ftrain, ftest, batch_size, num_way, num_test):
        ftrain = ftrain.mean((-1, -2))
        ftest = ftest.mean((-1, -2))
        phi = self.calPhi(ftrain, ftest, batch_size, num_way, num_test)
        if self.args.method in {'CBM', 'CBM_LLE'}:
            varPhi = self.calVarPhi(ftrain, ftest, batch_size, num_way, num_test)
            return self.args.alpha * phi + (1 - self.args.alpha) * varPhi  # [4 30 5]
        else:
            return phi

    def calPhi(self, ftrain, ftest, batch_size, num_way, num_test):
        ftrain = ftrain.view(batch_size, 1, num_way, -1)
        ftest = ftest.view(batch_size, num_test, 1, -1)
        ftrain = F.normalize(ftrain, p=2, dim=-1)
        ftest = F.normalize(ftest, p=2, dim=-1)
        scores = torch.sum(ftest * ftrain, dim=-1)  # [4 30 5]
        return scores

    def calVarPhi(self, ftrain, ftest, batch_size, num_way, num_test):
        if self.args.method == 'CBM_LLE':
            if self.args.L2:
                ftrain = F.normalize(ftrain, p=2, dim=-1)
                ftest = F.normalize(ftest, p=2, dim=-1)
            ftrain = torch.from_numpy(self.LLE.transform(ftrain.cpu().numpy())).cuda()
            ftest = torch.from_numpy(self.LLE.transform(ftest.cpu().numpy())).cuda()
        ftrain = ftrain.unsqueeze(1)
        ftest = ftest.unsqueeze(1)
        if self.args.similarityOnBase == 'cosine':
            ftrain = F.normalize(ftrain, p=2, dim=-1)
            ftrain = (ftrain * self.base_proto).sum(-1)
            ftest = F.normalize(ftest, p=2, dim=-1)
            ftest = (ftest * self.base_proto).sum(-1)
        else:  # Euclidean
            ftrain = -(ftrain - self.base_proto).norm(dim=-1)
            ftest = -(ftest - self.base_proto).norm(dim=-1)
        if self.args.softmax:
            ftrain = F.softmax(ftrain, dim=-1)
            ftest = F.softmax(ftest, dim=-1)
        if self.args.similarityOfDistribution == 'cosine':
            ftrain = F.normalize(ftrain, p=2, dim=-1).view(batch_size, 1, num_way, -1)
            ftest = F.normalize(ftest, p=2, dim=-1).view(batch_size, num_test, 1, -1)
            scores = (ftrain * ftest).sum(-1)
        elif self.args.similarityOfDistribution == 'Euclidean':
            ftrain = F.normalize(ftrain, p=2, dim=-1).view(batch_size, 1, num_way, -1)
            ftest = F.normalize(ftest, p=2, dim=-1).view(batch_size, num_test, 1, -1)
            scores = -(ftrain - ftest).norm(dim=-1)
        else:  # KL
            ftrain = F.softmax(ftrain, dim=-1).view(batch_size, 1, num_way, -1)
            ftest = F.softmax(ftest, dim=-1).view(batch_size, num_test, 1, -1).log()
            scores = -(ftrain * (ftrain.log() - ftest)).sum(dim=-1)
        return scores

    def forward(self, xtrain, xtest, ytrain, ytest):
        batch_size, num_train = xtrain.size(0), xtrain.size(1)
        num_test = xtest.size(1)
        num_way = ytrain.size(2)
        ytrain = ytrain.transpose(1, 2)
        xtrain = xtrain.view(-1, xtrain.size(2), xtrain.size(3), xtrain.size(4))
        xtest = xtest.view(-1, xtest.size(2), xtest.size(3), xtest.size(4))
        x = torch.cat((xtrain, xtest), 0)
        f = self.base(x)
        ftrain = f[:batch_size * num_train]
        ftrain = ftrain.view(batch_size, num_train, -1)
        # Average the support features per class via the one-hot label matrix.
        ftrain = torch.bmm(ytrain, ftrain)
        ftrain = ftrain.div(ytrain.sum(dim=2, keepdim=True).expand_as(ftrain))
        ftrain = ftrain.view(-1, *f.size()[1:])  # [4*5 512 6 6]
        ftest = f[batch_size * num_train:]
        ftest = ftest.view(-1, *f.size()[1:])  # [4*30 512 6 6]
        if not self.training:
            score = self.test(ftrain, ftest, batch_size, num_way, num_test)
            # score = score.view(batch_size*num_test, num_way)
            return score
        else:
            ytest = self.classifier(ftest) * self.temperature  # [4*30 64 6 6]
            return ytest
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.manifold import LocallyLinearEmbedding
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import random
from colorsys import hsv_to_rgb

pca = PCA(n_components=2)
isomap = Isomap(n_components=2)
lle = LocallyLinearEmbedding(n_components=2)

data = np.genfromtxt('data01_small.txt', delimiter=',')
pca_xform = pca.fit_transform(data)
isomap_xform = isomap.fit_transform(data)
lle_xform = lle.fit_transform(data)

label = [0] * 100 + [1] * 100
rgbs = [(0.5, 0, 0), (0, 0.5, 0)]

plt.figure()
xs = pca_xform[:, 0]
ys = pca_xform[:, 1]
ax = plt.subplot(111)
for i in range(len(xs)):
    ax.text(xs[i], ys[i], str(label[i]), color=rgbs[label[i]],
            fontdict={'weight': 'bold', 'size': 9})
t = (max(xs) - min(xs)) * 0.1
ax.axis([min(xs) - t, max(xs) + t, min(ys) - t, max(ys) + t])
plt.xticks([]), plt.yticks([])
plt.title('PCA')
def getLLE(self):
    lle = LocallyLinearEmbedding(n_neighbors=4)
    self.dataPCA = lle.fit_transform(self.data.values[0:768, 0:8])
    self.labels = np.array(self.data.values[:, 8], int)
# ISOMAP
print('ISOMAP')
from sklearn.manifold import Isomap
iso = Isomap(n_components=3, n_neighbors=7)
fdata = iso.fit_transform(authors)
show_figure(fdata, labels, ulabs, 'ISOMAP')

# LLE
print('LLE')
from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_neighbors=7, n_components=3, method='standard')
fdata = lle.fit_transform(authors)
print(lle.reconstruction_error_)
show_figure(fdata, labels, ulabs, 'LLE')

# MDS
print('MDS')
from sklearn.manifold import MDS
mds = MDS(n_components=3)
fdata = mds.fit_transform(authors)
print(mds.stress_)
show_figure(fdata, labels, ulabs, 'MDS')

# Spectral Embedding
spherical_helicoid_1024 = spherical_helicoid(0.5, 3, 1024)
spherical_helicoid_2048 = spherical_helicoid(0.5, 3, 2048)
spherical_helicoid_4096 = spherical_helicoid(0.5, 3, 4096)

# a) Klein Bottle
# From the data below, we can see that the optimal number of neighbors
# depends on the size of the dataset.
from sklearn.manifold import LocallyLinearEmbedding

klein_bottle_data = [klein_bottle_1024, klein_bottle_2048, klein_bottle_4096]
optimal_neighbors = []
for data_set in klein_bottle_data:
    # Pick the k in [3, 10) with the lowest LLE reconstruction error.
    minimum_error = float("inf")
    optimal_k = 0
    for k in range(3, 10):
        embedding = LocallyLinearEmbedding(n_neighbors=k, n_components=2)
        X_transformed = embedding.fit_transform(data_set)
        reconstruction_error = embedding.reconstruction_error_
        if reconstruction_error < minimum_error:
            optimal_k = k
            minimum_error = reconstruction_error
    optimal_neighbors.append(optimal_k)

print("Optimal Number of Neighbors for N=1024: " + str(optimal_neighbors[0]))
print("Optimal Number of Neighbors for N=2048: " + str(optimal_neighbors[1]))
print("Optimal Number of Neighbors for N=4096: " + str(optimal_neighbors[2]))

# b) Circular Helicoid
# From the data below, we can see that the optimal number of neighbors
# depends on the size of the dataset.
from sklearn.manifold import LocallyLinearEmbedding
circular_helicoid_data = [
def plot2d(X, y, scale=True, normalize=False, embedding='pca', title=''):
    """
    Plot data transformed into two dimensions by the chosen embedding.

    PCA transforms into a new embedding dimension such that the first
    dimension contains the maximal variance and following dimensions the
    maximal remaining variance. This should spread the observed n-dimensional
    data maximally. This is unsupervised and will not consider target values.
    """
    if scale:
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
    if normalize:
        normalizer = Normalizer(norm='l2')
        X = normalizer.fit_transform(X)
    if embedding == 'pca':
        pca = PCA(n_components=2)
        X_transformed = pca.fit_transform(X)
    elif embedding == 'isomap':
        isomap = Isomap(n_components=2, n_neighbors=20)
        X_transformed = isomap.fit_transform(X)
    elif embedding == 'lle':
        lle = LocallyLinearEmbedding(n_components=2, n_neighbors=5)
        X_transformed = lle.fit_transform(X)
    elif embedding == 'tsne':
        t_sne = TSNE(n_components=2)
        X_transformed = t_sne.fit_transform(X)
    elif embedding == 'spectral':
        se = SpectralEmbedding(n_components=2)
        X_transformed = se.fit_transform(X)
    elif embedding == 'mds':
        mds = MDS(n_components=2)
        X_transformed = mds.fit_transform(X)
    elif embedding == 'gallery':
        plt.figure(1)
        plt.subplot(231)
        plt.title('pca')
        X_t = PCA(n_components=2).fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)
        plt.subplot(232)
        plt.title('isomap')
        X_t = Isomap(n_neighbors=20).fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)
        plt.subplot(233)
        plt.title('lle')
        X_t = LocallyLinearEmbedding(n_neighbors=20).fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)
        plt.subplot(234)
        plt.title('tsne')
        X_t = TSNE().fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)
        plt.subplot(235)
        plt.title('spectral')
        X_t = SpectralEmbedding().fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)
        plt.subplot(236)
        plt.title('mds')
        X_t = MDS().fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)
        plt.suptitle('Gallery transforms ' + title)
        return plt
    else:
        raise ValueError("Choose between pca, isomap, lle, tsne, spectral, "
                         "mds and gallery")
    plt.title(title + ' ' + embedding + ' plot')
    sc = plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y)
    plt.colorbar(sc)
    return plt
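# Usage sketch (toy data; assumes the imports used inside plot2d are in scope):
# from sklearn.datasets import load_iris
# iris_X, iris_y = load_iris(return_X_y=True)
# plot2d(iris_X, iris_y, embedding='lle', title='iris').show()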
# Calculate the Locally Linear Embedding
# NOTE THAT YOUR DATA NEEDS TO BE NORMALIZED
from sklearn.manifold import LocallyLinearEmbedding

# Calculate LLE embedding
lle = LocallyLinearEmbedding(n_neighbors=20, n_components=2)
x, y = np.array(lle.fit_transform(data)).T
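# A hedged companion sketch for the normalization note above: one common
# choice is min-max scaling each feature to [0, 1] before the fit, e.g.
# from sklearn.preprocessing import MinMaxScaler
# data = MinMaxScaler().fit_transform(data)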
def fit_transform(self, X):
    """
    Compute the dimensionality-reduction result.
    :param X: high-dimensional data matrix, one data point per row
    :return:
    """
    (n, m) = X.shape
    print(self.parameters)

    # Classical dimensionality-reduction methods
    if self.affinity == 'PCA':
        # Return the PCA embedding directly
        print('Classical method: PCA...')
        pca = PCA(n_components=self.n_components)
        return pca.fit_transform(X)
    elif self.affinity == 'MDS':
        # Return the MDS embedding directly
        print('Classical method: MDS...')
        mds = MDS(n_components=self.n_components)
        return mds.fit_transform(X)
    elif self.affinity == 'Isomap':
        # Return the Isomap embedding directly
        print('Classical method: Isomap...')
        iso = Isomap(n_components=self.n_components,
                     n_neighbors=self.parameters['n_neighbors'])
        return iso.fit_transform(X)
    elif self.affinity == 't-SNE':
        # Return the t-SNE embedding directly
        print('Classical method: t-SNE...')
        tsne = TSNE(n_components=self.n_components,
                    perplexity=self.parameters['perplexity'])
        return tsne.fit_transform(X)
    elif self.affinity == 'cTSNE':
        # Reduce with the unaccelerated (classical) t-SNE implementation
        print('Classical method: classical t-SNE...')
        from ArtDR import tsne
        return tsne.tsne(X, perplexity=self.parameters['perplexity'],
                         path=self.path, config_str='t-SNE ')
    elif self.affinity == 'LLE':
        # Return the LLE embedding directly
        print('Classical method: LLE...')
        lle = LocallyLinearEmbedding(
            n_components=self.n_components,
            n_neighbors=self.parameters['n_neighbors'])
        return lle.fit_transform(X)
    elif self.affinity == 'geo-t-SNE':
        # t-SNE driven by geodesic distances
        print('Geodesic t-SNE...')
        gtsne = geoTsne(n_neighbors=self.parameters['n_neighbors'],
                        perplexity=self.parameters['perplexity'])
        return gtsne.fit_transform(X, n_components=self.n_components)

    if self.parameters['use_skeleton']:
        # Use the skeleton-point variant
        return self.skeleton_fit_transform(X)

    # Our own dimensionality-reduction method
    if self.parameters['neighborhood_type'] == 'iter':
        # Build the affinity matrix iteratively
        W = self.iter_affinity_matrix(X)
    else:
        W = self.affinity_matrix(X)

    # Our standard pipeline
    if self.frame == 'MDS':
        print('Using MDS frame...')
        mds = MDS(n_components=self.n_components, dissimilarity='precomputed')
        Y = mds.fit_transform(W)
        return Y
    elif self.frame == 't-SNE':
        print('Using t-SNE frame...')
        Y = tsneFrame.tsne_plus(W, self.parameters['perplexity'],
                                path=self.path, config_str=self.config_str)
        return Y
    elif self.frame == 't-SNE+':
        print('Using t-SNE framework in sklearn...')
        tsne = tsneFramePlus.tsnePlus(
            n_components=self.n_components,
            perplexity=self.parameters['perplexity'])
        Y = tsne.fit_transform(W)
        return Y
    else:
        print("Wrong frame name!")
        return
print(features.shape)
feats = fs.mutual_info_classif(features, newlabels, n_neighbors=5, random_state=0)
max_indices = sorted(range(len(feats)), key=lambda i: feats[i])[-64:]  # pick the 64 highest-scoring features
print(len(max_indices))

features = np.reshape(features, (len(features), -1))
newfeatures = []
for f in features:
    newfeatures.append(f[max_indices])
features = np.array(newfeatures)
features = np.reshape(features, (len(features), -1))
print(features.shape)

lle = LocallyLinearEmbedding(n_components=2, max_iter=500, method='ltsa', n_jobs=7)
X_embedded = lle.fit_transform(features)
print(X_embedded.shape)

with open('../Manifold_features/ltsa', 'wb') as fp:
    pickle.dump(X_embedded, fp)
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.manifold import LocallyLinearEmbedding
from mpl_toolkits.mplot3d import Axes3D

np.random.seed(0)
X, color = datasets.make_swiss_roll(n_samples=1500)

model = LocallyLinearEmbedding(n_components=2, n_neighbors=15)
Z = model.fit_transform(X)

plt.figure(19)
ax = plt.axes(projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color)
ax.view_init(4, -72)

plt.figure(20)
plt.scatter(Z[:, 0], Z[:, 1], c=color)
plt.show()
def dim_reduce(self, method="tsne", target_dim=2, points=None, metric="minkowski"):
    # Reuse a cached reduction when the method (and, for isomap, the metric)
    # matches the previous call.
    try:
        if len(self.reduced_data) != 0:
            if self.reduced_data_method == method and method != "isomap":
                return self.reduced_data
            elif method == "isomap" and self.reduced_data_method == method:
                if self.reduced_data_method_metric == metric:
                    return self.reduced_data
    except AttributeError:
        pass
    if method == "tsne":
        from sklearn.manifold import TSNE
        tsne = TSNE(n_components=target_dim, random_state=42)
        np.set_printoptions(suppress=True)
        self.reduced_data_method_metric = ""
        self.reduced_data_method = "tsne"
        if points is None:
            self.reduced_data = tsne.fit_transform(self.word_vectors[:1000])
        else:
            self.reduced_data = tsne.fit_transform(points)
    elif method == "truncated_svd":
        from sklearn.decomposition import TruncatedSVD
        print("using TruncatedSVD...")
        svd = TruncatedSVD(n_components=target_dim, n_iter=10, random_state=42)
        self.reduced_data_method_metric = ""
        self.reduced_data_method = "truncated_svd"
        if points is None:
            self.reduced_data = svd.fit_transform(self.word_vectors[:1000])
        else:
            self.reduced_data = svd.fit_transform(points)
        print("sd-sum is:\t", svd.explained_variance_ratio_.sum())
    elif method == "spectral":
        from sklearn.manifold import SpectralEmbedding
        se = SpectralEmbedding(n_components=target_dim, random_state=42)
        self.reduced_data_method_metric = ""
        self.reduced_data_method = "spectral"
        if points is None:
            self.reduced_data = se.fit_transform(self.word_vectors[:1000])
        else:
            self.reduced_data = se.fit_transform(points)
    elif method == "isomap":
        from sklearn.manifold.isomap_mod import Isomap
        i = Isomap(n_components=target_dim, max_iter=1000,
                   path_method='D', neighbors_algorithm='auto')
        self.reduced_data_method_metric = metric
        self.reduced_data_method = "isomap"
        if points is None:
            self.reduced_data = i.fit_transform(self.word_vectors[:1000], metric=metric)
        else:
            self.reduced_data = i.fit_transform(points, metric=metric)
    elif method == "lle":
        from sklearn.manifold import LocallyLinearEmbedding
        lle = LocallyLinearEmbedding(n_components=target_dim, max_iter=1000,
                                     neighbors_algorithm='auto')
        self.reduced_data_method_metric = ""
        self.reduced_data_method = "lle"
        if points is None:
            self.reduced_data = lle.fit_transform(self.word_vectors[:1000])
        else:
            self.reduced_data = lle.fit_transform(points)
    elif method == "kpca":
        from sklearn.decomposition import PCA, KernelPCA
        kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
        self.reduced_data_method_metric = ""
        self.reduced_data_method = "kpca"
        if points is None:
            self.reduced_data = kpca.fit_transform(self.word_vectors[:1000])
        else:
            self.reduced_data = kpca.fit_transform(points)
    return self.reduced_data
if __name__ == '__main__':
    pth = './data.txt'
    data, label = load_data(pth)
    # print(data, label)

    # PCA
    pca = PCA(n_components=2)
    pca_ = pca.fit_transform(data)
    visual(pca_, label, "PCA")

    # LDA
    lda = LinearDiscriminantAnalysis()
    lda_ = lda.fit_transform(data, label)
    visual(lda_, label, "LDA")

    # KPCA
    kpca = KernelPCA(n_components=2, kernel='rbf')
    kpca_ = kpca.fit_transform(data)
    visual(kpca_, label, "KPCA")

    # Isomap
    iso = Isomap(n_components=2)
    iso_ = iso.fit_transform(data)
    visual(iso_, label, "Isomap")

    # LLE
    lle = LocallyLinearEmbedding(n_components=2)
    lle_ = lle.fit_transform(data)
    visual(lle_, label, "LLE")

    # Laplacian Eigenmaps
    le = SpectralEmbedding(n_components=2)
    le_ = le.fit_transform(data)
    visual(le_, label, "Laplacian Eigenmaps")
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.manifold import LocallyLinearEmbedding

# Set random seed for reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Create the dataset
    faces = fetch_olivetti_faces()

    # Train LLE
    lle = LocallyLinearEmbedding(n_neighbors=15, n_components=2)
    X_lle = lle.fit_transform(faces['data'])

    # Plot the result
    fig, ax = plt.subplots(figsize=(18, 10))
    for i in range(100):
        ax.scatter(X_lle[i, 0], X_lle[i, 1], marker='o', s=100)
        ax.annotate('%d' % faces['target'][i],
                    xy=(X_lle[i, 0] + 0.0015, X_lle[i, 1] + 0.0015))
    ax.set_xlabel(r'$x_0$')
    ax.set_ylabel(r'$x_1$')
    ax.grid()
    plt.show()
#03-01.py
X, y = preprocess(data, shuffle=False, n_samples=1000, normalization=None)

from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_neighbors=15, n_components=3, method='standard')
X_proj = lle.fit_transform(X)

three_component_plot(X_proj[:, 0], X_proj[:, 1], X_proj[:, 2], y, labels,
                     trim_outliers=True)
forest_test(X_lda, Y)

# LDA can also be used as a classifier, so we can now test how an LDA
# classifier performs in this situation.
X_Reduced, X_Test_Reduced, Y_Reduced, Y_Test_Reduced = train_test_split(
    X_lda, Y, test_size=0.30, random_state=101)
start = time.process_time()
lda = LinearDiscriminantAnalysis().fit(X_Reduced, Y_Reduced)
print(time.process_time() - start)
predictionlda = lda.predict(X_Test_Reduced)
print(confusion_matrix(Y_Test_Reduced, predictionlda))
print(classification_report(Y_Test_Reduced, predictionlda))

# Locally Linear Embedding is a dimensionality-reduction technique based on
# manifold learning. A manifold is an object of D dimensions embedded in a
# higher-dimensional space. Manifold learning aims to represent this object
# in its original D dimensions rather than in an unnecessarily larger space.
from sklearn.manifold import LocallyLinearEmbedding
embedding = LocallyLinearEmbedding(n_components=3)
X_lle = embedding.fit_transform(X)
forest_test(X_lle, Y)

# t-SNE is a non-linear dimensionality-reduction technique typically used to
# visualize high-dimensional datasets. t-SNE works by minimizing the
# divergence between a distribution constituted by the pairwise probability
# similarities of the input features in the original high-dimensional space
# and its equivalent in the reduced low-dimensional space. t-SNE uses the
# Kullback-Leibler (KL) divergence to measure the dissimilarity of the two
# distributions, and the KL divergence is minimized using gradient descent.
from sklearn.manifold import TSNE
start = time.process_time()
tsne = TSNE(n_components=3, verbose=1, perplexity=40, n_iter=300)
X_tsne = tsne.fit_transform(X)
print(time.process_time() - start)
forest_test(X_tsne, Y)

# Autoencoders are a family of machine learning algorithms that can be used
# as a dimensionality-reduction technique. The main difference between
# autoencoders and other dimensionality-reduction techniques is that
# autoencoders use non-linear
("kpca", KernelPCA(n_components=2)), ("log_reg", LogisticRegression()) ]) param_grid = [{ "kpca__gamma": np.linspace(0.03, 0.05, 10), "kpca__kernel": ["rbf", "sigmoid"] }] grid_search = GridSearchCV(clf, param_grid, cv=3) grid_search.fit(X, y # LLE from sklearn.manifold import LocallyLinearEmbedding lle = LocallyLinearEmbedding(n_components=2, n_neighbors=10) X_reduced = lle.fit_transform(X) #======================================================================================# # K-Means from sklearn.cluster import KMeans k = 5 kmeans = KMeans(n_clusters=k) y_pred = kmeans.fit_predict(X) kmeans.cluster_centers_ # DBSCAN from sklearn.cluster import DBSCAN from sklearn.datasets import make_moons X, y = make_moons(n_samples=1000, noise=0.05) dbscan = DBSCAN(eps=0.05, min_samples=5) dbscan.fit(X)
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95)
X_reduced = pca.fit_transform(data[0])
np.sum(pca.explained_variance_ratio_)

dataset = keras.datasets.mnist.load_data()
images = dataset[1][0].reshape(10000, 28 * 28)
labels = dataset[1][1]
pca = PCA(n_components=154)
images_reduced = pca.fit_transform(images)

from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_components=2, n_neighbors=10)
X_lle = lle.fit_transform(data[0])

from sklearn.manifold import TSNE
tsne = TSNE(n_components=2)
x_clusters = tsne.fit_transform(images)

import matplotlib.pyplot as plt
for i in range(0, 10):
    indices = []
    for j in range(2 * 5000):
        if labels[j] == i:
            indices.append(j)
    plt.scatter(x_clusters[indices, 0], x_clusters[indices, 1])
#https://blog.csdn.net/u012162613/article/details/42192293
import numpy as np
from sklearn.decomposition import PCA

data = np.array([[1., 1.], [0.9, 0.95], [1.01, 1.03],
                 [2., 2.], [2.03, 2.06], [1.98, 1.89]])
data.shape  # (6, 2)
pca = PCA(n_components=1)
newData_shape = pca.fit_transform(data).shape  # (6, 1)

# 4. Multidimensional Scaling
# https://scikit-learn.org/stable/modules/generated/sklearn.manifold.MDS.html
from sklearn.datasets import load_digits
from sklearn.manifold import MDS
X, _ = load_digits(return_X_y=True)
X.shape  # (1797, 64)
mds = MDS(n_components=2)
X_transformed = mds.fit_transform(X[:100])  # (100, 2)

# 5. Locally Linear Embedding
# https://scikit-learn.org/stable/modules/generated/sklearn.manifold.LocallyLinearEmbedding.html
from sklearn.datasets import load_digits
from sklearn.manifold import LocallyLinearEmbedding
X, _ = load_digits(return_X_y=True)
X.shape  # (1797, 64)
lle = LocallyLinearEmbedding(n_components=2)
X_transformed = lle.fit_transform(X[:100])  # (100, 2)
#03-02.py
X, y = preprocess(data, shuffle=False, n_samples=1000, normalization=None)

from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_neighbors=15, n_components=3, method='modified')
X_proj = lle.fit_transform(X)

three_component_plot(X_proj[:, 0], X_proj[:, 1], X_proj[:, 2], y, labels,
                     trim_outliers=True)
f.close()

import numpy as np
N = len(dic_cl.items())
X = np.zeros((N, 7))
for i, (key, val) in enumerate(dic_cl.items()):
    X[i, :] = dic_cl[key]

from sklearn.manifold import LocallyLinearEmbedding
from sklearn.preprocessing import scale

lle = LocallyLinearEmbedding(n_components=3, n_neighbors=20)
print(X.max(axis=0))
Y3 = lle.fit_transform(scale(X))
Y3 -= Y3.min(axis=0)
print(len(dic_cl.items()))

lle = LocallyLinearEmbedding(n_components=1, n_neighbors=20)
Y1 = lle.fit_transform(X)
Y1 -= Y1.min()

o1 = open("1-d.csv", "w")
o3 = open("3-d.csv", "w")
for i, (key, val) in enumerate(dic_cl.items()):
    o1.write("%s,%f\n" % (key, Y1[i, 0]))
    o3.write("%s,%s\n" % (key, ",".join(map(str, Y3[i, :]))))
o1.close()
o3.close()

import pylab