def plot_3d(dataset):
    """Project dataset onto 3 components with Isomap and draw a 3-D scatter plot."""
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the 3-D projection)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    iso = Isomap(n_components=3)
    projected = iso.fit_transform(dataset.data.toarray())
    print('projected: sample: %s, feature: %s'
          % (projected.shape[0], projected.shape[1]))
    all_scatter = []
    colors = cm.rainbow(np.linspace(0, 1, len(dataset.target_names)), alpha=0.5)
    for i in range(len(dataset.target_names)):
        points = projected[dataset.target == i, :]
        cur = ax.scatter(points[:, 0], points[:, 1], points[:, 2],
                         color=colors[i], edgecolor='k', lw=0.1,
                         vmin=0, vmax=len(dataset.target_names))
        all_scatter.append(cur)
    ax.legend(all_scatter, dataset.target_names,
              loc='lower left', scatterpoints=1)
    plt.savefig('isomap3d', dpi=500)
    plt.show()
    return True
def plotTrajectory(dfile):
    fin = open(dfile)
    Vsteps = []
    Vtarget = fin.readline().strip().split()
    Vtarget = list(map(float, Vtarget))
    Vsteps.append(Vtarget)
    for l in fin:
        l = l.strip().split()
        if len(l) != 26:
            continue
        l = list(map(float, l))
        Vsteps.append(l)
    distances = [euclidean(a, Vsteps[0]) for a in Vsteps[1:]]
    print(len(distances))
    _map = plt.get_cmap("winter")
    distcolors = _map(distances)
    dimred = Isomap(n_components=2)
    Vsteps = dimred.fit_transform(Vsteps)
    # objective vector
    plt.scatter(Vsteps[0, 0], Vsteps[0, 1], color='red', s=30, marker=(5, 1))
    # optimization steps
    plt.scatter(Vsteps[1:, 0], Vsteps[1:, 1], color=distcolors, alpha=0.5)
    plt.show()
def dimension_reduce():
    '''
    Compare a few different methods of dimensionality reduction
    on the current dataset.
    '''
    pca = PCA(n_components=2)           # initialize a dimensionality reducer
    pca.fit(digits.data)                # fit it to our data
    X_pca = pca.transform(digits.data)  # apply the transformation to our data
    plt.subplot(1, 3, 1)
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits.target)  # plot the manifold

    se = SpectralEmbedding()
    X_se = se.fit_transform(digits.data)
    plt.subplot(1, 3, 2)
    plt.scatter(X_se[:, 0], X_se[:, 1], c=digits.target)

    isomap = Isomap(n_components=2, n_neighbors=20)
    isomap.fit(digits.data)
    X_iso = isomap.transform(digits.data)
    plt.subplot(1, 3, 3)
    plt.scatter(X_iso[:, 0], X_iso[:, 1], c=digits.target)
    plt.show()

    plt.matshow(pca.mean_.reshape(8, 8))           # plot the mean components
    plt.matshow(pca.components_[0].reshape(8, 8))  # plot the first principal component
    plt.matshow(pca.components_[1].reshape(8, 8))  # plot the second principal component
    plt.show()
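# Usage sketch for dimension_reduce() above. An assumption: the function reads
# the module-level names `digits`, PCA, SpectralEmbedding, Isomap and plt,
# which are not defined inside the snippet itself.
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.manifold import SpectralEmbedding, Isomap
import matplotlib.pyplot as plt

digits = load_digits()
dimension_reduce()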
def isomap(similarity, euclid=False):
    if not euclid:
        print('warning: input is not a Euclidean distance matrix')  # message translated from Czech ('podvod')
    model = Isomap(n_neighbors=15)
    result = model.fit_transform(similarity)
    return result.T
def iso_map(data, target, target_names):
    iso = Isomap(n_components=2)
    data_projected = iso.fit_transform(data)
    formatter = plt.FuncFormatter(lambda i, *args: target_names[int(i)])
    plt.figure(figsize=(8, 8))
    plt.scatter(data_projected[:, 0], data_projected[:, 1], c=target,
                edgecolor='none', alpha=0.5,
                cmap=plt.cm.get_cmap('rainbow', len(target_names)))
    plt.colorbar(ticks=sorted(list(set(target))), format=formatter)
    # plt.clim(-200, 0)
    return iso, data_projected
def ISOMAP_transform(train_feature, test_feature, n_components, n_neighbors=5):
    """ISOMAP method: fit on the training features, then transform both sets."""
    from sklearn.manifold import Isomap
    isomap = Isomap(n_neighbors=n_neighbors, n_components=n_components).fit(train_feature)
    train_feature_transformed = isomap.transform(train_feature)
    test_feature_transformed = isomap.transform(test_feature)
    return train_feature_transformed, test_feature_transformed
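# Usage sketch for ISOMAP_transform; the random feature matrices are
# placeholders, not data from any of the surrounding snippets.
import numpy as np

rng = np.random.RandomState(0)
train = rng.rand(100, 20)
test = rng.rand(25, 20)
train_2d, test_2d = ISOMAP_transform(train, test, n_components=2, n_neighbors=10)
print(train_2d.shape, test_2d.shape)  # (100, 2) (25, 2)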
def embedDistanceMatrix(dmatDf, method='kpca', n_components=2, **kwargs):
    """Two-dimensional embedding of sequence distances in dmatDf,
    returning Nx2 x,y-coords: tsne, isomap, pca, mds, kpca, sklearn-tsne"""
    if isinstance(dmatDf, pd.DataFrame):
        dmat = dmatDf.values
    else:
        dmat = dmatDf

    if method == 'tsne':
        xy = tsne.run_tsne(dmat, no_dims=n_components, perplexity=kwargs['perplexity'])
    elif method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=n_components)
        xy = isoObj.fit_transform(dmat)
    elif method == 'mds':
        mds = MDS(n_components=n_components,
                  max_iter=3000, eps=1e-9, random_state=15,
                  dissimilarity="precomputed", n_jobs=1)
        xy = mds.fit(dmat).embedding_
        rot = PCA(n_components=n_components)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=None)
        xy = pcaObj.fit_transform(dmat)[:, :n_components]
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=dmat.shape[0], kernel='precomputed', eigen_solver='dense')
        try:
            gram = dist2kernel(dmat)
        except Exception:
            print('Could not convert dmat to kernel for KernelPCA; using 1 - dmat/dmat.max() instead')
            gram = 1 - dmat / dmat.max()
        xy = pcaObj.fit_transform(gram)[:, :n_components]
    elif method == 'lle':
        lle = manifold.LocallyLinearEmbedding(n_neighbors=30, n_components=n_components, method='standard')
        xy = lle.fit_transform(dmat)  # was fit_transform(dist); `dist` is undefined in this scope
    elif method == 'sklearn-tsne':
        tsneObj = TSNE(n_components=n_components, metric='precomputed',
                       random_state=0, perplexity=kwargs['perplexity'])
        xy = tsneObj.fit_transform(dmat)
    elif method == 'umap':
        umapObj = umap.UMAP(n_components=n_components, metric='precomputed', **kwargs)
        xy = umapObj.fit_transform(dmat)
    else:
        print('Method unknown: %s' % method)
        return

    assert xy.shape[0] == dmatDf.shape[0]
    xyDf = pd.DataFrame(xy[:, :n_components], index=dmatDf.index, columns=np.arange(n_components))
    if method == 'kpca':
        """Not sure how negative eigenvalues should be handled here, but they are
        usually small so it shouldn't make a big difference"""
        setattr(xyDf, 'explained_variance_',
                pcaObj.lambdas_[:n_components] / pcaObj.lambdas_[pcaObj.lambdas_ > 0].sum())
    return xyDf
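# Usage sketch for embedDistanceMatrix: embed a small synthetic distance
# matrix with Isomap. The random points are illustrative only; numpy, pandas
# and scipy are assumed available.
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform

pts = np.random.RandomState(0).rand(30, 5)
dmatDf = pd.DataFrame(squareform(pdist(pts)))
xyDf = embedDistanceMatrix(dmatDf, method='isomap', n_components=2)
print(xyDf.shape)  # (30, 2)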
def ML(self):
    data = self.data.values[:, :-3]
    scaler = MinMaxScaler()
    # scaler = StandardScaler()
    X = scaler.fit_transform(data)
    # X = data
    isomap = Isomap(n_components=2)
    isomap.fit(X)
    import pdb; pdb.set_trace()  # drop into the debugger to inspect the fitted model
def __init__(self):
    """ Instantiate floorplan estimator """
    self.dimred = Isomap(n_neighbors=25, n_components=2)
    self._fingerprints = None
    self._label = None
class FloorplanEstimator:
    """ Simple estimator for rough floorplans """

    def __init__(self):
        """ Instantiate floorplan estimator """
        self.dimred = Isomap(n_neighbors=25, n_components=2)
        self._fingerprints = None
        self._label = None

    def fit(self, fingerprints, label):
        """
        Estimate floorplan from labeled fingerprints

        :param fingerprints: list of fingerprints
        :param label: list of corresponding labels
        """
        self.dimred.fit(fingerprints)
        self._fingerprints = fingerprints
        self._label = label

    def transform(self, fingerprints):
        """
        Get x,y coordinates of fingerprints on floorplan

        :param fingerprints: list of fingerprints
        :return: list of [x,y] coordinates
        """
        return self.dimred.transform(fingerprints)

    def draw(self):
        """ Draw the estimated floorplan in the current figure """
        xy = self.dimred.transform(self._fingerprints)
        x_min, x_max = xy[:, 0].min(), xy[:, 0].max()
        y_min, y_max = xy[:, 1].min(), xy[:, 1].max()
        xx, yy = np.meshgrid(np.arange(x_min, x_max, 1.0),
                             np.arange(y_min, y_max, 1.0))
        clf = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
        clf.fit(xy, self._label)
        label = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
        plt.pcolormesh(xx, yy, label)
        plt.scatter(xy[:, 0], xy[:, 1], c=self._label, vmin=0)
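# Usage sketch for FloorplanEstimator; the fingerprint matrix and labels are
# synthetic placeholders (real fingerprints would be e.g. RSSI vectors per
# access point).
import numpy as np
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)
fingerprints = rng.rand(200, 8)        # 200 fingerprints, 8 signal features
labels = rng.randint(1, 4, size=200)   # room labels 1..3
est = FloorplanEstimator()
est.fit(fingerprints, labels)
est.draw()
plt.show()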
def isomap(file_name, dimension, num_neighbors, label):
    balls = np.loadtxt(file_name)
    matrix = balls[:, 0:dimension]
    new_matrix = convert_angles_to_cos_sin(matrix)
    imap = Isomap(n_neighbors=num_neighbors, n_components=2, eigen_solver='auto',
                  tol=0, max_iter=None, path_method='auto', neighbors_algorithm='auto')
    transformed_matrix = imap.fit_transform(new_matrix)
    ball_coords = np.zeros((balls.shape[0], dimension + 3))
    for i in range(balls.shape[0]):
        ball_coords[i, 0:dimension] = balls[i, 0:dimension].tolist()
        ball_coords[i, dimension:dimension + 2] = transformed_matrix[i]
        if label == 'cluster':
            ball_coords[i, dimension + 2] = balls[i, dimension].tolist()
        elif label == 'eq':
            ball_coords[i, dimension + 2] = (-0.0019872041 * 300 * np.log(abs(balls[i, dimension + 1]))).tolist()
        elif label == 'committor':
            ball_coords[i, dimension + 2] = (balls[i, dimension + 2] / abs(balls[i, dimension + 1])).tolist()
        print(' '.join([str(x) for x in ball_coords[i, :]]))
def isomap(self, data):
    print('Isomap neighbours:', self.parameters["n_neighbors"])
    print('Isomap components, i.e. final number of coordinates:', self.k)
    k_means_n_clusters = self.parameters['k_means_n_clusters']
    isomap_params = dict(self.parameters)
    del isomap_params["k_means_n_clusters"]
    m = Isomap(neighbors_algorithm='kd_tree', **isomap_params)
    x = m.fit_transform(data)
    error = m.reconstruction_error()
    geod_d = m.dist_matrix_.flatten()
    new_euclid_d = cdist(x, x, metric='euclidean').flatten()
    # residual variance: 1 - r^2 between geodesic and embedded distances
    corr = 1 - pearsonr(geod_d, new_euclid_d)[0] ** 2
    new_data = x
    print(self.parameters)
    # `params` is expected to be a module-level configuration dict
    return self.batch_kmeans(new_data,
                             parameters=dict(zip(params["mini-batchk-means"],
                                                 [k_means_n_clusters, 1000, 500, 1000, 'k-means++', 5])))
def isomap(self, n_components=2, n_neighbors=3, show=False):
    """
    Calculates lower-dimensional coordinates using the Isomap algorithm.

    :param n_components: dimensionality of the reduced space
    :type n_components: int, optional
    :param n_neighbors: Used by Isomap to determine the number of neighbors
        for each point. A large neighbor size tends to produce a denser map.
    :type n_neighbors: int, optional
    :param show: Returns the calculated coordinates if true.
    :type show: boolean, optional
    """
    model = Isomap(n_components=n_components, n_neighbors=n_neighbors)
    self.pos = model.fit(self.dismat).embedding_
    if show:
        return self.pos
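# Stand-alone sketch of the neighbor-size effect described in the docstring
# above, using an S-curve instead of the class's self.dismat.
from sklearn.datasets import make_s_curve
from sklearn.manifold import Isomap

X, _ = make_s_curve(n_samples=300, random_state=0)
for k in (3, 10, 30):
    emb = Isomap(n_components=2, n_neighbors=k).fit_transform(X)
    print(k, emb.std(axis=0))  # the spread of the map changes with neighbor size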
def compute_iso_map(self, original_features):
    feature_matrix = original_features.drop(columns='file').to_numpy()  # .as_matrix() was removed from pandas
    feature_matrix = np.nan_to_num(feature_matrix)

    dimen_reductor = Isomap(n_components=self.n_components)

    full_size = feature_matrix.shape[0]
    train_size = int(self.ratio * full_size)
    row_indices = list(range(full_size))
    feature_training_indices = np.random.choice(row_indices, size=train_size)
    training_feature_matrix = feature_matrix[feature_training_indices, :]

    dimen_reductor.fit(training_feature_matrix)
    reduced_features = dimen_reductor.transform(feature_matrix)

    reduced_normalized_features = reduced_features - reduced_features.min(axis=0)
    reduced_normalized_features /= reduced_normalized_features.max(axis=0)

    return reduced_normalized_features
def mult_scl(X, labels):
    print('labels:')
    for i, label in enumerate(labels, start=1):
        print('{}: {}'.format(i, label))
    isomap = Isomap()
    points = isomap.fit(np.nan_to_num(X)).embedding_
    f, (ax1, ax2, ax3) = plt.subplots(1, 3)
    plot_location(labels, ax3)
    ax1.scatter(points[:, 0], points[:, 1], s=20, c='r')
    ax1.set_title('Isomap')
    add_labels(labels, points, ax1)
    mds = MDS()
    points = mds.fit(np.nan_to_num(X)).embedding_
    ax2.scatter(points[:, 0], points[:, 1], s=20, c='g')
    ax2.set_title('MDS')
    add_labels(labels, points, ax2)
    plt.show()
def outputBin(data, ctrlSize, nbPheno, lPheno, binSize, sigma, nbDim=2, nbNeighbours=20):
    m = Isomap(n_neighbors=nbNeighbours, n_components=nbDim, eigen_solver='auto',
               tol=0, max_iter=None, path_method='auto', neighbors_algorithm='kd_tree')
    D = m.fit_transform(data)
    ctrl = D[:ctrlSize]
    ctrlTree = KDTree(ctrl, leafsize=10)
    length = ctrlSize
    mini = np.amin(D, 0)
    maxi = np.amax(D, 0)
    nbPointsX = int((maxi[0] - mini[0]) / float(binSize)) + 1
    nbPointsY = int((maxi[1] - mini[1]) / float(binSize)) + 1
    result = np.zeros(shape=(nbPheno, nbPointsX, nbPointsY))
    denomCtrl = np.zeros(shape=(nbPointsX, nbPointsY))
    for pointX, pointY in product(range(nbPointsX), range(nbPointsY)):
        x = mini[0] + (pointX + 0.5) * binSize
        y = mini[1] + (pointY + 0.5) * binSize
        ctrldou, ctrli = ctrlTree.query((x, y), ctrlSize, distance_upper_bound=binSize / sqrt(2))
        if min(ctrldou) < 100:
            ctrlPoint = [t for t in zip(ctrldou, ctrli)
                         if t[1] < ctrl.shape[0]
                         and np.all(np.abs(ctrl[t[1]] - (x, y)) < (binSize / 2.0, binSize / 2.0))]
            for distance, cPoint in ctrlPoint:
                denomCtrl[pointX, pointY] += dist((x, y), ctrl[cPoint], sigma)
    for ifilm in range(nbPheno):
        print('film ', ifilm)
        pheno = D[length:length + lPheno[ifilm]]
        phenoTree = KDTree(pheno, leafsize=10)
        for pointX, pointY in product(range(nbPointsX), range(nbPointsY)):
            x = mini[0] + (pointX + 0.5) * binSize
            y = mini[1] + (pointY + 0.5) * binSize
            denom = denomCtrl[pointX, pointY]
            phenodou, phenoi = phenoTree.query((x, y), data.shape[0] - ctrlSize,
                                               distance_upper_bound=binSize / sqrt(2))
            if min(phenodou) < 100:
                phenoPoint = [t for t in zip(phenodou, phenoi)
                              if t[1] < pheno.shape[0]
                              and np.all(np.abs(pheno[t[1]] - (x, y)) < (binSize / 2.0, binSize / 2.0))]
                for distance, pPoint in phenoPoint:
                    local = dist((x, y), pheno[pPoint], sigma)
                    result[ifilm, pointX, pointY] += local
                    denom += local
            if denom > 0:
                result[ifilm, pointX, pointY] /= denom
        length += lPheno[ifilm]
    plotMovies('/media/lalil0u/New/workspace2/Tracking/images', result,
               'pattern_b{}_s{}'.format(binSize, sigma))
    return result
def plot_2d(dataset):
    """Project dataset onto 2 components with Isomap and draw a scatter plot."""
    iso = Isomap(n_components=2)
    projected = iso.fit_transform(dataset.data.toarray())
    print('projected: sample: %s, feature: %s'
          % (projected.shape[0], projected.shape[1]))
    all_scatter = []
    colors = cm.rainbow(np.linspace(0, 1, len(dataset.target_names)), alpha=0.5)
    for i in range(len(dataset.target_names)):
        points = projected[dataset.target == i, :]
        cur = plt.scatter(points[:, 0], points[:, 1], color=colors[i],
                          edgecolor='k', lw=0.6,
                          vmin=0, vmax=len(dataset.target_names))
        all_scatter.append(cur)
    plt.legend(all_scatter, dataset.target_names,
               loc='lower left', scatterpoints=1)
    plt.clim(-0.5, 9.5)
    plt.savefig('isomap2d', dpi=500)
def embedDistanceMatrix(dist, method='tsne'):
    """MDS embedding of sequence distances in dist, returning Nx2 x,y-coords:
    tsne, isomap, pca, mds, kpca, lle"""
    if method == 'tsne':
        xy = tsne.run_tsne(dist, no_dims=2)
        # xy = pytsne.run_tsne(adist, no_dims=2)
    elif method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=2)
        xy = isoObj.fit_transform(dist)
    elif method == 'mds':
        mds = manifold.MDS(n_components=2,
                           max_iter=3000, eps=1e-9, random_state=15,
                           dissimilarity="precomputed", n_jobs=1)
        xy = mds.fit(dist).embedding_
        rot = PCA(n_components=2)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=2)
        xy = pcaObj.fit_transform(1 - dist)
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=2, kernel='precomputed')
        xy = pcaObj.fit_transform(1 - dist)
    elif method == 'lle':
        lle = manifold.LocallyLinearEmbedding(n_neighbors=30, n_components=2, method='standard')
        xy = lle.fit_transform(dist)
    return xy
# Set seed
np.random.seed(42)

# ------------------------------- FEATURE EXTRACTION ---------------------------------------------------------
# Transform the images in the images folder into a 2D numpy array with one image per row and one pixel per column
data = aux.images_as_matrix()

# Extract 6 features using Principal Component Analysis
PCA_features = PCA(n_components=6).fit_transform(data)

# Extract 6 features using t-Distributed Stochastic Neighbor Embedding
TSNE_features = TSNE(n_components=6, method="exact").fit_transform(data)

# Extract 6 features using Isometric mapping with Isomap
ISOMAP_features = Isomap(n_components=6).fit_transform(data)

# Save the 18 extracted features into one feature matrix
matrix = np.concatenate((PCA_features, TSNE_features, ISOMAP_features), axis=1)
np.savez('featureextration.npz', matrix)

# ------------------------------- FEATURE SELECTION ---------------------------------------------------------
def scatter_plot(features):
    """ Another method to check the correlation between features """
    plt.figure()
    scatter_matrix(features, alpha=0.5, figsize=(15, 10), diagonal='kde')
    plt.savefig("scatter_plot.png")
fa_projected_data = FactorAnalysis(
    n_components=PROJECTED_DIMENSIONS).fit_transform(neural_data)
color_3D_projection(fa_projected_data, variable_data, 'FA; ' + Title)

# ICA
ICA_projected_data = FastICA(
    n_components=PROJECTED_DIMENSIONS).fit_transform(neural_data)
color_3D_projection(ICA_projected_data, variable_data, 'ICA; ' + Title)

# Isomap
N_NEIGHBORS = 30
Isomap_projected_data = Isomap(
    n_components=PROJECTED_DIMENSIONS,
    n_neighbors=N_NEIGHBORS).fit_transform(neural_data)
color_3D_projection(Isomap_projected_data, variable_data, 'Isomap; ' + Title)

# tSNE
PERPLEXITY = 30  # normally ranges 5-50
TSNE_projected_data = TSNE(
    n_components=PROJECTED_DIMENSIONS,
    perplexity=PERPLEXITY).fit_transform(neural_data)
color_3D_projection(TSNE_projected_data, variable_data, 'tSNE; ' + Title)

# Multidimensional scaling
MDS_projected_data = MDS(
# maxabsscaler = pp.MaxAbsScaler()
# maxabsscaler.fit(X)
# X = maxabsscaler.transform(X)
# print('MaxAbsScaler\n========')

# X = pp.normalize(X)
# print('normalizer\n========')

# TODO: Use PCA to reduce noise, n_components 4-14
nc = 5
# pca = PCA(n_components=nc)
# pca.fit(X)
# X = pca.transform(X)
# print('PCA: ', nc)

# Use Isomap to reduce noise, n_neighbors 2-5
nn = 4
im = Isomap(n_neighbors=nn, n_components=nc)
im.fit(X)
X = im.transform(X)
print('Isomap: ', nn, ' comp: ', nc)

# TODO: train_test_split 30% and random_state=7
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=7)

# TODO: Create an SVC, train and score against defaults
result = findMaxSVC()
print(result['score'])
def makeRingManifold(spikes, ep, angle, bin_size=200):
    """
    spikes : dict of hd spikes
    ep : epoch to restrict
    angle : tsd of angular direction
    bin_size : in ms
    """
    neurons = np.sort(list(spikes.keys()))
    inputs = []
    angles = []
    sizes = []
    bins = np.arange(ep.as_units('ms').start.iloc[0],
                     ep.as_units('ms').end.iloc[0] + bin_size, bin_size)
    spike_counts = pd.DataFrame(index=bins[0:-1] + np.diff(bins) / 2, columns=neurons)
    for i in neurons:
        spks = spikes[i].as_units('ms').index.values
        spike_counts[i], _ = np.histogram(spks, bins)
    rates = np.sqrt(spike_counts / (bin_size))
    epi = nts.IntervalSet(ep.loc[0, 'start'], ep.loc[0, 'end'])
    angle2 = angle.restrict(epi)
    newangle = pd.Series(index=np.arange(len(bins) - 1))
    tmp = angle2.groupby(np.digitize(angle2.as_units('ms').index.values, bins) - 1).mean()
    tmp = tmp.loc[np.arange(len(bins) - 1)]
    newangle.loc[tmp.index] = tmp
    newangle.index = pd.Index(bins[0:-1] + np.diff(bins) / 2.)
    tmp = rates.rolling(window=200, win_type='gaussian',
                        center=True, min_periods=1, axis=0).mean(std=2).values
    sizes.append(len(tmp))
    inputs.append(tmp)
    angles.append(newangle)
    inputs = np.vstack(inputs)
    imap = Isomap(n_neighbors=20, n_components=2, n_jobs=-1).fit_transform(inputs)
    H = newangle.values / (2 * np.pi)
    HSV = np.vstack((H, np.ones_like(H), np.ones_like(H))).T
    RGB = hsv_to_rgb(HSV)
    fig, ax = subplots()
    ax = subplot(111)
    ax.set_aspect(aspect=1)
    ax.scatter(imap[:, 0], imap[:, 1], c=RGB, marker='o',
               alpha=0.5, zorder=2, linewidth=0, s=40)
    # hsv color-wheel inset
    display_axes = fig.add_axes([0.2, 0.25, 0.05, 0.1], projection='polar')
    colormap = plt.get_cmap('hsv')
    norm = mpl.colors.Normalize(0.0, 2 * np.pi)
    xval = np.arange(0, 2 * pi, 0.01)
    yval = np.ones_like(xval)
    display_axes.scatter(xval, yval, c=xval, s=20, cmap=colormap,
                         norm=norm, linewidths=0, alpha=0.8)
    display_axes.set_yticks([])
    display_axes.set_xticks(np.arange(0, 2 * np.pi, np.pi / 2))
    display_axes.grid(False)
    show()
    return imap, bins[0:-1] + np.diff(bins) / 2
plt.ylabel("MLLE2") # KERNEL PRINCIPAL COMPONENT ANALYSIS (KPCA) print("Performing Kernel Principal Component Analysis (KPCA) ...") plt.subplot(333) kpca = KernelPCA(n_components=2, kernel='cosine').fit_transform(X) plt.scatter(kpca[:, 0], kpca[:, 1], c=Y, cmap='viridis', s=1) plt.title('Kernel PCA') #plt.colorbar() plt.xlabel("KPCA1") plt.ylabel("KPCA2") # ISOMAP print("Performing Isomap Plotting ...") plt.subplot(334) model = Isomap(n_components=2) isomap = model.fit_transform(X) plt.scatter(isomap[:, 0], isomap[:, 1], c=Y, cmap='viridis', s=1) plt.title('Isomap') #plt.colorbar() plt.xlabel("ISO1") plt.ylabel("ISO2") # LAPLACIAN EIGENMAP print("Performing Laplacian Eigenmap (Spectral Embedding) ...") plt.subplot(335) model = SpectralEmbedding(n_components=2, n_neighbors=50) se = model.fit_transform(X) plt.scatter(se[:, 0], se[:, 1], c=Y, cmap='viridis', s=1) plt.title('Laplacian Eigenmap') #plt.colorbar()
print(x_train.shape)
print(x_train)

models = []
emb_size = 64
num_neighbors = 32
for emb_size in (32, 64):
    print("********************* emb_size=" + str(emb_size) + " ***************")
    models = []
    models.append(LocallyLinearEmbedding(n_neighbors=num_neighbors, n_components=emb_size,
                                         n_jobs=multiprocessing.cpu_count()))
    models.append(SpectralEmbedding(n_neighbors=num_neighbors, n_components=emb_size,
                                    n_jobs=multiprocessing.cpu_count()))
    models.append(PCA(n_components=emb_size))
    models.append(MDS(n_components=emb_size, n_jobs=multiprocessing.cpu_count()))
    models.append(Isomap(n_neighbors=num_neighbors, n_components=emb_size,
                         n_jobs=multiprocessing.cpu_count()))
    models.append('matrix2vec')
    model_names = ['lle', 'le', 'pca', 'MDS', 'ISOMAP', 'matrix2vec']  # names corresponding to models
    for index, embedding in enumerate(models):
        print('Start running model ' + model_names[index] + "...")
        start = datetime.datetime.now()
        X_transformed = np.zeros((x_train.shape[0], emb_size))
        if index <= 4:
            X_transformed = embedding.fit_transform(x_train)
        else:
            X_transformed = matrix2vec.matrix2vec(x_train, emb_size, topk=5, num_iter=10)
# 03-03.py
X, y = preprocess(data, shuffle=False, n_samples=1000, normalization=None)

from sklearn.manifold import Isomap
iso = Isomap(n_neighbors=15, n_components=3)
X_proj = iso.fit_transform(X)

three_component_plot(X_proj[:, 0], X_proj[:, 1], X_proj[:, 2], y, labels, trim_outliers=True)
def show_batch(image_batch):
    plt.figure(figsize=(7, 5))
    for i in range(24):
        ax = plt.subplot(4, 6, i + 1)
        plt.imshow(image_batch[i], cmap='gray')
        plt.axis('off')

# %%
show_batch(face_data.images)
# plt.savefig('faces.png', dpi=72, bbox_inches='tight')

# %%
from sklearn.manifold import Isomap
model = Isomap(n_components=2)
isomap_face = model.fit_transform(face_data.data)
isomap_face.shape

# %%
data_num = isomap_face.shape[0]
data_count = list(range(data_num))
count_ind = data_count[0::2]

from matplotlib import offsetbox

def plotting_out(data, model, images=None, ax=None, cmap='gray'):
    ax = ax or plt.gca()
    model_pred = model.fit_transform(data)
    ax.plot(model_pred[:, 0], model_pred[:, 1], '.')
    for i in count_ind:
def isomap(X):
    i = Isomap()
    return i.fit_transform(X)
#%% comparing predictions with labels
Y_pred = kmeans_fit.predict(X_test)  #!
for p, l in zip(Y_pred, Y_test):
    print(p, l)

import pandas as pd
digdf = pd.DataFrame({'pred': Y_pred, 'label': Y_test})
sum(digdf['pred'] - digdf['label'] != 0)  # 396

#%%
# https://scikit-learn.org/stable/modules/generated/sklearn.manifold.Isomap.html#sklearn.manifold.Isomap
# https://scikit-learn.org/stable/modules/manifold.html#isomap
from sklearn.manifold import Isomap
X_iso = Isomap(n_neighbors=10).fit_transform(X_train)

# Compute cluster centers and predict cluster index for each sample
clusters = kmeans_fit.fit_predict(X_train)  #!

# Create a plot with subplots in a grid of 1x2
fig, ax = plt.subplots(1, 2, figsize=(8, 4))

# Adjust layout
fig.suptitle('Predicted Versus Training Labels', fontsize=14, fontweight='bold')
fig.subplots_adjust(top=0.85)

# Add scatterplots to the subplots
ax[0].scatter(X_iso[:, 0], X_iso[:, 1], c=clusters)
ax[0].set_title('Predicted Training Labels')
ax[1].scatter(X_iso[:, 0], X_iso[:, 1], c=Y_train)
def isomap(X=None, W=None, num_vecs=None, k=None):
    # W (a precomputed weight/affinity matrix) is accepted for API symmetry
    # with the other embedding helpers but is not used by Isomap.
    embedder = Isomap(n_neighbors=k, n_components=num_vecs)
    return embedder.fit_transform(X)
for i, ax in enumerate(axes.flat):
    ax.imshow(digits.images[i], cmap='binary')
    ax.text(0.05, 0.05, str(digits.target[i]),
            transform=ax.transAxes, color='green')
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

# Dimensionality Reduction
from sklearn.manifold import Isomap
iso = Isomap(n_components=2)
data_projected = iso.fit_transform(digits.data)
plt.show()
print(data_projected.shape)

# plot our new data in matplotlib
plt.scatter(data_projected[:, 0], data_projected[:, 1], c=digits.target,
            edgecolor='none', alpha=0.5,
            cmap=plt.cm.get_cmap('nipy_spectral', 10))
plt.clim(-0.05, 9.5)
plt.colorbar().set_ticks(np.arange(0, 10, 1))
plt.show()
    model = PCA(n_components=2)
    model.fit(X_train)
else:
    print('Computing 2D Isomap Manifold')
    #
    # TODO: Implement Isomap here. Save your model into the variable 'model'.
    # Experiment with K values from 5-10.
    # You should reduce down to two dimensions.
    #
    # .. your code here ..
    from sklearn.manifold import Isomap
    model = Isomap(n_neighbors=5, n_components=2)
    model.fit(X_train)

#
# TODO: Train your model against data_train, then transform both
# data_train and data_test using your model. You can save the results right
# back into the variables themselves.
#
# .. your code here ..
X_train = model.transform(X_train)
X_test = model.transform(X_test)

#
# TODO: Implement and train KNeighborsClassifier on your projected 2D
# training data here. You can use any K value from 1 - 15, so play around
def apply_ISOMap(proj_data, proj_weights=None):
    model = Isomap(n_neighbors=4, n_components=2)
    result = model.fit_transform(proj_data.T)
    return result
from __future__ import division
import sys
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.manifold import LocallyLinearEmbedding
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import random
from colorsys import hsv_to_rgb

pca = PCA(n_components=2)
isomap = Isomap(n_components=2)
lle = LocallyLinearEmbedding(n_components=2)

data = np.genfromtxt('data01_small.txt', delimiter=',')
pca_xform = pca.fit_transform(data)
isomap_xform = isomap.fit_transform(data)
lle_xform = lle.fit_transform(data)

label = [0] * 100 + [1] * 100
rgbs = [(0.5, 0, 0), (0, 0.5, 0)]

plt.figure()
xs = pca_xform[:, 0]
ys = pca_xform[:, 1]
ax = plt.subplot(111)
for i in range(len(xs)):
    ax.text(xs[i], ys[i], str(label[i]), color=rgbs[label[i]],
            fontdict={'weight': 'bold', 'size': 9})
t = (max(xs) - min(xs)) * 0.1
threes_data = threes_df.values[:, 1: len(df.columns) - 1]
threes_data = (threes_data - threes_data.min()) / \
              (threes_data.max() - threes_data.min())

n_neighbors = 5
n_components = 4

# 1. Apply LLE
lle = LocallyLinearEmbedding(n_neighbors=n_neighbors, n_components=n_components)
lle_data = lle.fit_transform(threes_data)
lle_df = pd.DataFrame(lle_data)
plot_three("LLE", lle_df, 0, 1, threes_df, 0.45)

# 2. Apply ISOMAP
iso = Isomap(n_neighbors=n_neighbors, n_components=n_components)
iso_data = iso.fit_transform(threes_data)
iso_df = pd.DataFrame(iso_data)
plot_three("Isomap", iso_df, 0, 1, threes_df, 0.45)

# 3. Use the Naive Bayes classifier to classify the dataset based on the
#    projected 4-dimension representations of the LLE and ISOMAP.
df_data = df.values[:, 1: len(df.columns) - 1]
test_size = 0.3

def calc_mean_accuracy(data, threshold=0.00015, miniter=500):
    print("Diff threshold {}".format(threshold))  # was `thresh`, an undefined name
    i = 0
    scores = []
    mean_accuracy = 0
def plot2d(X, y, scale=True, normalize=False, embedding='pca', title=''):
    """
    Plot data transformed into two dimensions by the chosen embedding.
    PCA transforms into a new embedding dimension such that the first
    dimension contains the maximal variance and following dimensions
    maximal remaining variance. This should spread the observed
    n-dimensional data maximally. This is unsupervised and will not
    consider target values.
    """
    if scale:
        scaler = StandardScaler()
        X = scaler.fit_transform(X)

    if normalize:
        normalizer = Normalizer(norm='l2')
        X = normalizer.fit_transform(X)

    # NOTE: the original compared strings with `is`, which tests identity,
    # not equality; `==` is the correct comparison.
    if embedding == 'pca':
        pca = PCA(n_components=2)
        X_transformed = pca.fit_transform(X)
    elif embedding == 'isomap':
        isomap = Isomap(n_components=2, n_neighbors=20)
        X_transformed = isomap.fit_transform(X)
    elif embedding == 'lle':
        lle = LocallyLinearEmbedding(n_components=2, n_neighbors=5)
        X_transformed = lle.fit_transform(X)
    elif embedding == 'tsne':
        t_sne = TSNE(n_components=2)
        X_transformed = t_sne.fit_transform(X)
    elif embedding == 'spectral':
        se = SpectralEmbedding(n_components=2)
        X_transformed = se.fit_transform(X)
    elif embedding == 'mds':
        mds = MDS(n_components=2)
        X_transformed = mds.fit_transform(X)
    elif embedding == 'gallery':
        plt.figure(1)

        plt.subplot(231)
        plt.title('pca')
        X_t = PCA(n_components=2).fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)

        plt.subplot(232)
        plt.title('isomap')
        X_t = Isomap(n_neighbors=20).fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)

        plt.subplot(233)
        plt.title('lle')
        X_t = LocallyLinearEmbedding(n_neighbors=20).fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)

        plt.subplot(234)
        plt.title('tsne')
        X_t = TSNE().fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)

        plt.subplot(235)
        plt.title('spectral')
        X_t = SpectralEmbedding().fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)

        plt.subplot(236)
        plt.title('mds')
        X_t = MDS().fit_transform(X)
        plt.scatter(X_t[:, 0], X_t[:, 1], c=y)

        plt.suptitle('Gallery transforms ' + title)
        return plt
    else:
        raise ValueError("Choose between pca, isomap, lle, tsne, spectral, mds and gallery")

    plt.title(title + ' ' + embedding + ' plot')
    sc = plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y)
    plt.colorbar(sc)
    return plt
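# Usage sketch for plot2d on a toy dataset; load_iris is illustrative and not
# part of the snippet above.
from sklearn.datasets import load_iris

iris = load_iris()
plot2d(iris.data, iris.target, embedding='isomap', title='iris').show()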
# title is your chart title
# x is the principal component you want displayed on the x-axis. Can be 0 or 1
# y is the principal component you want displayed on the y-axis. Can be 1 or 2
#
# .. your code here ..
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
pca.fit(df)
T = pca.transform(df)
Plot2D(T, "PCA 1 2", 1, 2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# .. your code here ..
from sklearn.manifold import Isomap
imap = Isomap(n_neighbors=8, n_components=3)
imap.fit(df)
T2 = imap.transform(df)
Plot2D(T2, "Isomap", 1, 2)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D.
# Even if you're not, just do it anyway.
#
# .. your code here ..
plt.show()
    return data_n

def scatter_3d(X, y):
    fig = plt.figure(figsize=(6, 5))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.hot)
    ax.view_init(10, -70)
    ax.set_xlabel("$x_1$", fontsize=18)
    ax.set_ylabel("$x_2$", fontsize=18)
    ax.set_zlabel("$x_3$", fontsize=18)
    plt.show()

if __name__ == '__main__':
    X, Y = make_s_curve(n_samples=500, noise=0.1, random_state=42)
    data_1 = my_Isomap(X, 2, 10)
    data_2 = Isomap(n_neighbors=10, n_components=2).fit_transform(X)

    plt.figure(figsize=(8, 4))
    plt.subplot(121)
    plt.title("my_Isomap")
    plt.scatter(data_1[:, 0], data_1[:, 1], c=Y)
    plt.subplot(122)
    plt.title("sklearn_Isomap")
    plt.scatter(data_2[:, 0], data_2[:, 1], c=Y)
    plt.savefig("Isomap1.png")
    plt.show()
# Build the output arrays
cells = opts.high // opts.step  # integer cell count (was true division)
isomap_gmm_results = np.zeros((cells, opts.iters))

D = scale(X)
n_samples, n_features = D.shape

# chosen by hyperparam search in a separate test.
n_neighbors = 10

# For the specified number of principal components, do the clustering
dimension_list = range(opts.low, opts.high + 1, opts.step)
data_files = []
for i in dimension_list:
    index = (i // opts.step) - 1
    isomap = Isomap(n_neighbors=n_neighbors, n_components=i)
    X_iso = isomap.fit_transform(D)
    for j in range(0, opts.iters, 1):
        gaussmix = GMM(n_components=true_k, covariance_type='tied', n_init=10, n_iter=1000)
        gaussmix.fit(X_iso)
        gaussmix_labels = gaussmix.predict(X_iso)
        homog = metrics.homogeneity_score(labels[:, 0], gaussmix_labels)
        print("Homogeneity: %0.3f" % homog)
        test_result = {"Model": 'Isomap', "Dimension": i, "Homogeneity": homog, "Trial": j}
        index = pd.Index([0], name='rows')
        data_files.append(pd.DataFrame(data=test_result, index=index))
print("...Done")
print("...rbinding DataFrames")
master_df = data_files[0]
def embedDistanceMatrix(dmatDf, method='kpca', n_components=2, **kwargs):
    """Two-dimensional embedding of sequence distances in dmatDf,
    returning Nx2 x,y-coords: tsne, isomap, pca, mds, kpca, sklearn-tsne"""
    if isinstance(dmatDf, pd.DataFrame):
        dmat = dmatDf.values
    else:
        dmat = dmatDf

    if method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=n_components)
        xy = isoObj.fit_transform(dmat)
    elif method == 'mds':
        mds = MDS(n_components=n_components,
                  max_iter=3000, eps=1e-9, random_state=15,
                  dissimilarity="precomputed", n_jobs=1)
        xy = mds.fit(dmat).embedding_
        rot = PCA(n_components=n_components)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=None)
        xy = pcaObj.fit_transform(dmat)[:, :n_components]
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=dmat.shape[0], kernel='precomputed', eigen_solver='dense')
        try:
            gram = dist2kernel(dmat)
        except Exception:
            print('Could not convert dmat to kernel for KernelPCA; using 1 - dmat/dmat.max() instead')
            gram = 1 - dmat / dmat.max()
        xy = pcaObj.fit_transform(gram)[:, :n_components]
    elif method == 'lle':
        lle = LocallyLinearEmbedding(n_neighbors=30, n_components=n_components, method='standard')
        xy = lle.fit_transform(dmat)  # was fit_transform(dist); `dist` is undefined in this scope
    elif method == 'sklearn-tsne':
        tsneObj = TSNE(n_components=n_components, metric='precomputed',
                       random_state=0, perplexity=kwargs['perplexity'])
        xy = tsneObj.fit_transform(dmat)
    else:
        print('Method unknown: %s' % method)
        return

    assert xy.shape[0] == dmatDf.shape[0]
    xyDf = pd.DataFrame(xy[:, :n_components], index=dmatDf.index, columns=np.arange(n_components))
    if method == 'kpca':
        """Not sure how negative eigenvalues should be handled here, but they are
        usually small so it shouldn't make a big difference"""
        xyDf.explained_variance_ = pcaObj.lambdas_[:n_components] / pcaObj.lambdas_[
            pcaObj.lambdas_ > 0].sum()
    return xyDf
def cluster_manifold_in_embedding(hl, y, n_clusters, save_dir, visualize):
    # find manifold on autoencoded embedding
    if args.manifold_learner == 'UMAP':
        md = float(args.umap_min_dist)
        hle = umap.UMAP(random_state=0,
                        metric=args.umap_metric,
                        n_components=args.umap_dim,
                        n_neighbors=args.umap_neighbors,
                        min_dist=md).fit_transform(hl)
    elif args.manifold_learner == 'LLE':
        hle = LocallyLinearEmbedding(n_components=args.umap_dim,
                                     n_neighbors=args.umap_neighbors).fit_transform(hl)
    elif args.manifold_learner == 'tSNE':
        hle = TSNE(n_components=args.umap_dim,
                   n_jobs=16,
                   random_state=0,
                   verbose=0).fit_transform(hl)
    elif args.manifold_learner == 'isomap':
        hle = Isomap(n_components=args.umap_dim,
                     n_neighbors=5).fit_transform(hl)

    # clustering on new manifold of autoencoded embedding
    if args.cluster == 'GMM':
        gmm = mixture.GaussianMixture(covariance_type='full',
                                      n_components=n_clusters,
                                      random_state=0)
        gmm.fit(hle)
        y_pred_prob = gmm.predict_proba(hle)
        y_pred = y_pred_prob.argmax(1)
    elif args.cluster == 'KM':
        km = KMeans(init='k-means++',
                    n_clusters=n_clusters,
                    random_state=0,
                    n_init=20)
        y_pred = km.fit_predict(hle)
    elif args.cluster == 'SC':
        sc = SpectralClustering(n_clusters=n_clusters,
                                random_state=0,
                                affinity='nearest_neighbors')
        y_pred = sc.fit_predict(hle)

    y_pred = np.asarray(y_pred)
    y_pred = y_pred.reshape(len(y_pred), )
    y = np.asarray(y)
    y = y.reshape(len(y), )
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | " + args.manifold_learner +
          " on autoencoded embedding with " + args.cluster + " - N2D")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    if visualize:
        plt.scatter(*zip(*hle[:, :2]), c=y, label=y)
        plt.savefig(save_dir + '/' + args.dataset + '-n2d.png')
        plt.clf()

    return y_pred, acc, nmi, ari
tmp3 = np.vstack(tmp3)

tmp2 = []
for rates in allrates['rnd']:
    tmp2.append(rates.rolling(window=100, win_type='gaussian',
                              center=True, min_periods=1, axis=0).mean(std=2).values)
tmp2 = np.vstack(tmp2)

n = len(tmp1)

tmp = np.vstack((tmp1, tmp3))

sys.exit()  # NOTE: kept from the original; everything below is unreachable as written

imap = Isomap(n_neighbors=100, n_components=2, n_jobs=-1).fit_transform(tmp)
iwak = imap[0:n]
isws = imap[n:]
iswr = imap[n:]
tokeep = np.where(np.logical_and(times >= -500, times <= 500))[0]
iswr = iswr.reshape(len(rip_tsd), len(tokeep), 2)

tmp = np.vstack((tmp1, tmp2))
imap2 = Isomap(n_neighbors=100, n_components=2, n_jobs=-1).fit_transform(tmp)
iwak2 = imap2[0:n]
irand = imap2[n:]
import seaborn as sns

"""Get the projection of the n-dimensional contextual embeddings into a lower
dimensional space using different dimensionality reduction techniques.

NOTE: In the report, the results for TSNE, PCA, and multidimensional scaling
are provided because the other techniques do not provide exciting results.
"""
matrix = np.array(avg_embs)

# tsne = TSNE(n_components=2, perplexity=5.0, early_exaggeration=12.0, metric='cosine', init='pca').fit_transform(matrix)
tsne = TSNE(n_components=2).fit_transform(matrix)
pca = PCA(n_components=2).fit_transform(matrix)
lle = LocallyLinearEmbedding().fit_transform(matrix)
mds = MDS(dissimilarity='euclidean').fit_transform(matrix)
isomap = Isomap().fit_transform(matrix)
spectral = SpectralEmbedding().fit_transform(matrix)

import matplotlib.pyplot as plt

tsne_df = pd.DataFrame({'X': tsne[:, 0], 'Y': tsne[:, 1]})
pca_df = pd.DataFrame({'X': pca[:, 0], 'Y': pca[:, 1]})
lle_df = pd.DataFrame({'X': lle[:, 0], 'Y': lle[:, 1]})
mds_df = pd.DataFrame({'X': mds[:, 0], 'Y': mds[:, 1]})
isomap_df = pd.DataFrame({'X': isomap[:, 0], 'Y': isomap[:, 1]})
spectral_df = pd.DataFrame({'X': spectral[:, 0], 'Y': spectral[:, 1]})
def __init__(self, x_data):
    self._x_data = x_data
    self._x_iso = Isomap(n_neighbors=10).fit_transform(x_data)
save_fig("lle_unrolling_plot") plt.show() print( '------------------------------------------------------------------------------------------------------\n' ' 8.6 MDS(multidimensional scaling), Isomap and t-SNE(t-distributed stochastic neighbor embedding) \n' '------------------------------------------------------------------------------------------------------\n' ) # MDS(multidimensional scaling) mds = MDS(n_components=2, random_state=42) X_reduced_mds = mds.fit_transform(X) # Isomap isomap = Isomap(n_components=2) X_reduced_isomap = isomap.fit_transform(X) # t-SNE(t-distributed stochastic neighbor embedding) tsne = TSNE(n_components=2, random_state=42) X_reduced_tsne = tsne.fit_transform(X) # LinearDiscriminantAnalysis lda = LinearDiscriminantAnalysis(n_components=2) X_mnist = mnist["data"] y_mnist = mnist["target"] lda.fit(X_mnist, y_mnist) X_reduced_lda = lda.transform(X_mnist) # titles = ["MDS", "Isomap", "t-SNE"]
# scaler = preprocessing.KernelCenterer()  # 0.915254237288
scaler = preprocessing.StandardScaler()    # 0.966101694915
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# pcaComponent = 4
# pca = PCA(n_components=pcaComponent)
# pca.fit(X_train)
# X_train = pca.transform(X_train)
# X_test = pca.transform(X_test)

neighbors = 2
components = 4
isomap = Isomap(n_neighbors=neighbors, n_components=components)
isomap.fit(X_train)
X_train = isomap.transform(X_train)
X_test = isomap.transform(X_test)

# svc = SVC()
# svc.fit(X_train, y_train)
# print(svc.score(X_test, y_test))

best_score = 0
best_C = 0
best_gamma = 0
for C in np.arange(0.05, 2.05, 0.05):
    for gamma in np.arange(0.001, 1.001, 0.001):
        svc = SVC(C=C, gamma=gamma)
        svc.fit(X_train, y_train)
def plot2D_classification(self, query=None, colors=None,
                          markers=['*', 'v', 'o', '+', '-', '.', ',']):
    X, y = self.__check_data_available()
    n_row, n_col = X.shape
    import matplotlib.pyplot as plt
    import matplotlib as mpl

    c_map = plt.cm.get_cmap("hsv", self._nb_clazz + 1)
    colors = dict((self._clazz[idx], c_map(idx)) for idx in range(0, self._nb_clazz)) \
        if colors is None else colors
    markers = dict((self._clazz[idx], markers[idx]) for idx in range(0, self._nb_clazz))

    def plot_constraints(lower, upper, _linestyle="solid"):
        plt.plot([lower[0], lower[0], upper[0], upper[0], lower[0]],
                 [lower[1], upper[1], upper[1], lower[1], lower[1]],
                 linestyle=_linestyle)
        plt.grid()

    def plot2D_scatter(X, y):
        for row in range(0, len(y)):
            plt.scatter(X[row, 0], X[row, 1], marker=markers[y[row]], c=colors[y[row]])

    def plot_ellipse(splot, mean, cov, color):
        from scipy import linalg
        v, w = linalg.eigh(cov)
        u = w[0] / linalg.norm(w[0])
        angle = np.arctan(u[1] / u[0])
        angle = 180 * angle / np.pi
        ell = mpl.patches.Ellipse(mean, 2 * v[0] ** 0.5, 2 * v[1] ** 0.5,
                                  180 + angle, facecolor="none",
                                  edgecolor=color, linewidth=2, zorder=2)
        ell.set_clip_box(splot.bbox)
        ell.set_alpha(0.9)
        splot.add_artist(ell)

    if n_col == 2:
        for clazz in self._clazz:
            post_mean_lower = self._mean_lower[clazz]
            post_mean_upper = self._mean_upper[clazz]
            plot_constraints(post_mean_lower, post_mean_upper)
            mean = self.get_mean_by_clazz(clazz)
            prior_mean_lower = mean - self.ell
            prior_mean_upper = mean + self.ell
            plot_constraints(prior_mean_lower, prior_mean_upper, _linestyle="dashed")
        if query is not None:
            ml_mean, ml_cov, ml_prob = self.fit_max_likelihood(query)
            plt.plot([query[0]], [query[1]], marker='h', markersize=5, color="black")
            _, _bounds = self.evaluate(query)
            for clazz in self._clazz:
                plt.plot([ml_mean[clazz][0]], [ml_mean[clazz][1]],
                         marker='o', markersize=5, color=colors[clazz])
                _, est_mean_lower = _bounds[clazz]['inf']
                _, est_mean_upper = _bounds[clazz]['sup']
                plt.plot([est_mean_lower[0]], [est_mean_lower[1]],
                         marker='x', markersize=4, color="black")
                plt.plot([est_mean_upper[0]], [est_mean_upper[1]],
                         marker='x', markersize=4, color="black")
        cov, inv, det = self.__cov_group_sample()
        s_plot = plt.subplot()
        for clazz in self._clazz:
            mean = self.get_mean_by_clazz(clazz)
            plot_ellipse(s_plot, mean, cov, colors[clazz])
    elif n_col > 2:
        if query is not None:
            inference, _ = self.evaluate(query)
            X = np.vstack([X, query])
            y = np.append(y, inference[0])
        # project the high-dimensional data to 2-D with Isomap for plotting
        from sklearn.manifold import Isomap
        iso = Isomap(n_components=2)
        projection = iso.fit_transform(X)
        X = np.c_[projection[:, 0], projection[:, 1]]
        if query is not None:
            color_instance = colors[inference[0]] if len(inference) == 1 else 'black'
            plt.plot([X[n_row, 0]], [X[n_row, 1]], color='red', marker='o', mfc=color_instance)
    else:
        raise Exception("Not implemented for one feature yet.")
    plot2D_scatter(X, y)
    plt.show()
XX_train, yy_train = mnist.data / 255., mnist.target
X_train = []
y_train = []
for i, label in enumerate(yy_train):
    if label in mytargets:
        X_train.append(XX_train[i])
        y_train.append(yy_train[i])
num_samples_to_plot = 5000
X_train, y_train = shuffle(X_train, y_train)
X_train, y_train = X_train[:num_samples_to_plot], y_train[:num_samples_to_plot]  # let's subsample a bit for a first impression
for digit in mytargets:
    instances = [i for i in y_train if i == digit]
    print("Digit", digit, "appears", len(instances), "times")

transformer = Isomap(n_neighbors=10, n_components=2)
fig, plot = plt.subplots()
fig.set_size_inches(50, 50)
plt.prism()

X_transformed = transformer.fit_transform(X_train)
plot.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_train)
plot.set_xticks(())
plot.set_yticks(())

count = 0
plt.tight_layout()
plt.suptitle("Isomap for MNIST digits")
for label, x, y in zip(y_train, X_transformed[:, 0], X_transformed[:, 1]):
    # Let's annotate every 1 out of 200 samples; otherwise the graph will be cluttered with annotations
    if count % 200 == 0:
def apply_isomapEmbedding(self, X_train, X_test):
    """Returns the embedded points for Isomap."""
    embedding = Isomap(n_components=2, n_jobs=-1)
    X_train = embedding.fit_transform(X_train)
    X_test = embedding.transform(X_test)
    return X_train, X_test
def eval_other_methods(x, y):
    gmm = mixture.GaussianMixture(covariance_type='full',
                                  n_components=args.n_clusters, random_state=0)
    gmm.fit(x)
    y_pred_prob = gmm.predict_proba(x)
    y_pred = y_pred_prob.argmax(1)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | GMM clustering on raw data")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    y_pred = KMeans(n_clusters=args.n_clusters, random_state=0).fit_predict(x)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | K-Means clustering on raw data")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    sc = SpectralClustering(n_clusters=args.n_clusters, random_state=0,
                            affinity='nearest_neighbors')
    y_pred = sc.fit_predict(x)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | Spectral Clustering on raw data")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    if args.manifold_learner == 'UMAP':
        md = float(args.umap_min_dist)
        hle = umap.UMAP(random_state=0,
                        metric=args.umap_metric,
                        n_components=args.umap_dim,
                        n_neighbors=args.umap_neighbors,
                        min_dist=md).fit_transform(x)
    elif args.manifold_learner == 'LLE':
        from sklearn.manifold import LocallyLinearEmbedding
        hle = LocallyLinearEmbedding(n_components=args.umap_dim,
                                     n_neighbors=args.umap_neighbors).fit_transform(x)
    elif args.manifold_learner == 'tSNE':
        method = 'exact'
        hle = TSNE(n_components=args.umap_dim,
                   n_jobs=16,
                   random_state=0,
                   verbose=0).fit_transform(x)
    elif args.manifold_learner == 'isomap':
        hle = Isomap(n_components=args.umap_dim,
                     n_neighbors=5).fit_transform(x)

    gmm = mixture.GaussianMixture(covariance_type='full',
                                  n_components=args.n_clusters, random_state=0)
    gmm.fit(hle)
    y_pred_prob = gmm.predict_proba(hle)
    y_pred = y_pred_prob.argmax(1)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | GMM clustering on " + str(args.manifold_learner) + " embedding")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    plt.scatter(*zip(*hle[:, :2]), c=y, label=y)
    plt.savefig(args.save_dir + '/' + args.dataset + '-' + str(args.manifold_learner) + '.png')
    plt.clf()

    y_pred = KMeans(n_clusters=args.n_clusters, random_state=0).fit_predict(hle)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | K-Means " + str(args.manifold_learner) + " embedding")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    sc = SpectralClustering(n_clusters=args.n_clusters, random_state=0,
                            affinity='nearest_neighbors')
    y_pred = sc.fit_predict(hle)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | Spectral Clustering on " + str(args.manifold_learner) + " embedding")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")
# Infer trajectory ####

# run topslam
from sklearn.manifold import TSNE, LocallyLinearEmbedding, SpectralEmbedding, Isomap
from sklearn.decomposition import FastICA, PCA

n_components = p["n_components"]

methods = {
    't-SNE': TSNE(n_components=n_components),
    'PCA': PCA(n_components=n_components),
    'Spectral': SpectralEmbedding(n_components=n_components, n_neighbors=p["n_neighbors"]),
    'Isomap': Isomap(n_components=n_components, n_neighbors=p["n_neighbors"]),
    'ICA': FastICA(n_components=n_components)
}
method_names = sorted(methods.keys())
method_names_selected = [
    method_names[i] for i, selected in enumerate(p["dimreds"]) if selected
]
methods = {
    method_name: method
    for method_name, method in methods.items()
    if method_name in method_names_selected
}

# dimensionality reduction
X_init, dims = run_methods(expression, methods)
X = df.iloc[:, 1:-1].to_numpy()  # .as_matrix() was removed from pandas
y = df.iloc[:, -1:].to_numpy()

####################################################################
# Randomized principal component analysis for dimensionality reduction of the alt set.
# The purpose is to find a way to effectively label our data, since labeling
# based solely on perceptual criteria (meaning, just listening to the sounds
# and judging to which instrument they should be assigned) does not work
# well enough.
# from sklearn.decomposition import RandomizedPCA as RandPCA
# pca = RandPCA(n_components=30)
# X = pca.fit_transform(X)

from sklearn.manifold import Isomap
isomap = Isomap(n_components=30)
X = isomap.fit_transform(X)

####################################################################
# Cluster the alternative set into 17 clusters, using KMeans
clstrer = KMeans(n_clusters=17)
clstr = clstrer.fit_predict(X)

####################################################################
# names will be filled with the wav files' filenames
pardir = '../database/all_recorded_and_downloaded_alt_sounds_processed'
names = np.array([])
ax.text(0.05, 0.05, str(digits.target[i]),
        transform=ax.transAxes, color='green')

X = digits.data
print(X.shape)  # each image is represented as a pixel array of length 64
y = digits.target
print(y.shape)
# In total: 1797 samples and 64 features

# 1. Unsupervised learning: dimensionality reduction
# Transform the data into a two-dimensional representation
from sklearn.manifold import Isomap  # a manifold-learning algorithm
iso = Isomap(n_components=2)  # reduce the number of dimensions to 2
iso.fit(digits.data)
data_projected = iso.transform(digits.data)
print(data_projected.shape)

# Plot the data
plt.scatter(data_projected[:, 0], data_projected[:, 1], c=digits.target,
            edgecolors='none', alpha=0.5,
            cmap=plt.cm.get_cmap("Spectral", 10))
plt.colorbar(label='digit label', ticks=range(10))
plt.clim(-0.5, 9.5)

# 2. Classifying digits
def isoMap(X, y):
    im = Isomap(n_components=1, eigen_solver="dense", n_neighbors=20)
    im.fit(X)
    transformX = im.transform(X)
    return transformX
# read in the data
X = pd.read_csv("X ansur.csv")

# standardize the data to take on values between 0 and 1
X = (X - X.min()) / (X.max() - X.min())

# separate the data into training and testing
np.random.seed(1)
test_idx = np.random.choice(a=X.index.values, size=int(X.shape[0] / 5), replace=False)
train_idx = np.array(list(set(X.index.values) - set(test_idx)))

# train an isomap model
n_comp = 1  # number of components
component = Isomap(n_components=n_comp, n_neighbors=5, n_jobs=1)
component.fit(X.iloc[train_idx, :])

# compute components for all the data, add cluster labels and train/test labels
components = pd.DataFrame(component.transform(X),
                          columns=["IC" + str(i + 1) for i in range(n_comp)])
components["Data"] = "Train"
for j in test_idx:
    components.loc[j, "Data"] = "Test"
# components.to_csv("isomap.csv", index=False)

# combine the data and components
data = pd.concat([X, components], axis=1)

# train a random forest to learn the clusters
model = RandomForestRegressor(n_estimators=50,
def isomap(features, n_components=2):
    return Isomap(n_components=n_components, n_jobs=-1).fit_transform(features)
split = 45
X, y = np.float64(subimages), np.float64(subimages2)
del subimages, subimages2

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

pca1 = PCA(n_components=2, svd_solver='auto', random_state=1)
ica1 = FastICA(n_components=20, random_state=1)
tsne1 = TSNE(n_components=2, random_state=1, method='exact')
fa1 = FactorAnalysis(n_components=5, random_state=1)
embedding = Isomap(n_components=20)
reducer = umap.UMAP(n_components=30, random_state=1)

# X_train_t = pca1.fit_transform(X_train)
# X_train_t = ica1.fit_transform(X_train)
# X_train_t = fa1.fit_transform(X_train)
# X_train_t = tsne1.fit_transform(X_train)
# X_train_t = embedding.fit_transform(X_train)
# X_train_t = reducer.fit_transform(X_train)

sc1 = MinMaxScaler()
# X_train_t = sc1.fit_transform(X_train_t)
X_train_t = X_train

# convert your array into a dataframe
df = pd.DataFrame(X_train)
pca = PCA(n_components=3)
pca.fit(df)
T = pca.transform(df)
Plot2D(T, 'chart title', 1, 2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# .. your code here ..
from sklearn.manifold import Isomap
im = Isomap(n_components=3)
im.fit(df)
T = im.transform(df)
Plot2D(T, 'chart title', 1, 2)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D.
# Even if you're not, just do it anyway.
#
# .. your code here ..
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.set_xlabel('0')
ax.set_ylabel('1')
      digits.target.shape)  # (1797, 64) (1797, 8, 8) (1797,)

fig, ax = plt.subplots(6, 6, subplot_kw=dict(xticks=[], yticks=[]),
                       gridspec_kw=dict(hspace=0.1, wspace=0.1))
for i, axi in enumerate(ax.flat):
    axi.imshow(digits.images[i], cmap='binary')  # interpolation='nearest' would smooth pixel edges
    axi.text(0.05, 0.05, str(digits.target[i]), color='g', transform=axi.transAxes)

# dimensionality reduction
iso = Isomap(n_components=2)
new = iso.fit_transform(digits.data)
print(new.shape)

sns.set(style='whitegrid')
plt.figure()
plt.scatter(new[:, 0], new[:, 1], c=digits.target,
            cmap=plt.cm.get_cmap('Spectral', 10),
            edgecolor='none', alpha=0.6)
plt.colorbar(label='Digits', ticks=range(10), extend='both')
plt.clim(-0.5, 9.5)

# classification
model = RFC(n_estimators=400)
pca = PCA(n_components=2)
data_pca = pca.fit_transform(data)
plt.scatter(data_pca[:, 0], data_pca[:, 1], c=target, edgecolor='none',
            alpha=0.5, cmap=plt.cm.get_cmap('rainbow', 2))
plt.colorbar()

## PCA cumulative explained variance
sb.set()
pca_ = PCA().fit(data)
plt.plot(np.cumsum(pca_.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.xlim(0, 5)

## Isomap dimensionality reduction
from sklearn.manifold import Isomap
iso = Isomap(n_components=2)
data_projected = iso.fit_transform(data)
plt.scatter(data_projected[:, 0], data_projected[:, 1], c=target, edgecolor='none',
            alpha=0.5, cmap=plt.cm.get_cmap('rainbow', 2))
plt.colorbar(label='Cancer', ticks=range(2))
plt.clim(-200, 0)

### KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV  # sklearn.grid_search was removed
clf = KNeighborsClassifier()
n_neighbors = [1, 2, 3, 5, 8, 10, 15, 20, 25, 30, 35, 40]
weights = ['uniform', 'distance']
param_grid = [{'n_neighbors': n_neighbors, 'weights': weights}]
grid_search = GridSearchCV(clf, param_grid=param_grid, cv=10)
grid_search.fit(data, target)
ax.yaxis.set_major_formatter(plt.NullFormatter())
ax.set_xlabel('feature 1', color='gray')
ax.set_ylabel('feature 2', color='gray')
ax.set_title(title, color='gray')

# make data
X, y = make_swiss_roll(200, noise=0.5, random_state=42)
X = X[:, [0, 2]]

# visualize data
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], color='gray', s=30)
# format the plot
format_plot(ax, 'Input Data')

model = Isomap(n_neighbors=8, n_components=1)
y_fit = model.fit_transform(X).ravel()

# visualize data
fig, ax = plt.subplots()
pts = ax.scatter(X[:, 0], X[:, 1], c=y_fit, cmap='viridis', s=30)
cb = fig.colorbar(pts, ax=ax)
# format the plot
format_plot(ax, 'Learned Latent Parameter')
cb.set_ticks([])
cb.set_label('Latent Variable', color='gray')
plt.show()