def test_spectral_embedding_unknown_affinity(seed=36):
    """Check that SpectralEmbedding rejects an unrecognised ``affinity``.

    ``seed`` initialises the ``RandomState`` handed to the estimator.
    ``S`` is the sample matrix defined outside this function.
    """
    rng = np.random.RandomState(seed)
    estimator = SpectralEmbedding(
        n_components=1,
        affinity="<unknown>",
        random_state=rng,
    )
    with pytest.raises(ValueError):
        estimator.fit(S)
def cluster_responses(response_mat, n_clusters, corr_cut=0.6):
    """
    Clusters neuron responses with spectral clustering, then re-assigns every
    neuron to the cluster centroid it correlates with best
    :param response_mat: The response matrix with all neuron responses
        (neurons are indexed along the columns by the code below)
    :param n_clusters: The desired number of clusters
    :param corr_cut: Minimum correlation with a cluster centroid for a neuron
        to be counted as a member of that cluster
    :return:
        [0]: The cluster ids (-1 for neurons that match no centroid)
        [1]: 3D embedding coordinates for plotting
    """
    averaged = trial_average(response_mat, 3)

    # Pairwise correlations act as the affinity; undefined (NaN) and weak
    # (< 0.2) correlations are treated as "no connection".
    affinity = np.corrcoef(averaged.T)
    affinity[np.isnan(affinity)] = 0
    affinity[affinity < 0.2] = 0

    clust_ids = SpectralClustering(n_clusters, affinity="precomputed").fit_predict(affinity)
    coords = SpectralEmbedding(3, affinity="precomputed").fit_transform(affinity)

    # One regressor per cluster: the mean response of its initial members.
    regressors = np.zeros((averaged.shape[0], n_clusters))
    for cluster in range(n_clusters):
        regressors[:, cluster] = np.mean(averaged[:, clust_ids == cluster], 1)

    # Final membership: best-correlated regressor, provided it clears corr_cut
    # and is strictly positive; otherwise the neuron is left unassigned (-1).
    for neuron in range(averaged.shape[1]):
        best_cluster, best_corr = -1, 0
        for cluster in range(n_clusters):
            corr = np.corrcoef(averaged[:, neuron], regressors[:, cluster])[0, 1]
            if corr >= corr_cut and corr > best_corr:
                best_cluster, best_corr = cluster, corr
        clust_ids[neuron] = best_cluster
    return clust_ids, coords
def se(X_train_scaled, X_test_scaled, num_components):
    """Embed the train and test sets with Spectral Embedding.

    The same estimator object is re-fitted on each set in turn, so the test
    embedding comes from its own fit and not from the training fit.

    Returns the pair ``(X_train_se, X_test_se)``.
    """
    reducer = SpectralEmbedding(n_components=num_components)
    embedded = [
        reducer.fit_transform(part)
        for part in (X_train_scaled, X_test_scaled)
    ]
    return embedded[0], embedded[1]
class LaplacianEigenmaps(AbstractReducer):
    """Reducer that delegates to scikit-learn's ``SpectralEmbedding``."""

    def __init__(self, d: int = 2, random_state: int = 0, **kwargs):
        """Create the wrapped estimator.

        ``d`` becomes ``n_components``; any extra keyword arguments are
        forwarded to ``SpectralEmbedding`` unchanged.
        """
        super().__init__(d, random_state)
        self._main = SpectralEmbedding(
            n_components=d,
            random_state=random_state,
            **kwargs,
        )

    def fit_transform(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """Fit the wrapped estimator on ``x`` and return the embedding.

        Extra keyword arguments are accepted but not used.
        """
        embedded = self._main.fit_transform(x)
        return embedded

    def fit(self, x: np.ndarray, **kwargs):
        """Fit the wrapped estimator on ``x``; returns whatever its ``fit`` returns."""
        fitted = self._main.fit(x)
        return fitted

    def transform(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """Not supported by this reducer."""
        raise NotImplementedError

    def set_random_state(self, random_state: int = 0):
        """Update the seed on both this wrapper and the wrapped estimator."""
        self._main.random_state = random_state
        self.random_state = random_state

    @property
    def is_deterministic(self) -> bool:
        """Always ``False`` for this reducer."""
        return False

    @property
    def is_stateful(self) -> bool:
        """Always ``True`` for this reducer."""
        return True

    @staticmethod
    def get_parameter_ranges() -> dict:
        """Return the tunable parameters as ``name -> (type, low, high)``."""
        ranges = {'n_neighbors': (int, 1, 20)}
        return ranges
def plot_md_scaling(mdist, nd=3):
    """
    Embeds a precomputed matrix with SpectralEmbedding and shows a scatter plot

    :param mdist: square matrix handed to SpectralEmbedding as a precomputed
        affinity (NOTE(review): the name suggests a distance matrix - confirm
        that callers pass similarities)
    :param nd: number of embedding dimensions; 2 gives a flat plot, any other
        value is drawn on 3D axes using the first three components
    :return: nothing, the figure is displayed with ``plt.show()``
    """
    embedder = SpectralEmbedding(n_components=nd, affinity='precomputed', n_neighbors=3)
    points = embedder.fit_transform(mdist)

    figure = plt.figure(figsize=(10, 10))
    blues = plt.get_cmap("Blues")
    if nd == 2:
        axes = figure.add_subplot(111)
        axes.scatter(points[:, 0], points[:, 1], cmap=blues)
    else:
        axes = figure.add_subplot(111, projection='3d')
        axes.scatter(points[:, 0], points[:, 1], points[:, 2],
                     depthshade=False, cmap=blues)
    plt.show()
def spectral_embedding(file_name, dimension, num_neighbors, label):
    """Embed the rows of a text table in 2D (Laplacian eigenmaps) and print them.

    Spectral embedding finds a low dimensional representation of the data
    from a spectral decomposition of the graph Laplacian:
      1. weighted graph construction
      2. graph Laplacian construction
         (unnormalized: L = D-A, normalized: L = D^{-1/2}(D-A)D^{-1/2})
      3. partial eigenvalue decomposition

    Parameters
    ----------
    file_name : path readable by ``np.loadtxt``.
    dimension : number of leading columns used as coordinates; they are
        passed through ``convert_angles_to_cos_sin`` before embedding.
    num_neighbors : ``n_neighbors`` for the nearest-neighbour affinity graph.
    label : selects the value written to the last output column:
        'cluster'   -> column ``dimension``
        'eq'        -> -0.0019872041*300*log(|column dimension+1|)
                       (presumably -k_B*T in kcal/mol at 300 K - confirm)
        'committor' -> column ``dimension+2`` / |column ``dimension+1``|
        any other value leaves the column at 0.

    Each output row (coordinates, the two embedding components, the label
    value) is printed as space-separated values; nothing is returned.
    """
    balls = np.loadtxt(file_name)
    matrix = balls[:, 0:dimension]
    new_matrix = convert_angles_to_cos_sin(matrix)
    s_embedding = SpectralEmbedding(n_components=2, affinity='nearest_neighbors',
                                    gamma=None, random_state=None,
                                    eigen_solver=None, n_neighbors=num_neighbors)
    transformed_matrix = s_embedding.fit_transform(new_matrix)

    ball_coords = np.zeros((balls.shape[0], dimension + 3))
    ball_coords[:, 0:dimension] = balls[:, 0:dimension]
    ball_coords[:, dimension:dimension + 2] = transformed_matrix
    if label == 'cluster':
        ball_coords[:, dimension + 2] = balls[:, dimension]
    elif label == 'eq':
        ball_coords[:, dimension + 2] = -0.0019872041 * 300 * np.log(abs(balls[:, dimension + 1]))
    elif label == 'committor':
        ball_coords[:, dimension + 2] = balls[:, dimension + 2] / abs(balls[:, dimension + 1])

    # print() with a single argument behaves the same on Python 2 and 3.
    for row in ball_coords:
        print(' '.join([str(x) for x in row]))
def test_spectral_embedding_unknown_eigensolver(seed=36):
    """Check that SpectralEmbedding rejects an unrecognised ``eigen_solver``.

    ``seed`` initialises the ``RandomState`` handed to the estimator.
    ``S`` is the sample matrix defined outside this function.
    """
    rng = np.random.RandomState(seed)
    estimator = SpectralEmbedding(
        n_components=1,
        affinity="precomputed",
        random_state=rng,
        eigen_solver="<unknown>",
    )
    with pytest.raises(ValueError):
        estimator.fit(S)
def test_spectral_embedding_amg_solver(seed=36):
    """Compare the 'amg' and 'arpack' eigen solvers of SpectralEmbedding.

    Skipped when ``pyamg`` is not importable. The two embeddings must agree
    up to a per-column sign flip, first on ``S`` with a nearest-neighbours
    affinity and then on a small precomputed affinity (regression test for
    #10715, a special case in which amg is not actually used).
    """
    pytest.importorskip('pyamg')

    def build(solver):
        # Both estimators get their own RandomState seeded identically.
        return SpectralEmbedding(n_components=2, affinity="nearest_neighbors",
                                 eigen_solver=solver, n_neighbors=5,
                                 random_state=np.random.RandomState(seed))

    se_amg = build("amg")
    se_arpack = build("arpack")
    embed_amg = se_amg.fit_transform(S)
    embed_arpack = se_arpack.fit_transform(S)
    assert _check_with_col_sign_flipping(embed_amg, embed_arpack, 1e-5)

    # Affinity between nodes, listed once per edge and mirrored below so the
    # resulting 6x6 matrix is symmetric.
    row = [0, 0, 1, 2, 3, 3, 4]
    col = [1, 2, 2, 3, 4, 5, 5]
    val = [100, 100, 100, 1, 100, 100, 100]
    sparse_affinity = sparse.coo_matrix((val + val, (row + col, col + row)),
                                        shape=(6, 6))
    affinity = sparse_affinity.toarray()

    for estimator in (se_amg, se_arpack):
        estimator.affinity = "precomputed"
    embed_amg = se_amg.fit_transform(affinity)
    embed_arpack = se_arpack.fit_transform(affinity)
    assert _check_with_col_sign_flipping(embed_amg, embed_arpack, 1e-5)
def dimension_reduce():
    '''
    Plot the ``digits`` dataset side by side after PCA, Spectral Embedding and
    Isomap, then show the PCA mean and its first two components as 8x8 images.
    '''
    data, labels = digits.data, digits.target

    # Panel 1: PCA
    pca = PCA(n_components=2)
    pca.fit(data)
    X_pca = pca.transform(data)
    plt.subplot(1, 3, 1)
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=labels)

    # Panel 2: Spectral Embedding with default settings
    spectral = SpectralEmbedding()
    X_se = spectral.fit_transform(data)
    plt.subplot(1, 3, 2)
    plt.scatter(X_se[:, 0], X_se[:, 1], c=labels)

    # Panel 3: Isomap
    isomap = Isomap(n_components=2, n_neighbors=20)
    isomap.fit(data)
    X_iso = isomap.transform(data)
    plt.subplot(1, 3, 3)
    plt.scatter(X_iso[:, 0], X_iso[:, 1], c=labels)
    plt.show()

    # Mean image, then first and second principal component.
    for flat_image in (pca.mean_, pca.components_[0], pca.components_[1]):
        plt.matshow(flat_image.reshape(8, 8))
    plt.show()
def test_spectral_embedding():
    """Chaining KNeighborsTransformer into a precomputed SpectralEmbedding
    must give the same embedding as the built-in nearest-neighbours affinity.
    """
    n_neighbors = 5
    n_samples = 1000
    centers = np.array([
        [0.0, 5.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 4.0, 0.0, 0.0],
        [1.0, 0.0, 0.0, 5.0, 1.0],
    ])
    S, true_labels = make_blobs(n_samples=n_samples, centers=centers,
                                cluster_std=1., random_state=42)

    # Chained version: explicit connectivity graph, then precomputed affinity.
    graph_step = KNeighborsTransformer(n_neighbors=n_neighbors,
                                       mode='connectivity')
    embed_step = SpectralEmbedding(n_neighbors=n_neighbors,
                                   affinity='precomputed', random_state=42)
    est_chain = make_pipeline(graph_step, embed_step)

    # Compact version: the estimator builds the neighbours graph itself.
    est_compact = SpectralEmbedding(n_neighbors=n_neighbors,
                                    affinity='nearest_neighbors',
                                    random_state=42)

    St_compact = est_compact.fit_transform(S)
    St_chain = est_chain.fit_transform(S)
    assert_array_almost_equal(St_chain, St_compact)
def __init__(self, n_nodes=None, npcs=0.8, embedding_dims=2,
             cov_estimator='corpcor', cov_reg=None, cov_indices=None,
             max_iter=10, sigma=0.01, lam=1., gamma=1., n_neighbors=30,
             just_tree=False):
    """Store the configuration and create the embedding and PCA estimators.

    ``embedding_dims`` sets ``n_components`` of a SpectralEmbedding that
    expects a precomputed affinity; ``npcs`` sets ``n_components`` of the
    PCA. All other arguments are stored unchanged under the same name.
    """
    # Plain configuration values.
    self.n_nodes = n_nodes
    self.npcs = npcs
    self.n_neighbors = n_neighbors
    self.just_tree = just_tree
    self.max_iter = max_iter
    self.sigma = sigma
    self.lam = lam
    self.gamma = gamma

    # Covariance-related settings.
    self.cov_estimator = cov_estimator
    self.cov_reg = cov_reg
    self.cov_indices = cov_indices

    # Estimators built from the configuration.
    self.embedding = SpectralEmbedding(n_components=embedding_dims,
                                       affinity='precomputed')
    self.pca = PCA(n_components=self.npcs)
def draw_feature_vecs(X, model, n_samples):
    """Plot a 2D spectral embedding of the model's penultimate-layer features.

    Features are computed for a "same class" batch and a "different class"
    batch, stacked (same-class rows first), embedded together and drawn in
    red and blue respectively. The first ``n_samples`` embedded rows are
    plotted as the same-class group.
    """
    # Data from the same class and from different classes.
    category, same_batch = create_1_data(n_samples, X)
    _, diff_batch = create_0_data(n_samples, X, category=category)

    # Sub-model that stops at the output of the second-to-last layer.
    penultimate = model.layers[-2].output
    feature_model = keras.models.Model(inputs=model.input, outputs=penultimate)
    same_features = feature_model.predict(same_batch)
    diff_features = feature_model.predict(diff_batch)
    stacked = np.concatenate((same_features, diff_features), axis=0)

    projected = SpectralEmbedding(n_components=2).fit_transform(stacked)

    figure = plt.figure(figsize=(8, 6))
    axes = figure.add_subplot(111)
    same_points = projected[:n_samples]
    diff_points = projected[n_samples:]
    axes.scatter(same_points[:, 0], same_points[:, 1],
                 c='r', s=10, label='same class (1)')
    axes.scatter(diff_points[:, 0], diff_points[:, 1],
                 c='b', s=10, label='diff class (0)')
    plt.title('Feature vectors for inputs from same/different classes')
    axes.legend()
    plt.show()
def embed_spectral(train, test):
    """Embed train and test rows jointly in 2D and rescale with MinMaxScaler.

    Both sets are stacked, embedded in one fit, scaled together, and then
    split back at the original train length.
    """
    from sklearn.manifold import SpectralEmbedding

    n_train = train.shape[0]
    stacked = np.concatenate((train, test))
    embedder = SpectralEmbedding(n_components=2, eigen_solver="arpack")
    scaled = MinMaxScaler().fit_transform(embedder.fit_transform(stacked))
    return scaled[:n_train], scaled[n_train:]
def sub_window_creation(self, images, kernels):
    """Cut sub-windows from every image, filter them and embed the result.

    For each window origin (rows 0..99 and columns 0..49, both in steps of
    11) a 50x50 crop of every image is resized to 12x12 and passed through
    ``Preprocessing.process`` with ``kernels``. The label of a crop is the
    image index integer-divided by 4. All processed crops are then reduced
    with SpectralEmbedding; sizes and labels are printed, nothing is returned.
    """
    gb_all_sw = []
    label = []
    for top in range(0, 100, 11):
        for left in range(0, 50, 11):
            for index, image in enumerate(images):
                window = image[top:top + 50, left:left + 50]
                window = cv2.resize(window, dsize=(12, 12),
                                    interpolation=cv2.INTER_NEAREST)
                gb_all_sw.append(Preprocessing.process(self, window, kernels))
                label.append(index // 4)
    print(len(gb_all_sw))
    print(len(gb_all_sw[0]))

    # LEM dimension reduction
    reducer = SpectralEmbedding(n_components=100, n_neighbors=10)
    reduced_sw = reducer.fit_transform(gb_all_sw)
    print('final', len(reduced_sw))
    print('final', reduced_sw[0].shape)
    print(label)
def fit(self, X, y=None):
    """Fit the model from data in X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples
        and n_features is the number of features.

    y : Ignored.

    Returns
    -------
    self : object
        Returns the instance itself.

    Notes
    -----
    A ``self.n_components`` value between 0 and 1 (inclusive) is read as a
    fraction of ``n_features`` and converted to at least one component;
    any other value is used as given.
    """
    logging.debug('Computing locally adjusted affinities.')
    pairwise = dist.pdist(X, metric=self.distance)
    d = dist.squareform(pairwise)

    n_components = self.n_components
    if 0 <= n_components <= 1:
        n_components = max(int(n_components * X.shape[1]), 1)

    logging.debug('Computing embedding of affinities.')
    embedder_options = dict(
        n_components=n_components,
        affinity='precomputed_nearest_neighbors',
        gamma=None,
        random_state=self.random_state,
        eigen_solver=self.eigen_solver,
        n_neighbors=self.n_neighbors,
        n_jobs=self.n_jobs,
    )
    self.embedding_ = SpectralEmbedding(**embedder_options).fit_transform(d)
    return self
def timeline_overlapped_exp(clpeaks, ncl, timepeaks, nfiles, gap=300, step=50 * 300, length=300 * 300, laplace=0.0):
    """Embed the timeline transition matrices of several experiments in 3D.

    For every entry of ``nfiles`` the transition matrices are computed with
    ``compute_timeline_pmatrices``; all matrices are pooled, compared with
    ``compute_trans_dist`` and the resulting matrix is embedded with
    SpectralEmbedding (used as a precomputed affinity) and scatter-plotted.

    :param clpeaks: forwarded to ``compute_timeline_pmatrices``
    :param ncl: forwarded to ``compute_timeline_pmatrices``
    :param timepeaks: forwarded to ``compute_timeline_pmatrices``
    :param nfiles: sequence of 3-item entries; item 0 is the experiment,
        item 1 is the value used to colour that experiment's points
    :param gap: forwarded to ``compute_timeline_pmatrices``
    :param step: forwarded to ``compute_timeline_pmatrices``
    :param length: forwarded to ``compute_timeline_pmatrices``
    :param laplace: forwarded to ``compute_timeline_pmatrices``
    :return: nothing, the figure is displayed with ``plt.show()``
    """
    ldiffs = []
    for exp, _, _ in nfiles:
        lmtrans = compute_timeline_pmatrices(exp, clpeaks, ncl, timepeaks, gap=gap,
                                             step=step, length=length, laplace=laplace)
        # print() with a single argument behaves the same on Python 2 and 3.
        print(len(lmtrans))
        ldiffs.append(lmtrans)

    # Pool all matrices and give each one the colour value of its experiment.
    md = []
    y = []
    for entry, matrices in zip(nfiles, ldiffs):
        md.extend(matrices)
        y.extend([entry[1]] * len(matrices))

    mdist = compute_trans_dist(md)

    fig = plt.figure()
    mds = SpectralEmbedding(n_components=3, affinity='precomputed', n_neighbors=5)
    X_new = mds.fit_transform(mdist)

    # Figure.gca() no longer accepts keyword arguments in current Matplotlib;
    # add_subplot creates the 3D axes on all versions.
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X_new[:, 1], X_new[:, 2], zs=X_new[:, 0], c=y, s=25)
    plt.show()
def make_spectral_plot():
    """Plot SpectralEmbedding with nearest-neighbour and RBF affinities.

    The labelled estimators are handed to ``make_plot_labeled`` with the
    output name "spectral".
    """
    methods = {}
    methods["Spectral NN"] = SpectralEmbedding(affinity="nearest_neighbors")
    methods["Spectral RBF"] = SpectralEmbedding(affinity="rbf")
    make_plot_labeled(methods, file_out="spectral")
def _spectral_dbscan(fcd, n_dim=2, eps=0.3, min_samples=50):
    """Embed a precomputed matrix spectrally and cluster it with DBSCAN.

    ``fcd`` is shifted so its minimum is 0 and used as a precomputed
    affinity. ``eps`` is not used as a radius directly: the DBSCAN radius is
    the ``int(100 * eps)``-th percentile of the pairwise distances between
    embedded points.

    Returns ``(embedding transposed, DBSCAN labels)``.
    """
    shifted = fcd - fcd.min()
    embedder = SpectralEmbedding(n_dim, affinity="precomputed")
    xi = embedder.fit_transform(shifted)

    radius = np.percentile(pdist(xi), int(100 * eps))
    clustering = DBSCAN(eps=radius, min_samples=min_samples).fit(xi)
    return xi.T, clustering.labels_
def LapEigenmap(affinity_matrix, dim, random_state):
    """Spectral embedding of a precomputed affinity, divided by its maximum.

    ``random_state=None`` is replaced by a fresh ``np.random.RandomState()``.
    The returned array has ``dim`` columns and is divided in place by its
    largest entry.
    """
    rng = np.random.RandomState() if random_state is None else random_state
    embedder = SpectralEmbedding(n_components=dim, affinity="precomputed",
                                 random_state=rng)
    component_embedding = embedder.fit_transform(affinity_matrix)
    component_embedding /= component_embedding.max()
    return component_embedding
def spectralembed(input, finaldim):
    """Spectrally embed the columns of a sparse matrix with an RBF affinity.

    Parameters
    ----------
    input : sparse matrix providing ``transpose``, ``toarray`` and
        ``todense``; its columns are the samples that get embedded.
    finaldim : number of embedding dimensions.

    Returns
    -------
    (X_transformed, projected) where ``X_transformed`` is the embedding of
    the columns and ``projected`` is the matrix product of the dense input
    with that embedding (an ``np.matrix``, as before).

    Affinity options of SpectralEmbedding, for reference:
      - 'nearest_neighbors' : construct affinity matrix by knn graph
      - 'rbf' : construct affinity matrix by rbf kernel
      - 'precomputed' : interpret X as precomputed affinity matrix
    """
    from sklearn.manifold import SpectralEmbedding

    embedding = SpectralEmbedding(n_components=finaldim, affinity='rbf')
    # Hand scikit-learn a plain ndarray: np.matrix input (what todense()
    # produces) is rejected by current scikit-learn releases.
    samples = input.transpose().toarray()
    X_transformed = embedding.fit_transform(samples)
    # np.matrix * ndarray is a matrix product; kept so the return type of the
    # second element does not change for callers.
    return X_transformed, input.todense() * X_transformed
def component_layout(data, n_components, component_labels, dim, metric="euclidean", metric_kwds=None):
    """Provide a layout relating the separate connected components.

    This is done by taking the centroid of each component and then
    performing a spectral embedding of the centroids.

    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The source data -- required so we can generate centroids for each
        connected component of the graph.

    n_components: int
        The number of distinct components to be layed out.

    component_labels: array of shape (n_samples)
        For each vertex in the graph the label of the component to
        which the vertex belongs.

    dim: int
        The chosen embedding dimension.

    metric: string or callable (optional, default 'euclidean')
        The metric used to measure distances among the source data points.

    metric_kwds: dict (optional, default None)
        Keyword arguments to be passed to the metric function. ``None`` is
        treated as an empty dict.

    Returns
    -------
    component_embedding: array of shape (n_components, dim)
        The ``dim``-dimensional embedding of the ``n_components``-many
        connected components, divided by its largest entry.
    """
    # Avoid a shared mutable default: build a fresh dict per call.
    if metric_kwds is None:
        metric_kwds = {}

    component_centroids = np.empty((n_components, data.shape[1]),
                                   dtype=np.float64)
    for label in range(n_components):
        component_centroids[label] = data[component_labels == label].mean(axis=0)

    distance_matrix = pairwise_distances(component_centroids,
                                         metric=metric, **metric_kwds)
    # Gaussian kernel turns centroid distances into affinities.
    affinity_matrix = np.exp(-distance_matrix ** 2)

    component_embedding = SpectralEmbedding(
        n_components=dim, affinity="precomputed").fit_transform(affinity_matrix)
    component_embedding /= component_embedding.max()

    return component_embedding
def plotSpectralEmbedding(X, Y, out_p, is_regr=False):
    """Plot a 3-component spectral embedding of X as a pair grid.

    Deep copies of ``X`` and ``Y`` are used. ``X`` is first reduced to 10
    principal components, then embedded in 3 dimensions. The figure is
    written by ``plotClusterPairGrid`` to ``out_p + "spectral_embedding.png"``.
    """
    features, targets = deepcopy(X), deepcopy(Y)

    reduced = PCA(n_components=10).fit_transform(features)
    embedder = SpectralEmbedding(n_components=3, eigen_solver="arpack",
                                 n_neighbors=10, n_jobs=-1)
    embedded = embedder.fit_transform(reduced)

    target_path = out_p + "spectral_embedding.png"
    plotClusterPairGrid(embedded, targets, target_path, 3, 1,
                        "Spectral Embedding", is_regr)
def test_error_pyamg_not_available():
    """Requesting the 'amg' solver must raise ValueError with the message below.

    The estimator uses an RBF affinity and is fitted on ``S``, which is
    defined outside this function.
    """
    expected_message = "The eigen_solver was set to 'amg', but pyamg is not available."
    estimator = SpectralEmbedding(
        n_components=2,
        affinity="rbf",
        eigen_solver="amg",
    )
    with pytest.raises(ValueError, match=expected_message):
        estimator.fit_transform(S)
def runSpectralEmbedding_KMeans(self):
    """
    Reduce ``self.dataset`` to 2 dimensions with SpectralEmbedding, then
    cluster the embedding with K-Means (``self.n_clusters`` clusters, fixed
    seed 0).

    Stores and returns ``(self.dspE, self.kmeansSpE)``: the embedding and the
    cluster label of each row.
    """
    embedder = SpectralEmbedding(n_components=2)
    self.dspE = embedder.fit_transform(self.dataset)

    clusterer = KMeans(n_clusters=self.n_clusters, random_state=0)
    self.kmeansSpE = clusterer.fit_predict(self.dspE)
    return self.dspE, self.kmeansSpE
def my_se(X, y=None, l1=.1, n_components=2, **kwargs):
    """Rank features with RRFS using a spectral embedding as the target codes.

    The embedding of ``X`` is rescaled to [0, 1] using its global minimum
    and maximum and passed as ``rep`` to ``RRFS.train_fs_network``.
    ``y`` and extra keyword arguments are accepted but not used.

    Returns the feature indices ordered from highest to lowest score.
    """
    rrfs = RRFS(X.shape[1], hidden=n_components)

    embedder = SpectralEmbedding(n_components=n_components)
    codes = embedder.fit_transform(X)
    lowest = np.min(codes)
    highest = np.max(codes)
    codes = (codes - lowest) / (highest - lowest)

    score = rrfs.train_fs_network(X, rep=codes, l1=l1, epochs=300, loss='mse')

    # argsort is ascending; reversing it puts the best-scoring feature first.
    ranking = np.argsort(score)[::-1]
    return ranking
def vis_se(feature):
    '''
    visualize extracted features using Spectral Embedding

    4-D and 5-D inputs are flattened to one row per leading index before
    embedding; inputs of any other rank are embedded as given. Returns the
    2-component embedding.
    '''
    rank = len(feature.shape)
    if rank in (4, 5):
        # Collapse everything after the first axis into a single axis.
        feature = feature.reshape((feature.shape[0], -1))

    embedder = SpectralEmbedding(n_neighbors=7, n_components=2)
    return embedder.fit_transform(feature)
def get_spectral_embedding_prod(gram_matrix, n_dims=3, normalize=True, random_state=SEED):
    """Spectral embedding of a Gram matrix after energy normalisation.

    The Gram matrix is already an affinity, but it makes high-energy items
    look more similar than low-energy ones. Each entry is therefore divided
    by the square roots of the two corresponding diagonal entries, with each
    root floored at 1. An RBF affinity would be the alternative.

    With ``normalize`` true the embedding is passed through ``normalize_var``
    before being returned.
    """
    root_energy = np.sqrt(np.diagonal(gram_matrix))
    inv_root_energy = 1.0 / np.maximum(root_energy, 1)
    scaling = np.outer(inv_root_energy, inv_root_energy)
    affinity = gram_matrix * scaling

    embedder = SpectralEmbedding(n_components=n_dims, affinity="precomputed",
                                 random_state=random_state)
    transformed = embedder.fit_transform(affinity)
    if not normalize:
        return transformed
    return normalize_var(transformed)
def affinity_matrix1(eigen_solver, n):
    """Plot the nearest-neighbours affinity matrix of ``data_df`` as an image.

    ``n`` is converted to int and used as the number of neighbours. The
    estimator is fitted on ``data_df`` (read from the enclosing scope, as is
    ``n_cpus``) and its ``affinity_matrix_`` is rendered as a greyscale
    HoloViews image, which is returned.
    """
    neighbours = int(n)

    # 3D embedding; the default Euclidean metric builds the neighbours graph.
    embedding = SpectralEmbedding(n_components=3,
                                  affinity='nearest_neighbors',
                                  n_jobs=n_cpus,
                                  eigen_solver=eigen_solver,
                                  n_neighbors=neighbours)
    # Fitting is what populates affinity_matrix_; the embedding is not used.
    embedding.fit_transform(data_df)

    dense_affinity = scipy.sparse.csr_matrix.toarray(embedding.affinity_matrix_)
    image_title = 'Euclidean Affinity k=' + str(neighbours)
    return hv.Image(dense_affinity).opts(width=500, height=400, colorbar=True,
                                         cmap='Greys', title=image_title)
def get_spectral_embedding(adj, d):
    """
    Given adj is N*N, return its feature mat N*D, D is fixed in model
    :param adj: tensor whose data is moved to the CPU and converted to numpy
    :param d: number of embedding dimensions
    :return: float tensor with the embedding, placed on the CUDA device

    NOTE(review): the estimator keeps its default affinity, so the rows of
    ``adj`` are used as feature vectors rather than as a precomputed
    affinity - confirm this is intended.
    """
    adjacency = adj.data.cpu().numpy()
    embedded = SpectralEmbedding(n_components=d).fit_transform(adjacency)
    return torch.from_numpy(embedded).float().cuda()
def md_scaling(mdist, nd=3):
    """
    Reduces a precomputed square matrix to ``nd`` dimensions with
    SpectralEmbedding

    :param mdist: square matrix handed over as a precomputed affinity
        (NOTE(review): the name suggests distances - confirm that callers
        pass similarities)
    :param nd: number of embedding dimensions
    :return: the embedded coordinates
    """
    # Neighbourhood size: integer part of the square root of the row count.
    n_neighbours = int(np.sqrt(mdist.shape[0]))
    embedder = SpectralEmbedding(n_components=nd, affinity='precomputed',
                                 n_neighbors=n_neighbours)
    embedded = embedder.fit_transform(mdist)
    return embedded
def handle_cannot_link_constraints(X, d_transform, cannot_link_constraints, n, norm_p, spectral_embedding_components=None, sc_sigma=1):
    """Increase condensed distances according to cannot-link constraints.

    The condensed distances are converted to a Gaussian affinity (width
    ``sc_sigma``) and embedded spectrally. For every constraint (i, j) each
    point pair (x, y) receives a penalty based on how differently x and y
    sit relative to i and j in the embedding; the constrained pair itself
    gets the fixed value 2. Penalties are scaled by the largest input
    distance and accumulated in the ``norm_p`` power domain.

    ``spectral_embedding_components`` defaults to ``len(X[0])``.
    Returns the updated condensed distance vector.
    """
    alpha = d_transform.max()
    d_p = np.power(d_transform, norm_p)

    if spectral_embedding_components is None:
        spectral_embedding_components = len(X[0])
    sc_embedding = SpectralEmbedding(
        n_components=spectral_embedding_components, affinity="precomputed")
    squared = np.power(squareform(d_transform), 2)
    affinity_mat = np.exp(-squared / (2 * (sc_sigma**2)))
    embedding = sc_embedding.fit_transform(affinity_mat)

    def relative_side(point, e_i, e_j):
        # l2 distances; the value is +1 at e_i and -1 at e_j.
        to_j = norm(point - e_j)
        to_i = norm(point - e_i)
        return (to_j - to_i) / (to_j + to_i)

    for first, second in cannot_link_constraints:
        # make sure that always i > j
        i, j = (second, first) if first < second else (first, second)
        e_i, e_j = embedding[i], embedding[j]
        for x in range(1, n):
            for y in range(x):
                if x == i and y == j:
                    val = 2
                else:
                    v_x = relative_side(embedding[x], e_i, e_j)
                    v_y = relative_side(embedding[y], e_i, e_j)
                    val = np.abs(v_x - v_y)
                d_p[square_to_condensed(x, y, n)] += np.power(val * alpha, norm_p)
    return np.power(d_p, 1 / float(norm_p))
def get_spectral_embedding_dist(dist_matrix, n_dims=3, gamma=0.0625, normalize=True, random_state=SEED):
    """Spectral embedding of a distance matrix through an RBF affinity.

    Parameters
    ----------
    dist_matrix : array of pairwise distances; converted to float64 because
        the computation needs 64 bit for stability.
    n_dims : number of embedding dimensions.
    gamma : RBF coefficient; the affinity is ``exp(-gamma * d**2)``.
    normalize : when true, the embedding is passed through ``normalize_var``
        because its natural scale is unreliable.
    random_state : forwarded to SpectralEmbedding.

    Returns
    -------
    The embedded coordinates.
    """
    dist_matrix = dist_matrix.astype("float64")
    affinity = np.exp(-gamma * dist_matrix * dist_matrix)
    transformer = SpectralEmbedding(n_components=n_dims, affinity="precomputed",
                                    random_state=random_state)
    transformed = transformer.fit_transform(affinity)
    if normalize:
        transformed = normalize_var(transformed)
    return transformed
def se(features):
    """
    Adds the two Spectral Embedding components of the table's values as
    columns 'se1' and 'se2'. The table is modified in place and returned.
    """
    embedded = SpectralEmbedding(n_components=2).fit_transform(features.values)
    for position, column_name in enumerate(('se1', 'se2')):
        features[column_name] = embedded[:, position]
    return features
def get_spectral_emb(adj, max_size):
    """
    Given adj is N*N, return its feature mat N*D, D is fixed in model
    :param adj: adjacent matrix (tensor; its data is moved to the CPU and
        converted to numpy)
    :param max_size: the amount of dimension to be embedded
    :return: spectral embedding of every node in this graph, as a float
        tensor on the CUDA device

    NOTE(review): the estimator keeps its default affinity, so the rows of
    ``adj`` are used as feature vectors rather than as a precomputed
    affinity - confirm this is intended.
    """
    adjacency = adj.data.cpu().numpy()
    embedded = SpectralEmbedding(n_components=max_size).fit_transform(adjacency)
    return torch.from_numpy(embedded).float().cuda()
def init_hybrid(exp_mat, epi_mat, thresh, nclust):
    """Initialise the two matrices from a joint K-Means clustering.

    The columns of ``exp_mat`` are compared by cosine similarity
    (1 - cosine distance) and embedded spectrally in
    ``epi_mat.shape[1] - 1`` dimensions. The embedding is concatenated with
    the binarised ``epi_mat`` (entries > 0) and clustered with K-Means.

    Returns ``(tf_matrix, gene_matrix)``: the thresholded ``epi_mat`` part of
    the cluster centres (transposed), and the cluster labels passed through
    ``binarize_vector``.
    """
    num_components = epi_mat.shape[1] - 1

    cosine_distance = squareform(pdist(exp_mat.transpose(), metric="cosine"))
    similarity = 1.0 - cosine_distance
    embedder = SpectralEmbedding(n_components=num_components,
                                 affinity="precomputed")
    spec_coord = embedder.fit_transform(similarity)

    epi_bin = (epi_mat > 0.0).astype(float)
    full_coord = np.concatenate((spec_coord, epi_bin), 1)
    kmeans = KMeans(n_clusters=nclust).fit(full_coord)

    # Columns after the spectral coordinates are the epi_bin part.
    epi_centres = kmeans.cluster_centers_[:, num_components:]
    tf_matrix = (epi_centres > thresh).astype(float).transpose()
    gene_matrix = binarize_vector(kmeans.labels_, nclust)
    return tf_matrix, gene_matrix
def swiss_roll_test():
    """Time and plot three 2D embeddings of an S-curve sample.

    Compares scikit-learn's SpectralEmbedding with the two locality
    preserving projection implementations referenced below. Each panel
    title carries the elapsed wall-clock time of that method.
    """
    n_points = 1000
    # make_s_curve is exposed directly on sklearn.datasets; the
    # samples_generator submodule was removed from scikit-learn.
    X, color = datasets.make_s_curve(n_points, random_state=0)
    n_neighbors = 20
    n_components = 2

    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(5, 10))

    # original LE algorithm
    t0 = time()
    ml_model = SpectralEmbedding(n_neighbors=n_neighbors,
                                 n_components=n_components)
    Y = ml_model.fit_transform(X)
    t1 = time()
    ax[0].scatter(Y[:, 0], Y[:, 1], c=color, label='scikit')
    ax[0].set_title('Sklearn-LE: {t:.2g}'.format(t=t1 - t0))

    # Jakes LPP Algorithm
    t0 = time()
    ml_model = LocalityPreservingProjection(n_components=n_components)
    ml_model.fit(X)
    Y = ml_model.transform(X)
    t1 = time()
    ax[1].scatter(Y[:, 0], Y[:, 1], c=color, label='Jakes Algorithm')
    ax[1].set_title('Jakes LPP: {t:.2g}'.format(t=t1 - t0))

    # my SSSE algorithm
    t0 = time()
    ml_model = LocalityPreservingProjections(weight='angle',
                                             n_components=n_components,
                                             n_neighbors=n_neighbors,
                                             sparse=True,
                                             eig_solver='dense')
    ml_model.fit(X)
    Y = ml_model.transform(X)
    t1 = time()
    ax[2].scatter(Y[:, 0], Y[:, 1], c=color, label='My LPP Algorithm')
    ax[2].set_title('My LPP: {t:.2g}'.format(t=t1 - t0))

    plt.show()
def demo(k):
    """Show a noisy swiss roll next to its LE and LER embeddings.

    ``k`` is the number of neighbours used by both embeddings. The left
    panel is replaced by a 3D axes for the raw data; all three panels are
    coloured by the swiss-roll position ``t``.
    """
    X, t = make_swiss_roll(noise=1)

    le_X = SpectralEmbedding(n_components=2, n_neighbors=k).fit_transform(X)
    ler_X = LER(n_components=2, n_neighbors=k, affinity='rbf').fit_transform(X, t)

    _, axes = plt.subplots(nrows=1, ncols=3, figsize=plt.figaspect(0.33))
    # Hide the flat placeholder and put a 3D axes in the same grid slot.
    axes[0].set_axis_off()
    axes[0] = plt.subplot(131, projection='3d')

    panels = (
        (X, 'Swiss Roll'),
        (le_X, 'LE Embedding'),
        (ler_X, 'LER Embedding'),
    )
    for axis, (points, panel_title) in zip(axes, panels):
        axis.scatter(*points.T, c=t, s=50)
        axis.set_title(panel_title)
    plt.show()
def swiss_roll_test():
    """Time and plot two Laplacian Eigenmaps embeddings of an S-curve sample.

    Compares scikit-learn's SpectralEmbedding with the ``LaplacianEigenmaps``
    implementation referenced below. Each panel title carries the elapsed
    wall-clock time of that method.
    """
    import matplotlib.pyplot as plt
    plt.style.use('ggplot')

    from time import time

    from sklearn import datasets
    from sklearn.manifold import SpectralEmbedding

    n_points = 1000
    # make_s_curve is exposed directly on sklearn.datasets; the
    # samples_generator submodule was removed from scikit-learn.
    X, color = datasets.make_s_curve(n_points, random_state=0)
    n_neighbors = 10
    n_components = 2

    # original scikit-learn LE algorithm
    t0 = time()
    ml_model = SpectralEmbedding(affinity='nearest_neighbors',
                                 n_neighbors=n_neighbors,
                                 n_components=n_components)
    Y = ml_model.fit_transform(X)
    t1 = time()

    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(5, 10))
    ax[0].scatter(Y[:, 0], Y[:, 1], c=color, label='scikit')
    ax[0].set_title('Sklearn LE: {t:.2g}'.format(t=t1 - t0))

    # my Laplacian Eigenmaps algorithm
    t0 = time()
    ml_model = LaplacianEigenmaps(n_components=n_components,
                                  n_neighbors=n_neighbors)
    # NOTE(review): fit() followed by fit_transform() may fit twice and so
    # inflate the reported time - confirm against LaplacianEigenmaps.
    ml_model.fit(X)
    Y = ml_model.fit_transform(X)
    t1 = time()

    ax[1].scatter(Y[:, 0], Y[:, 1], c=color, label='My LE Algorithm')
    ax[1].set_title('My LE: {t:.2g}'.format(t=t1 - t0))

    plt.show()
def plot2d(X, y, scale=True, normalize=False, embedding='pca', title=''):
    """Plot data reduced to two dimensions by the chosen embedding.

    All embeddings are unsupervised; ``y`` is only used to colour the points.

    Parameters
    ----------
    X : data to embed.
    y : per-sample values used as scatter colours.
    scale : apply ``StandardScaler`` to ``X`` first.
    normalize : apply an l2 ``Normalizer`` to ``X`` (after scaling).
    embedding : one of 'pca', 'isomap', 'lle', 'tsne', 'spectral', 'mds',
        or 'gallery' to draw all six side by side in one figure.
    title : text added to the plot title.

    Returns
    -------
    The ``matplotlib.pyplot`` module, so callers can show or save the figure.

    Raises
    ------
    ValueError
        If ``embedding`` is not one of the names listed above.
    """
    if scale:
        X = StandardScaler().fit_transform(X)
    if normalize:
        X = Normalizer(norm='l2').fit_transform(X)

    # Compare by value: identity checks on strings ("is") only work by
    # accident of interning and fail for strings built at runtime.
    if embedding == 'gallery':
        gallery = (
            ('pca', lambda: PCA(n_components=2)),
            ('isomap', lambda: Isomap(n_neighbors=20)),
            ('lle', lambda: LocallyLinearEmbedding(n_neighbors=20)),
            ('tsne', lambda: TSNE()),
            ('spectral', lambda: SpectralEmbedding()),
            ('mds', lambda: MDS()),
        )
        plt.figure(1)
        # Subplot codes 231..236 fill a 2x3 grid in reading order.
        for position, (name, make_estimator) in enumerate(gallery, start=231):
            plt.subplot(position)
            plt.title(name)
            X_t = make_estimator().fit_transform(X)
            plt.scatter(X_t[:, 0], X_t[:, 1], c=y)
        plt.suptitle('Gallery transforms ' + title)
        return plt

    # Factories are lazy so only the requested estimator is constructed.
    factories = {
        'pca': lambda: PCA(n_components=2),
        'isomap': lambda: Isomap(n_components=2, n_neighbors=20),
        'lle': lambda: LocallyLinearEmbedding(n_components=2, n_neighbors=5),
        'tsne': lambda: TSNE(n_components=2),
        'spectral': lambda: SpectralEmbedding(n_components=2),
        'mds': lambda: MDS(n_components=2),
    }
    make_estimator = factories.get(embedding) if isinstance(embedding, str) else None
    if make_estimator is None:
        raise ValueError(
            "Unknown embedding {!r}. Choose between pca, isomap, lle, tsne, "
            "spectral, mds and gallery".format(embedding))
    X_transformed = make_estimator().fit_transform(X)

    plt.title(title + ' ' + embedding + ' plot')
    sc = plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y)
    plt.colorbar(sc)
    return plt
###############################################################################

# <codecell>

###############################################################################
# 2 circles
# Build a noisy image of the two circles and a graph over its masked pixels.
img = circle1 + circle2
mask = img.astype(bool)
img = img.astype(float)
img += 1 + 0.1 * np.random.randn(*img.shape)

graph = image.img_to_graph(img, mask=mask)
# Turn the edge values into affinities that decay with their magnitude.
graph.data = np.exp(-graph.data / graph.data.std())

se = SpectralEmbedding(n_components=5, affinity='precomputed')
Y = se.fit_transform(graph)

# One panel per embedding component; pixels outside the mask are set to -1.
for j in range(0, se.n_components):
    pl.subplot(1, se.n_components, j + 1)
    label_im = -np.ones(mask.shape)
    label_im[mask] = Y[:, j]
    pl.title('Eigen Vec. %i' % (j + 1), size=18)
    pl.imshow(label_im, cmap=pl.cm.Spectral)
pl.show()

# Second against third component, coloured by the first.
pl.figure()
pl.scatter(Y[:, 1], Y[:, 2], c=Y[:, 0], cmap=pl.cm.Spectral)

# <codecell>
def apply_spectral_embedding(proj_data, proj_weights=None):
    """Return a 2D spectral embedding of the column-normalised data.

    ``proj_data`` is passed through ``normalize_columns`` and transposed
    before fitting, so its columns are the embedded samples.
    ``proj_weights`` is accepted but not used.
    """
    norm_data = normalize_columns(proj_data)
    model = SpectralEmbedding(n_components=2, random_state=RANDOM_SEED)
    return model.fit_transform(norm_data.T)
import numpy as np
# eigsh lives in SciPy; the sklearn.utils.arpack backport no longer exists.
from scipy.sparse.linalg import eigsh
from sklearn.decomposition import TruncatedSVD

app = service.prodbox.CinemaService()
X = app.getWeightedSearchFeatures(15)

# k-nearest-neighbours graph (k=10) and its Laplacian; the second argument
# of graph_laplacian is passed as True.
graph = kneighbors_graph(X, 10)
lap = graph_laplacian(graph, True)

# Estimate the leading eigenvalues of -lap in two ways and print both:
# through a truncated SVD and through shift-invert eigsh.
svd = TruncatedSVD(n_components = 30, algorithm="arpack")
lap = spectral_embedding_._set_diag(lap, 1)
svd.fit(-lap)
eigenvalues = np.diag(svd.components_ * (-lap).todense() * svd.components_.T)
eigenvalues2, _ = eigsh(-lap, k=30, which='LM', sigma=1)
print(eigenvalues)
print(eigenvalues2)

se = SpectralEmbedding(n_components = 30, eigen_solver='arpack', affinity="nearest_neighbors")
se.fit(X)
app.quit()

# TODO : check budget distribution, draw budget conditionally
out = connected_components(graph)
import sklearn.metrics
from sklearn.decomposition import PCA

# NOTE(review): pickle.load can execute arbitrary code; only load dataset
# files from a trusted source.
# The context manager closes the file handle once the data is loaded.
with open("mydataset.p", "rb") as dataset_file:
    (X, Y, frac0, frac1) = pickle.load(dataset_file)

# RBF kernel width and the matching gamma = 1 / (2 * sigma^2).
σ = 0.4
γ = 1.0/(2*σ*σ)

# PCA
pca = PCA(n_components=1, svd_solver='full')
pca_out = pca.fit_transform(X)

# KPCA
# NOTE(review): the centred kernel matrix is given to a SpectralEmbedding
# that keeps its default affinity, i.e. its rows are used as features -
# confirm this is the intended "KPCA" computation.
embedding = SpectralEmbedding(n_components=1)
K = sklearn.metrics.pairwise.rbf_kernel(X, gamma=γ)
# Centring matrix H = I - (1/n) * ones, sized from the kernel itself
# instead of a hard-coded sample count.
n_samples = K.shape[0]
H = np.eye(n_samples) - (1.0/n_samples)*np.ones((n_samples, n_samples))
K = H.dot(K).dot(H)
Xout = embedding.fit_transform(K)

plt.figure(figsize=(10,3))
plt.subplot(131)
plt.plot(X[Y == 0][:,0], X[Y == 0][:,1], 'go')
plt.plot(X[Y == 1][:,0], X[Y == 1][:,1], 'bx')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Data in original space')
km_model = KMeans(n_clusters = K, n_init = 1) results_KM = np.zeros((trials, 3)) for i in range(trials): ypred = km_model.fit_predict(train_x) nmi = metrics.adjusted_mutual_info_score(train_y, ypred) ari = metrics.adjusted_rand_score(train_y, ypred) ac = acc(ypred, train_y) results_KM[i] = np.array([nmi, ari, ac]) KM_mean = np.mean(results_KM, axis = 0) KM_std = np.std(results_KM, axis = 0) # Perform SC print('SC started...') results_SC = np.zeros((trials, 3)) se_model = SpectralEmbedding(n_components=K, affinity='rbf', gamma = 0.1) se_vec = se_model.fit_transform(train_x) for i in range(trials): ypred = km_model.fit_predict(se_vec) nmi = metrics.adjusted_mutual_info_score(train_y, ypred) ari = metrics.adjusted_rand_score(train_y, ypred) ac = acc(ypred, train_y) results_SC[i] = np.array([nmi, ari, ac]) SC_mean = np.mean(results_SC, axis = 0) SC_std = np.std(results_SC, axis = 0) # for PenDigits, perform DCN and SAE+KM config = {'Init': '', 'lbd': .5, 'beta': 1,
geomMeanOfD = np.array(np.sqrt(val1 * val2)).reshape(1, -1) tmpAdj = (groupV * geomMeanOfD).dot(groupV.T) tmpRowMatrix.append(tmpAdj) tmpRowMatrix = np.hstack(tmpRowMatrix) simMatAllTimes.append(tmpRowMatrix) simMatAllTimes = np.vstack(simMatAllTimes) simMatAllTimes = setDiagToOne(forcePosDef(simMatAllTimes)) # tsneModel = TSNE(n_components=2, metric='precomputed', # method='barnes_hut', perplexity=30.0) # #method='barnes_hut' # #method='exact' # distMat = 1.0 / (simMatAllTimes + 1.01) tsneModel = SpectralEmbedding(n_components=2, affinity='precomputed', gamma=1.0, n_neighbors=6) distMat = simMatAllTimes + 1.0 tsneModel = tsneModel.fit(distMat) sizeScale = np.abs(tsneModel.embedding_.ravel()).max() tsneModel.embedding_ /= sizeScale ############################# # # Plotting # # set plot parameters
class BranchedEmbeddedGaussians(object):
    """Principal-graph model with one Gaussian per graph node.

    ``fit`` embeds the data spectrally (adaptive RBF affinity), fits a
    spanning-tree ``PrincipalGraph`` in the embedding and, unless
    ``just_tree`` is set, estimates a mean and a shrinkage covariance for
    every node from the node membership probabilities.
    """

    def __init__(self, n_nodes=None, npcs=0.8, embedding_dims=2,
                 cov_estimator='corpcor', cov_reg=None, cov_indices=None,
                 max_iter=10, sigma=0.01, lam=1., gamma=1., n_neighbors=30,
                 just_tree=False):
        """Store the configuration and create the embedding and PCA estimators.

        ``n_nodes`` may be an int, a float fraction of the sample count, or
        ``None`` (treated as 0.1 in ``fit``). ``cov_indices`` selects the
        feature columns used for the covariances; ``None`` means all.
        """
        self.n_nodes = n_nodes
        self.cov_reg = cov_reg
        self.cov_estimator = cov_estimator
        self.cov_indices = cov_indices
        self.max_iter = max_iter
        self.sigma = sigma
        self.lam = lam
        self.gamma = gamma
        self.npcs = npcs
        self.n_neighbors = n_neighbors
        self.embedding = SpectralEmbedding(n_components=embedding_dims,
                                           affinity='precomputed')
        self.pca = PCA(n_components=self.npcs)
        self.just_tree = just_tree

    def _resolve_cov_indices(self, n_dims):
        """Return the feature columns used for covariances (all if unset)."""
        if self.cov_indices is None:
            return np.arange(n_dims)
        return self.cov_indices

    def fit(self, data_array):
        """Fit the principal graph and, optionally, the per-node Gaussians.

        ``data_array`` has one sample per row. Sets ``graph``,
        ``node_positions`` and, unless ``just_tree`` is true, ``means`` and
        ``covariances``.
        """
        n_samples, n_dims = data_array.shape
        if self.n_nodes is None:
            self.n_nodes = 0.1
        if isinstance(self.n_nodes, float):
            # A float is a fraction of the samples. The count must be an
            # int because it is later used as an array dimension.
            self.n_nodes = max(2, int(np.round(n_samples * self.n_nodes)))

        self._pca_tx = self.pca.fit_transform(data_array)
        self._affinity = adaptive_rbf_matrix(data_array,
                                             n_neighbors=self.n_neighbors)
        self._embedding_tx = self.embedding.fit_transform(self._affinity)

        pt = PrincipalGraph(gstruct='span-tree', gamma=self.gamma,
                            sigma=self.sigma, max_iter=self.max_iter,
                            lam=self.lam, n_nodes=self.n_nodes)
        pt.fit(self._embedding_tx)
        self._pt = pt
        self.graph = pt.graph
        self.node_positions = self._pt.node_positions

        if not self.just_tree:
            # cov_indices select feature columns, so the default spans the
            # features (n_dims), not the samples.
            cov_indices = self._resolve_cov_indices(n_dims)
            self.means, self.covariances = self._calculate_gaussian_params(
                data_array, self._pt._probabilities, cov_indices)

    def _map_samples_to_nodes(self, data_array, means, covs):
        """Assign each sample to the node with the highest Gaussian density.

        Returns ``(mapping, mapping_probs)``: the winning node index per
        sample (as floats) and the density of every sample under every node.
        """
        cov_indices = self._resolve_cov_indices(data_array.shape[1])
        n_nodes = means.shape[0]
        mapping_probs = np.zeros([data_array.shape[0], n_nodes])
        for i in range(n_nodes):
            mapping_probs[:, i] = stats.multivariate_normal.pdf(
                data_array[:, cov_indices],
                mean=means[i, cov_indices],
                cov=covs[i, :, :],
                allow_singular=True)
        # Kept as floats to match the array type callers received before.
        mapping = np.argmax(mapping_probs, axis=1).astype(float)
        return mapping, mapping_probs

    def _calculate_gaussian_params(self, data_array, mapping_probs, cov_indices):
        """Compute a weighted mean and a shrinkage covariance for every node.

        ``mapping_probs[:, i]`` are the sample weights of node ``i``.
        NOTE(review): the mean is the plain weighted sum, so it is a weighted
        average only if each weight column sums to 1 - confirm.
        """
        means = np.zeros([self.n_nodes, data_array.shape[1]])
        cov_dim = len(cov_indices)
        covariances = np.zeros([self.n_nodes, cov_dim, cov_dim])
        for i in range(self.n_nodes):
            weights = mapping_probs[:, i]
            weights = np.reshape(weights, [len(weights), 1])
            weighted_data = weights * data_array
            means[i, :] = weighted_data.sum(axis=0)
            if self.cov_reg is None:
                covariances[i, :, :] = np.copy(corpcor.cov_shrink(
                    data_array[:, cov_indices], weights=weights))
            else:
                # 'lambda' is a Python keyword, so it is passed via a dict.
                covariances[i, :, :] = np.copy(corpcor.cov_shrink(
                    data_array[:, cov_indices], weights=weights,
                    **{'lambda': self.cov_reg}))
        return means, covariances

    def predict_proba(self, data_array):
        """Return ``(mapping, mapping_probs)`` for new samples using the fitted Gaussians."""
        mapping, mapping_probs = self._map_samples_to_nodes(
            data_array, self.means, self.covariances)
        return mapping, mapping_probs

    def predict(self, data_array):
        """Return the most likely node index for every sample."""
        mapping, _ = self.predict_proba(data_array)
        return mapping