def fit(self, X):
    '''Precompute prototype-based kernel statistics for the data X.

    Prototypes are taken from ``tools.kmeans_centroids`` with
    ``self.n_prototypes`` clusters, and RBF kernels are evaluated with
    ``gamma = 1 / self.sigma2``.

    Attributes set:
        W: kernel among the prototypes
        H: kernel between the rows of X and the prototypes
        W_dagger: pseudo-inverse of W
        HtH: H^T H
        HtSH: H^T diag(d_tilde) H - HtH W_dagger HtH^T
        n: number of rows of X
    '''
    prototypes = tools.kmeans_centroids(X, self.n_prototypes).cluster_centers_
    gamma = 1. / self.sigma2

    self.W = rbf_kernel(prototypes, prototypes, gamma=gamma)
    self.H = rbf_kernel(X, prototypes, gamma=gamma)
    self.W_dagger = np.linalg.pinv(self.W)

    # d_tilde = H W_dagger (H^T 1): the row sums of the low-rank product
    # H W_dagger H^T, obtained without forming that n-by-n matrix.
    column_totals = self.H.T.sum(axis=1)
    d_tilde = self.H.dot(self.W_dagger.dot(column_totals))

    self.HtH = self.H.T.dot(self.H)

    # Broadcasting d_tilde over H^T scales column i of H^T by d_tilde[i],
    # i.e. it forms H^T diag(d_tilde) before the product with H.
    degree_term = (self.H.T * d_tilde).dot(self.H)
    lowrank_term = self.HtH.dot(self.W_dagger).dot(self.HtH.T)
    self.HtSH = degree_term - lowrank_term

    self.n = X.shape[0]
def fit(self, X, kmeans_center=False):
    '''AnchorCloud construction via exact Gaussian mixture.

    Fits a full-covariance ``mixture.GMM`` with ``self.n_clusters``
    components to X and stores the posterior component probabilities.

    Parameters:
        X: data passed to the mixture model (and to k-means when
            kmeans_center is set).
        kmeans_center: when truthy, the component means are set to the
            centers from ``tools.kmeans_centroids`` and kept fixed during
            fitting instead of being estimated by the mixture model.

    Attributes set:
        Z: result of ``gmm.predict_proba(X)``.
    '''
    gmm = mixture.GMM(n_components=self.n_clusters,
                      covariance_type='full',
                      min_covar=1e-2)
    if kmeans_center:
        # Fix the GMM means to be the k-means centers: leaving 'm' out of
        # both strings means only weights ('w') and covariances ('c') are
        # initialised and updated, so the means assigned below survive fit().
        gmm.params = 'wc'
        gmm.init_params = 'wc'
        gmm.means_ = tools.kmeans_centroids(X, self.n_clusters).cluster_centers_
    gmm.fit(X)
    self.Z = gmm.predict_proba(X)
def fit(self, X):
    '''AnchorGraph construction.

    Variables:
        A: anchors obtained via clustering (k-means centers)
        Z: anchor embeddings from X to A; each row is expected to sum up
           to 1 (a property of tools.locally_anchor_embedding, not
           enforced here)

    Attributes set:
        n: number of rows of X
        Z: (n, n_clusters) array, zero except at each row's n_nbrs
           nearest anchors
    '''
    self.n = X.shape[0]
    A = tools.kmeans_centroids(X, self.n_clusters).cluster_centers_

    # Indices of the n_nbrs closest anchors for every data point; the
    # distances themselves are not needed.
    anchor_search = NearestNeighbors(n_neighbors=self.n_nbrs, metric='euclidean')
    anchor_search.fit(A)
    _, nbrs_idx = anchor_search.kneighbors(X)

    nbrs_Z = tools.locally_anchor_embedding(X, A, nbrs_idx)

    # Scatter the per-neighbour weights into the dense matrix: the column
    # vector of row numbers broadcasts against nbrs_idx.
    self.Z = np.zeros((self.n, self.n_clusters))
    row_ids = np.arange(self.n)[:, np.newaxis]
    self.Z[row_ids, nbrs_idx] = nbrs_Z
def fit(self, X):
    '''Build the anchor assignment matrix Z from per-anchor local models.

    Steps:
        1. Anchor centers A come from ``tools.kmeans_centroids`` with
           ``self.n_anchors`` clusters.
        2. Each anchor is built as an ``Anchor`` from its center, its
           ``self.n_data_per_anchor`` nearest data points and
           ``self.inner_dim``.
        3. Each data point is scored against its
           ``self.n_anchor_per_data`` nearest anchors with
           ``Anchor.log_ppca_density``.
        4. The log scores are turned into row-normalised weights and
           scattered into ``self.Z``.

    Attributes set:
        Z: (n, n_anchors) array, zero except at each row's nearest anchors.

    Side effects:
        Prints the wall-clock time of each stage to stdout.
    '''
    # Find anchor centroids.
    km = tools.kmeans_centroids(X, self.n_anchors)
    A = km.cluster_centers_

    # Find the nearest data points for every anchor.
    start = time.time()
    _, nbrs_of_A = NearestNeighbors(
        n_neighbors=self.n_data_per_anchor).fit(X).kneighbors(A)
    print('Nearest data search: %.3f secs' % (time.time() - start))

    # Initialize anchors (including estimating the local ppca models).
    # NOTE: an earlier, disabled variant sampled n_data_per_anchor points
    # from each k-means cluster (km.labels_ == j) and multiplied them by a
    # random 0/1 mask instead of using the nearest neighbours.
    start = time.time()
    anchors = [Anchor(A[j, :], X[nbrs_of_A[j, :], :], self.inner_dim)
               for j in range(self.n_anchors)]
    print('Constructing clouds: %.3f secs' % (time.time() - start))

    # Find the nearest anchors for every data point.
    start = time.time()
    _, nbrs_of_X = NearestNeighbors(
        n_neighbors=self.n_anchor_per_data).fit(A).kneighbors(X)
    print('Nearest anchor search: %.3f secs' % (time.time() - start))

    # Compute the probability assignment with the "exp-normalize" trick.
    n = X.shape[0]
    start = time.time()
    T = np.zeros((n, self.n_anchor_per_data))
    for i in range(n):
        x_i = X[i, :]
        for j in range(self.n_anchor_per_data):
            T[i, j] = anchors[nbrs_of_X[i, j]].log_ppca_density(x_i)
    # Subtracting each row's maximum before exponentiating keeps exp() from
    # overflowing; the l1 normalisation then makes each row sum to 1.
    T = normalize(np.exp(T - np.max(T, axis=1)[:, np.newaxis]),
                  axis=1, norm='l1')

    # Scatter the weights into the dense matrix: the column vector of row
    # numbers broadcasts against nbrs_of_X.
    self.Z = np.zeros((n, self.n_anchors))
    self.Z[np.arange(n)[:, np.newaxis], nbrs_of_X] = T
    print('Constructing Z: %.3f secs' % (time.time() - start))