def fit(self, X):
        '''Precompute the kernel factors of X against k-means prototypes.

        Attributes set on the instance:
            W: rbf_kernel between the prototypes and themselves
            H: rbf_kernel between the rows of X and the prototypes
            W_dagger: pseudo-inverse of W
            HtH: H^T H
            HtSH: H^T diag(d_tilde) H - (H^T H) W_dagger (H^T H)^T
            n: number of rows of X
        '''
        prototypes = tools.kmeans_centroids(X, self.n_prototypes).cluster_centers_
        gamma = 1. / self.sigma2

        self.W = rbf_kernel(prototypes, prototypes, gamma=gamma)
        self.H = rbf_kernel(X, prototypes, gamma=gamma)
        self.W_dagger = np.linalg.pinv(self.W)

        # d_tilde = H W_dagger (H^T 1): the row sums of H W_dagger H^T,
        # obtained without ever forming that n-by-n product.
        prototype_mass = self.H.T.sum(axis=1)
        d_tilde = self.H.dot(self.W_dagger.dot(prototype_mass))

        self.HtH = self.H.T.dot(self.H)

        # Broadcasting d_tilde over the columns of H^T gives H^T diag(d_tilde)
        # (assumes H is a plain ndarray -- TODO confirm).
        weighted_gram = (self.H.T * d_tilde).dot(self.H)
        projected_gram = self.HtH.dot(self.W_dagger).dot(self.HtH.T)
        self.HtSH = weighted_gram - projected_gram

        self.n = X.shape[0]
예제 #2
0
    def fit(self, X, kmeans_center=False):
        '''AnchorCloud construction via an exact Gaussian mixture.

        Fits a full-covariance mixture with ``self.n_clusters`` components to
        X and stores the result of ``predict_proba(X)`` in ``self.Z``.

        Parameters:
            X: data to fit; handed unchanged to the mixture model (and to
                ``tools.kmeans_centroids`` when ``kmeans_center`` is set).
            kmeans_center: when truthy, the component means are set to the
                k-means centroids of X before fitting.
        '''
        gmm = mixture.GMM(n_components=self.n_clusters, covariance_type='full', min_covar=1e-2)

        if kmeans_center:
            # Fix the GMM means to be the k-means centers.
            # NOTE(review): presumably this is sklearn's legacy mixture.GMM,
            # where 'wc' restricts both initialisation and updates to the
            # weights and covariances so the preset means_ survive fit()
            # -- confirm against the installed version.
            gmm.params = 'wc'
            gmm.init_params = 'wc'
            gmm.means_ = tools.kmeans_centroids(X, self.n_clusters).cluster_centers_

        gmm.fit(X)

        self.Z = gmm.predict_proba(X)
예제 #3
0
    def fit(self, X):
        '''AnchorGraph construction.

        Variables:
            A: anchors obtained via clustering (k-means centroids of X)
            Z: anchor embeddings from X to A; each row is expected to sum
               to 1 (this relies on tools.locally_anchor_embedding)

        Sets ``self.n`` to the number of rows of X and ``self.Z`` to an
        (n, n_clusters) array that is zero except at each sample's
        ``self.n_nbrs`` nearest anchors.
        '''
        self.n = X.shape[0]

        anchors = tools.kmeans_centroids(X, self.n_clusters).cluster_centers_

        # For every sample, indices of its nearest anchors (Euclidean);
        # the distances themselves are not needed.
        knn = NearestNeighbors(n_neighbors=self.n_nbrs, metric='euclidean')
        _, nearest_anchor_idx = knn.fit(anchors).kneighbors(X)

        local_weights = tools.locally_anchor_embedding(X, anchors, nearest_anchor_idx)

        # Scatter the per-neighbour weights into the full matrix: the column
        # vector of row indices broadcasts against the neighbour indices.
        row_idx = np.arange(self.n)[:, np.newaxis]
        embedding = np.zeros((self.n, self.n_clusters))
        embedding[row_idx, nearest_anchor_idx] = local_weights
        self.Z = embedding
예제 #4
0
    def fit(self, X):
        '''Build the anchor-assignment matrix ``self.Z`` for X.

        Steps:
            1. Use the k-means centroids of X as ``self.n_anchors`` anchors.
            2. Construct one ``Anchor`` per centroid from the
               ``self.n_data_per_anchor`` data points nearest to it (the
               Anchor constructor also estimates the local PPCA model).
            3. For every data point, evaluate ``log_ppca_density`` under its
               ``self.n_anchor_per_data`` nearest anchors.
            4. Convert those log densities to row-normalised weights and
               scatter them into ``self.Z`` of shape (n, n_anchors); all
               other entries stay zero.

        The wall-clock time of each stage is printed to stdout.
        '''
        # find anchor centroids
        A = tools.kmeans_centroids(X, self.n_anchors).cluster_centers_

        # find nearest datapoints for anchors
        start = time.time()
        _, nbrs_of_A = NearestNeighbors(n_neighbors=self.n_data_per_anchor).fit(X).kneighbors(A)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Nearest data search: %.3f secs' % (time.time() - start))

        # initialize anchors (including estimating local ppca models)
        start = time.time()
        anchors = [
            Anchor(A[j, :], X[nbrs_of_A[j, :], :], self.inner_dim)
            for j in range(self.n_anchors)
        ]
        print('Constructing clouds: %.3f secs' % (time.time() - start))

        # find nearest anchors for datapoints
        start = time.time()
        _, nbrs_of_X = NearestNeighbors(n_neighbors=self.n_anchor_per_data).fit(A).kneighbors(X)
        print('Nearest anchor search: %.3f secs' % (time.time() - start))

        # compute probability assignment with the "exp-normalize" trick
        n = X.shape[0]

        start = time.time()

        T = np.zeros((n, self.n_anchor_per_data))
        for i in range(n):
            x_i = X[i, :]  # loop-invariant for the inner loop
            for j in range(self.n_anchor_per_data):
                T[i, j] = anchors[nbrs_of_X[i, j]].log_ppca_density(x_i)

        # Subtracting each row's maximum before exponentiating keeps exp()
        # from overflowing; the shift cancels in the l1 normalisation.
        T = normalize(np.exp(T - np.max(T, axis=1)[:, np.newaxis]), axis=1, norm='l1')

        # Scatter the per-neighbour weights into the full (n, n_anchors) matrix.
        self.Z = np.zeros((n, self.n_anchors))
        self.Z[np.arange(n)[:, np.newaxis], nbrs_of_X] = T

        print('Constructing Z: %.3f secs' % (time.time() - start))