def kmeansselect(self):
    """Select data samples with the help of a k-means clustering.

    A ``Kmeans`` model with ``self._nsub`` bases is initialised and
    factorized on ``self.data``. The value returned is whatever
    ``dist.vq`` yields for the model's data and its fitted centres ``W``
    (presumably the index of the sample nearest to each centre -- confirm
    against ``dist.vq``).
    """
    clusterer = Kmeans(self.data, num_bases=self._nsub)
    clusterer.initialization()
    clusterer.factorize()

    # look up the data samples lying closest to the fitted centres
    return dist.vq(clusterer.data, clusterer.W)
def _map_w_to_data(self):
    """Snap every basis vector in W onto an actual data sample.

    The indices produced by ``vq(self.data, self.W)`` are stored in
    ``self._Wmapped_index``; ``self.Wmapped`` (same shape as ``self.W``)
    is then filled with the matching columns of ``self.data``. Nothing is
    returned -- the results live on the instance.
    """
    # index of the best matching data sample for each column of W
    self._Wmapped_index = vq(self.data, self.W)
    self.Wmapped = np.zeros(self.W.shape)

    # Copy one column at a time instead of slicing self.data[:, indices]:
    # the indices are not necessarily ascending, and sorting them (which
    # would be needed if self.data is an hdf5 table, see h5py) would break
    # the correspondence with the columns of W.
    for column, sample in enumerate(self._Wmapped_index):
        self.Wmapped[:, column] = self.data[:, sample]
def _map_w_to_data(self):
    """Replace each basis vector by the data sample that matches it best.

    ``vq(self.data, self.W)`` supplies one sample index per column of W;
    these are kept in ``self._Wmapped_index``. ``self.Wmapped`` is created
    with the shape of ``self.W`` and populated with the selected columns
    of ``self.data``. The method has no return value.
    """
    # best matching data sample for every basis vector
    self._Wmapped_index = vq(self.data, self.W)
    self.Wmapped = np.zeros(self.W.shape)

    # Deliberately no direct fancy-indexed assignment such as
    # self.data[:, indices]: the index list may be unsorted, and sorting
    # it (required when self.data is an hdf5 table, see h5py) would
    # destroy the pairing between samples and columns of W.
    for basis_pos, sample_pos in enumerate(self._Wmapped_index):
        self.Wmapped[:, basis_pos] = self.data[:, sample_pos]
def select_hull_points(data, n=3):
    """Pick data points lying on 2D convex hulls of pairwise projections.

    For every pair of dimensions drawn from the first ``n`` rows of
    ``data``, the convex hull of that 2D projection is computed with
    ``quickhull`` and ``vq`` is used to obtain indices of the data points
    belonging to the hull.

    Returns the sorted, de-duplicated indices as an ``int32`` array (empty
    when ``n`` is smaller than 2, since no pair of dimensions exists).
    """
    # Start from an empty float array so the result is well defined even
    # if the loop below never runs.
    chunks = [np.array([])]

    # visit each pairwise combination of the first n dimensions
    for dims in combinations(range(n), 2):
        projection = data[dims, :]
        # convex hull points of the 2D projection
        hull = quickhull(projection.T)
        # indices of the data points corresponding to the hull points
        chunks.append(np.ravel(vq(projection, hull.T)))

    return np.int32(np.unique(np.concatenate(chunks)))
def updateW(self):
    """Estimate W on a data subset and hand it to a model for the full data.

    Steps:
      1. ``self._subfunc()`` supplies sample indices; they are cast to
         int32 and sorted.
      2. A model built with ``self._mfmethod`` on those columns of
         ``self.data`` is initialised and factorized (``compW=True``,
         ``niter=self._niter``).
      3. ``self.mdl`` is created on the complete data with
         ``compW=False`` and ``niter=self._niterH`` and initialised.
      4. ``self.mdl.W`` is set either to a copy of the small model's W
         or, when ``self._mapW`` is truthy, to the data columns selected
         by ``dist.vq(self.mdl.data, W_small)``.
    """
    subset = np.sort(np.int32(self._subfunc()))

    reduced = self._mfmethod(
        self.data[:, subset],
        num_bases=self._num_bases,
        niter=self._niter,
        show_progress=self._show_progress,
        compW=True,
    )
    # set up the factors of the reduced model, then determine its W
    reduced.initialization()
    reduced.factorize()

    self.mdl = self._mfmethod(
        self.data[:, :],
        num_bases=self._num_bases,
        niter=self._niterH,
        show_progress=self._show_progress,
        compW=False,
    )
    self.mdl.initialization()

    if not self._mapW:
        self.mdl.W = np.copy(reduced.W)
        return

    # index of the data sample matched to each basis vector
    matched = dist.vq(self.mdl.data, reduced.W)

    # Assign column by column instead of self.mdl.data[:, matched]: the
    # indices may be unsorted, and sorting them (needed if the data is an
    # hdf5 table, see h5py) would break the pairing with W's columns.
    for column, sample in enumerate(matched):
        self.mdl.W[:, column] = self.mdl.data[:, sample]
def update_w(self):
    """Estimate W on a data subset and hand it to a model for the full data.

    Steps:
      1. ``self._subfunc()`` supplies sample indices; they are cast to
         int32 and sorted.
      2. A model built with ``self._mfmethod`` on those columns of
         ``self.data`` is initialised and factorized
         (``compute_w=True``).
      3. ``self.mdl`` is created on the complete data with
         ``compute_w=False`` and initialised.
      4. ``self.mdl.W`` is set either to a copy of the small model's W
         or, when ``self._mapW`` is truthy, to the data columns selected
         by ``dist.vq(self.mdl.data, W_small)``.
    """
    subset = np.sort(np.int32(self._subfunc()))

    reduced = self._mfmethod(
        self.data[:, subset],
        num_bases=self._num_bases,
        show_progress=self._show_progress,
        compute_w=True,
    )
    # set up the factors of the reduced model, then determine its W
    reduced.initialization()
    reduced.factorize()

    self.mdl = self._mfmethod(
        self.data[:, :],
        num_bases=self._num_bases,
        show_progress=self._show_progress,
        compute_w=False,
    )
    self.mdl.initialization()

    if not self._mapW:
        self.mdl.W = np.copy(reduced.W)
        return

    # index of the data sample matched to each basis vector
    matched = dist.vq(self.mdl.data, reduced.W)

    # Assign column by column instead of self.mdl.data[:, matched]: the
    # indices may be unsorted, and sorting them (needed if the data is an
    # hdf5 table, see h5py) would break the pairing with W's columns.
    for column, sample in enumerate(matched):
        self.mdl.W[:, column] = self.mdl.data[:, sample]
def update_h(self):
    """Rebuild H as a hard (one-hot) assignment of samples to centres.

    ``dist.vq(self.W, self.data)`` is stored in ``self.assigned``
    (presumably the index of the best matching centre per sample --
    confirm against ``dist.vq``). ``self.H`` is reset to zeros of its
    current shape and, for each of the ``self._num_samples`` columns, the
    row given by the assignment is set to 1.0.
    """
    # assign every sample to its best matching centre
    nearest = dist.vq(self.W, self.data)
    self.assigned = nearest

    # clear H, then flag exactly one entry per sample column
    self.H = np.zeros(self.H.shape)
    sample_columns = range(self._num_samples)
    self.H[nearest, sample_columns] = 1.0