def assign_kmeans_labels(pos, centers, verbose=False):
    """
    Label each point with the index of its nearest spherical k-means center

    Parameters
    ----------
    pos : np.ndarray
        positions of points in (RA, DEC)
    centers : int or np.ndarray
        Either the number of centers to fit, or the (RA, DEC) coordinates
        of centers that already exist
    verbose : bool
        verbose flag forwarded to **kmeans_radec** when centers are fitted

    Returns
    -------
    np.array, np.ndarray
        * K-means label of every row of ``pos`` (cast to int)
        * K-means centers held by the k-means object
    """
    if np.iterable(centers):
        # explicit centers were handed in: use them as they are
        assert len(centers.shape) == 2  # shape should be (:, 2)
        km = krd.KMeans(centers)
    else:
        # only a count was given: fit the centers, passing half the number
        # of points as the ``nsample`` argument of kmeans_sample
        half = pos.shape[0] // 2
        km = krd.kmeans_sample(pos, ncen=centers, nsample=half,
                               verbose=verbose)
        if not km.converged:
            # one more pass over the full point set if sampling was not enough
            km.run(pos, maxiter=100)

    nearest = km.find_nearest(pos)
    return nearest.astype(int), km.centers
def _FindIndex(self, regions=None):
    """
    Assign every object of every catalogue to its nearest jackknife center.

    For each entry of ``self.jkargs`` the matching centers file is loaded,
    a ``kmeans_radec.KMeans`` object is built from it, and the result of
    ``find_nearest`` on the (RA, DEC) pairs from ``self._GetRaDec(i)`` is
    appended to ``self.index``.

    Parameters
    ----------
    regions : str or sequence of str, optional
        Centers file(s) readable by ``numpy.loadtxt``. A single string is
        reused for every entry of ``self.jkargs``. When omitted,
        ``self.regions`` is used.

    Raises
    ------
    Exception
        If no regions are given and ``self.regions`` is missing or None, or
        if the number of regions files differs from ``len(self.jkargs)``.

    Notes
    -----
    Sets ``self.index`` (one entry per catalogue) and ``self.njack`` (number
    of rows of the most recently loaded centers file).
    """
    if regions is None:
        # Only a missing (or unset) attribute means "nothing specified";
        # any other error raised while reading it should surface unchanged.
        regions = getattr(self, 'regions', None)
        if regions is None:
            raise Exception('You must specify a regions file to use')

    self.index = []

    # isinstance also accepts str subclasses, which the exact type
    # comparison used to treat as a sequence of single characters.
    if isinstance(regions, str):
        regions = [regions] * len(self.jkargs)

    if len(regions) != len(self.jkargs):
        raise Exception(
            'Number or regions files (%i) does not match the number of jkargs (%i)'
            % (len(regions), len(self.jkargs)))

    for i in range(len(self.jkargs)):
        centers = _np.loadtxt(regions[i])
        self.njack = centers.shape[0]
        km = kmeans_radec.KMeans(centers)

        ra, dec = self._GetRaDec(i)
        # two-column float array: RA in column 0, DEC in column 1
        rdi = _np.zeros((len(ra), 2))
        rdi[:, 0] = ra
        rdi[:, 1] = dec

        self.index.append(km.find_nearest(rdi))
def find_centers(x_samp, ncen, RA_bounds, Dec_bounds, maxiter=100, ntries=10):
    """
    Search for k-means centers such that no cluster is left empty.

    Up to ``ntries`` times, ``ncen`` starting centers are drawn uniformly in
    RA and Dec inside the given bounds and k-means is run on ``x_samp.T``.
    The first run in which every one of the ``ncen`` clusters received at
    least one point wins.

    Parameters
    ----------
    x_samp : np.ndarray
        Sample positions; its transpose is handed to ``KMeans.run``
        (presumably shape (2, N) -- confirm against the caller).
    ncen : int
        Number of centers.
    RA_bounds, Dec_bounds : sequence of two floats
        Lower and upper bound used for the random starting centers.
    maxiter : int
        Maximum number of k-means iterations per attempt.
    ntries : int
        Number of random restarts before giving up (default 10).

    Returns
    -------
    np.ndarray or None
        The centers of the first successful run, or None (after printing a
        message) when every attempt left at least one cluster empty.
    """
    for _ in range(ntries):
        RA = RA_bounds[0] + (RA_bounds[1] - RA_bounds[0]) * np.random.rand(ncen)
        Dec = Dec_bounds[0] + (Dec_bounds[1] - Dec_bounds[0]) * np.random.rand(ncen)
        cen_guess = np.array([RA, Dec]).T

        km = kmrd.KMeans(cen_guess, verbose=0)
        km.run(X=x_samp.T, maxiter=maxiter)
        sys.stdout.flush()

        # minlength makes the histogram cover all ncen clusters; without it
        # bincount stops at the largest label seen, so empty clusters with
        # the highest indices would go unnoticed.
        counts = np.bincount(km.labels, minlength=ncen)
        if not np.any(counts == 0):
            return km.centers

    print("Did not find a good set of centers")
    return None
def GenerateRegions(jarrs, jras, jdecs, jfile, njack, gindex, jtype):
    """
    Build the k-means object that defines the jackknife regions.

    Parameters
    ----------
    jarrs : sequence
        Catalogues; ``jarrs[gindex]`` is the one used to generate centers.
    jras, jdecs : sequence
        Per-catalogue keys used to index the RA and DEC columns.
    jfile : str or None
        Centers file. Written in 'generate' mode (defaults to
        ``'JK-<njack>.txt'`` when None) and read in 'read' mode.
    njack : int
        Number of regions to generate (only used in 'generate' mode).
    gindex : int
        Index of the catalogue used to generate the centers.
    jtype : {'generate', 'read'}
        Whether to fit new centers or load existing ones from ``jfile``.

    Returns
    -------
    list
        ``[km, jfile]``: the k-means object and the centers file name.

    Raises
    ------
    RuntimeError
        If k-means does not converge in 'generate' mode.
    ValueError
        If ``jtype`` is neither 'generate' nor 'read'.
    """
    if jtype == 'generate':
        # two-column float array: RA in column 0, DEC in column 1
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)

        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=200, tol=1.0e-5)
        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)
    elif jtype == 'read':
        km = kmeans_radec.KMeans(np.loadtxt(jfile))
    else:
        # previously this fell through to the return with ``km`` unbound
        raise ValueError(
            "jtype must be 'generate' or 'read', got %r" % (jtype,))

    return [km, jfile]
def assign_jk_labels(ra, dec, centers):
    """
    Assigns a Jackknife (JK) label to the points based on the passed centers

    Parameters
    -----------
    ra : np.array
        Right Ascension of objects
    dec : np.array
        Declination of objects
    centers : np.array
        Coordinates of centers for K-means patches

    Returns
    --------
    list of int arrays, list of int arrays, int array
        * for each patch i, positions of the points whose label *IS* i,
        * for each patch i, positions of the points whose label is *NOT* i,
        * JK label of every point

    Notes
    -----
    One entry is produced for every row of ``centers``, so a patch that
    received no points yields an empty index array rather than being skipped.
    """
    pos = np.vstack((ra, dec)).T
    labels = krd.KMeans(centers).find_nearest(pos).astype(int)

    in_patch = []
    out_of_patch = []
    for patch in np.arange(len(centers), dtype=int):
        # everything except patch i
        out_of_patch.append(np.nonzero(labels != patch)[0])
        # patch i itself
        in_patch.append(np.nonzero(labels == patch)[0])

    return in_patch, out_of_patch, labels
def get_patches(self, centers, verbose=False):
    """
    Splits ``self.pos`` into JK subpatches with the spherical k-means
    algorithm of kmeans_radec and stores the result on the instance

    Sets ``km``, ``centers``, ``ncen``, ``labels``, ``sub_labels``,
    ``indexes``, ``non_indexes`` and zero-filled ``dsx_sub`` / ``dst_sub``
    containers of shape (``self.nbin``, ``self.ncen``).

    :param centers: JK center coordinates (RA, DEC) or the number of centers

    :param verbose: passed to kmeans radec when the centers are fitted
    """
    if np.iterable(centers):
        # explicit (RA, DEC) centers were handed in
        assert len(centers.shape) == 2  # shape should be (:, 2)
        self.km = krd.KMeans(centers)
        self.centers = centers
        self.ncen = len(centers)
    else:
        # only a count was given: fit the centers, passing half the number
        # of points as the ``nsample`` argument of kmeans_sample
        self.ncen = centers
        half = self.pos.shape[0] // 2
        self.km = krd.kmeans_sample(self.pos, ncen=self.ncen, nsample=half,
                                    verbose=verbose)
        if not self.km.converged:
            self.km.run(self.pos, maxiter=100)
        self.centers = self.km.centers

    nearest = self.km.find_nearest(self.pos)
    self.labels = nearest.astype(int)
    # only the labels that actually occur among the points
    self.sub_labels = np.unique(self.labels)

    # subsample i: every point whose label differs from sub_labels[i]
    self.indexes = [np.nonzero(self.labels != lab)[0]
                    for lab in self.sub_labels]
    # complement of subsample i: the points carrying label sub_labels[i]
    self.non_indexes = [np.nonzero(self.labels == lab)[0]
                        for lab in self.sub_labels]

    container_shape = (self.nbin, self.ncen)
    self.dsx_sub = np.zeros(shape=container_shape)
    self.dst_sub = np.zeros(shape=container_shape)
def JackknifeOnSphere(jarrs, jras, jdecs, jfunc, jargs=[], jkwargs={},
                      jtype='generate', jfile=None, njack=24,
                      generateonly=False, gindex=0, varonly=False, save=None):
    """
    Delete-one jackknife of ``jfunc`` over k-means regions on the sky.

    The catalogues are split into regions by nearest k-means center.
    ``jfunc`` is evaluated once on the full catalogues and once per region
    with that region removed; the covariance (or variance) of the
    vector-valued results is estimated from the spread of those runs.

    Parameters
    ----------
    jarrs : array or sequence of arrays
        Catalogue(s); normalised with ``EnforceArray2D``.
    jras, jdecs : key or sequence of keys
        Per-catalogue keys used to index the RA / DEC columns; normalised
        with ``EnforceArray2D``.
    jfunc : callable
        Called as ``jfunc(catalogues, *jargs, **jkwargs)``; must return a
        pair ``(vector_results, other_results)``.
    jargs, jkwargs : list, dict
        Extra arguments for ``jfunc``. They are only unpacked, never
        mutated, so the shared default objects are harmless here.
    jtype : {'generate', 'read'}
        Fit new centers from ``jarrs[gindex]`` and save them to ``jfile``,
        or read existing centers from ``jfile``.
    jfile : str or None
        Centers file; defaults to ``'JK-<njack>.txt'`` in 'generate' mode.
    njack : int
        Number of regions to generate; in 'read' mode it is replaced by the
        number of centers in the file.
    generateonly : bool
        In 'generate' mode, return ``jfile`` right after writing it.
    gindex : int
        Index of the catalogue used to generate the centers.
    varonly : bool
        If True, store only a per-element variance instead of the full
        covariance matrix.
    save : str or None
        Directory prefix; when given, results are written with
        ``Write2Dir`` under ``<save>/vec``, ``<save>/cov``, ``<save>/other``.

    Returns
    -------
    list or str
        ``[full_j, cov_j, full_other, it_other]``, or the centers file name
        when ``generateonly`` is set in 'generate' mode.

    Raises
    ------
    ValueError
        If ``jtype`` is neither 'generate' nor 'read'.
    RuntimeError
        If k-means does not converge in 'generate' mode.
    """
    # Fail early on a bad mode; previously this surfaced much later as a
    # NameError on the never-created k-means object.
    if jtype not in ('generate', 'read'):
        raise ValueError(
            "jtype must be 'generate' or 'read', got %r" % (jtype,))

    jarrs = EnforceArray2D(jarrs)
    jras = EnforceArray2D(jras)
    # The original bound this to a misspelled name (``jdec``), so ``jdecs``
    # itself was never normalised even though it is indexed per catalogue.
    jdecs = EnforceArray2D(jdecs)

    if jtype == 'generate':
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)

        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=200, tol=1.0e-5)
        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)

        if generateonly:
            return jfile
    else:
        centers = np.loadtxt(jfile)
        km = kmeans_radec.KMeans(centers)
        njack = len(centers)

    # Region index of every object, one array per catalogue.
    ind = []
    for i in range(len(jarrs)):
        rdi = np.zeros((len(jarrs[i]), 2))
        rdi[:, 0] = jarrs[i][jras[i]]
        rdi[:, 1] = jarrs[i][jdecs[i]]
        ind.append(km.find_nearest(rdi))

    # Full-sample measurement.
    full_j, full_other = jfunc(jarrs, *jargs, **jkwargs)
    full_j = EnforceArray2D(full_j)
    full_other = EnforceArray1D(full_other)

    # One list of jackknife realisations per returned quantity.
    it_j = [[] for _ in range(len(full_j))]
    it_other = [[] for _ in range(len(full_other))]

    for j in range(njack):
        print('JK %i' % (j))

        # Drop region j from every catalogue. ``!=`` replaces the unary
        # minus on a boolean mask, which modern NumPy rejects.
        ja = [jarrs[i][ind[i] != j] for i in range(len(jarrs))]

        i_j, i_other = jfunc(ja, *jargs, **jkwargs)
        i_j = EnforceArray2D(i_j)
        i_other = EnforceArray1D(i_other)

        for i in range(len(i_j)):
            it_j[i].append(np.copy(i_j[i]))
        for i in range(len(i_other)):
            it_other[i].append(np.copy(i_other[i]))

    for i in range(len(it_j)):
        it_j[i] = np.array(it_j[i])

    cov_j = []
    for k in range(len(full_j)):
        if varonly:
            cov = np.power(np.std(it_j[k], axis=0), 2.0) * njack * float(njack - 1) / njack
        else:
            csize = len(full_j[k])
            cov = np.zeros((csize, csize))
            # Fill the upper triangle and mirror it; deviations are taken
            # about the full-sample value.
            for i in range(csize):
                for j in range(i, csize):
                    cov[i, j] = np.sum(
                        (it_j[k][:, i] - full_j[k][i])
                        * (it_j[k][:, j] - full_j[k][j])) * float(njack - 1) / njack
                    if i != j:
                        cov[j, i] = cov[i, j]
        cov_j.append(cov)

    if save is not None:
        Write2Dir(os.path.join(save, 'vec'), full_j)
        Write2Dir(os.path.join(save, 'cov'), cov_j)
        Write2Dir(os.path.join(save, 'other'), full_other)

    return [full_j, cov_j, full_other, it_other]