def assign_kmeans_labels(pos, centers, verbose=False):
    """
    Defines 2D patches on the sky via spherical k-means

    Parameters
    ----------
    pos : np.ndarray
        positions of points in (RA, DEC)
    centers : int or np.ndarray
        Number of centers to use, or the (RA, DEC) coordinates of the centers
    verbose : bool
        verbose flag to pass to **kmeans_radec**

    Returns
    -------
    np.array, np.ndarray
        * K-means labels
        * K-means centers
    """
    if not np.iterable(centers):  # if centers is a number
        ncen = centers
        nsample = pos.shape[0] // 2
        km = krd.kmeans_sample(pos, ncen=ncen, nsample=nsample,
                               verbose=verbose)
        if not km.converged:
            km.run(pos, maxiter=100)
    else:  # if centers is an array of RA, DEC pairs
        assert len(centers.shape) == 2  # shape should be (:, 2)
        km = krd.KMeans(centers)

    labels = km.find_nearest(pos).astype(int)
    return labels, km.centers
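# Usage sketch for assign_kmeans_labels (illustrative only): the positions are
# synthetic, and the snippet assumes numpy is available as np and kmeans_radec
# as krd, as in the function above.
rng = np.random.RandomState(0)
pos = np.column_stack((rng.uniform(10.0, 20.0, 10000),   # RA  [deg]
                       rng.uniform(-5.0, 5.0, 10000)))   # DEC [deg]

# Let the routine find 20 centers itself ...
labels, centers = assign_kmeans_labels(pos, 20)

# ... then reuse those centers to label a second catalog consistently.
labels2, _ = assign_kmeans_labels(pos, centers)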
def find_kmeans_centers(ncen, maxiter, tol, points, centers=None, verbose=1):
    import numpy as np
    import kmeans_radec as kmrd

    np.random.seed(0)

    # Make sure RA and Dec are both given
    assert points.shape[0] == 2 or points.shape[1] == 2, "Must have RA and Dec"
    if points.shape[0] == 2:
        points = points.T  # transpose to shape (N, 2)

    # Check if centers are guessed, and if so, make sure both RA and Dec are given
    if centers is not None:
        assert centers.shape[0] == 2 or centers.shape[1] == 2, \
            "Centers must have RA and Dec if given"
        if centers.shape[0] == 2:
            centers = centers.T
        km = kmrd.kmeans(points, centers, tol=tol, maxiter=maxiter,
                         verbose=verbose)
    else:
        km = kmrd.kmeans_sample(points, ncen, maxiter=maxiter, tol=tol,
                                verbose=verbose)

    return km.centers
def add_jackknife_field(catalog, njackfields):
    """Assign jackknife regions to random or lens catalogs.

    Parameters
    ----------
    catalog : numpy array
        Lens or random catalog.
    njackfields : int
        Number of Jackknife resampling fields.

    Return
    ------
        Catalog with `jk_field` column.
    """
    # Get field ID
    fields = get_field_id(catalog)

    # Make sure that `jk_field` key is available. If not, add one.
    # (Depending on the numpy version, a missing field raises KeyError or ValueError.)
    try:
        catalog['jk_field']
    except (KeyError, ValueError):
        catalog = append_fields(catalog, 'jk_field',
                                np.zeros(len(catalog), int), usemask=False)

    # If njackfields == 1, just add 1 to everything, although that is a
    # bad idea for resampling... print out a warning.
    if njackfields == 1:
        catalog['jk_field'] = 1
        print("# Only one Jackknife field? Seriously?")
        return catalog

    # Calculate the number of Jackknife regions per field
    jk_fields_per_field = get_jk_regions_per_field(catalog, njackfields)

    jk_next = 0
    for i, field in enumerate(fields):
        indexes = (catalog['field'] == field).nonzero()
        new_catalog = catalog[indexes]

        # perform kmeans
        radec = np.column_stack((new_catalog['ra'], new_catalog['dec']))
        km = kmeans_radec.kmeans_sample(radec, jk_fields_per_field[i],
                                        maxiter=100, tol=1.0e-5, verbose=False)

        # assign jk_field, shifting up by jk_next as labels are 0-n
        new_catalog['jk_field'] = km.labels + jk_next

        # Write back to the catalog.
        # Do this rather than recreating so that order is preserved
        catalog['jk_field'][indexes] = new_catalog['jk_field']

        # increment jk_next so that next field has higher jk numbers
        jk_next += jk_fields_per_field[i]

    return catalog
def run_kmeans(self):
    """
    run the kmeans algorithm, checking for convergence
    """
    import kmeans_radec

    self._km = kmeans_radec.kmeans_sample(self._radec, self._njack,
                                          maxiter=self._maxiter,
                                          tol=self._tol)
    if not self._km.converged:
        raise RuntimeError("did not converge")
def make_jk(ra_ran, dec_ran, ra, dec, N=100, dilute_factor=1, rand_out=1,
            large_mem=True, maxiter=500, tol=1e-05, seed=100):
    """
    Given the coordinates of random points, generate JK indices for another
    catalog of positions. Includes the possibility of diluting the random
    catalog. Returns an array of JK indices with the same length as ra and dec.
    """
    RADEC_ran = np.zeros((len(ra_ran), 2))
    RADEC_ran[:, 0] = ra_ran
    RADEC_ran[:, 1] = dec_ran

    RADEC = np.zeros((len(ra), 2))
    RADEC[:, 0] = ra
    RADEC[:, 1] = dec

    # Dilute the random catalog by keeping a random 1/dilute_factor subset
    np.random.seed(seed)
    ids = np.arange(len(ra_ran))
    np.random.shuffle(ids)
    n_dilute = len(ra_ran) // dilute_factor
    RADEC_ran_dilute = np.zeros((n_dilute, 2))
    RADEC_ran_dilute[:, 0] = ra_ran[ids[:n_dilute]]
    RADEC_ran_dilute[:, 1] = dec_ran[ids[:n_dilute]]

    km = kmeans_radec.kmeans_sample(RADEC_ran_dilute, N, maxiter=maxiter, tol=tol)
    print(np.unique(km.labels))

    if large_mem:
        # Assign labels in 100 chunks to limit memory use
        Ntotal = len(RADEC)
        Ntotal_ran = len(RADEC_ran)
        JK = np.array([])
        JK_ran = np.array([])
        for i in range(99):
            JK = np.concatenate(
                (JK, km.find_nearest(RADEC[i * (Ntotal // 100):(i + 1) * (Ntotal // 100)])), axis=0)
            print(np.unique(JK))
            if rand_out == 1:
                print(i)
                JK_ran = np.concatenate(
                    (JK_ran, km.find_nearest(RADEC_ran[i * (Ntotal_ran // 100):(i + 1) * (Ntotal_ran // 100)])), axis=0)
        JK = np.concatenate((JK, km.find_nearest(RADEC[99 * (Ntotal // 100):])), axis=0)
        if rand_out == 1:
            JK_ran = np.concatenate(
                (JK_ran, km.find_nearest(RADEC_ran[99 * (Ntotal_ran // 100):])), axis=0)
            print('len of random', len(ra_ran))
            print('len of JK', len(JK_ran))
    else:
        JK = km.find_nearest(RADEC)
        if rand_out == 1:
            JK_ran = km.find_nearest(RADEC_ran)

    if rand_out == 1:
        return JK_ran, JK
    else:
        return JK
def make_jack_samples_simple(njack, cat):
    radec = np.zeros((len(cat['RA']), 2))
    radec[:, 0] = cat['RA'].flatten()
    radec[:, 1] = cat['DEC'].flatten()

    _maxiter = 100
    _tol = 1.0e-5
    _km = kmeans_radec.kmeans_sample(radec, njack, maxiter=_maxiter, tol=_tol)

    uniquelabel = np.unique(_km.labels)
    # For each jackknife realisation, store the indices of everything
    # *outside* region i
    jacklist = np.empty(njack, dtype=object)
    for i in range(njack):
        jacklist[i] = np.where(_km.labels != uniquelabel[i])[0]
    return jacklist
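# Sketch of how make_jack_samples_simple might be used (synthetic catalog;
# assumes numpy and kmeans_radec are importable as in the function above):
import numpy as np

cat = {'RA': np.random.uniform(0.0, 10.0, 20000),
       'DEC': np.random.uniform(-5.0, 5.0, 20000)}
jacklist = make_jack_samples_simple(20, cat)

# Each entry holds the indices that survive when one region is dropped, so a
# delete-one jackknife estimate of, e.g., the mean RA is simply:
jk_means = np.array([cat['RA'][idx].mean() for idx in jacklist])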
def make_jk_id(N2d, edges, mask, ra_ref, ra_range, dec_range, ncen, maxiter, tol):
    """
    Return a grid of ids for the JK sample.
    """
    radec = np.zeros((len(N2d.flatten()), 2))
    Ngal = np.zeros((len(N2d.flatten()), 1))
    ii = []
    jj = []

    # Build the (RA, DEC) position of each grid cell centre, undoing the
    # cos(dec) compression around ra_ref
    for i in range(len(N2d)):
        for j in range(len(N2d[0])):
            ra_fixed = ((edges[1][j] + edges[1][j + 1]) / 2 - ra_ref) / np.cos(
                (edges[0][i] + edges[0][i + 1]) / 2 / 180 * np.pi) + ra_ref
            dec_fixed = (edges[0][i] + edges[0][i + 1]) / 2
            Ngal[i * len(N2d[0]) + j][0] = mask[i][j]
            radec[i * len(N2d[0]) + j][0] = ra_fixed
            radec[i * len(N2d[0]) + j][1] = dec_fixed
            ii.append(i)
            jj.append(j)

    print('Apply mask...')
    w, = np.where((radec[:, 0] > ra_range[0]) & (radec[:, 0] < ra_range[1])
                  & (radec[:, 1] > dec_range[0]) & (radec[:, 1] < dec_range[1])
                  & (Ngal[:, 0] > 0))

    # now run kmeans
    km = kmeans_radec.kmeans_sample(radec[w, :], ncen, maxiter=maxiter, tol=tol)
    if not km.converged:
        raise RuntimeError("k means did not converge")

    labels_grid_1d = np.zeros(len(ii))
    labels_grid_1d[w] = km.labels
    labels_grid_2d = N2d * 0.0
    for i in range(len(w)):
        labels_grid_2d[ii[w[i]]][jj[w[i]]] = labels_grid_1d[w[i]]

    return labels_grid_2d
def make_jk_from_random(ra_ran, dec_ran, N=100, dilute_factor=1, maxiter=500,
                        tol=1e-05, seed=100):
    """
    Given the coordinates of random points, generate JK indices.
    """
    RADEC_ran = np.zeros((len(ra_ran), 2))
    RADEC_ran[:, 0] = ra_ran
    RADEC_ran[:, 1] = dec_ran

    # Dilute the randoms by keeping a random 1/dilute_factor subset
    np.random.seed(seed)
    ids = np.arange(len(ra_ran))
    np.random.shuffle(ids)
    n_dilute = len(ra_ran) // dilute_factor
    RADEC_ran_dilute = np.zeros((n_dilute, 2))
    RADEC_ran_dilute[:, 0] = ra_ran[ids[:n_dilute]]
    RADEC_ran_dilute[:, 1] = dec_ran[ids[:n_dilute]]

    km = kmeans_radec.kmeans_sample(RADEC_ran_dilute, N, maxiter=maxiter, tol=tol)
    return km, RADEC_ran_dilute[:, 0], RADEC_ran_dilute[:, 1]
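# Sketch showing how the returned KMeans object could label a second catalog
# (arrays here are purely illustrative; find_nearest is the kmeans_radec method
# used throughout these snippets):
import numpy as np

ra_ran = np.random.uniform(0.0, 30.0, 500000)
dec_ran = np.random.uniform(-5.0, 5.0, 500000)
km, ra_d, dec_d = make_jk_from_random(ra_ran, dec_ran, N=100, dilute_factor=10)

ra_gal = np.random.uniform(0.0, 30.0, 50000)
dec_gal = np.random.uniform(-5.0, 5.0, 50000)
jk_gal = km.find_nearest(np.column_stack((ra_gal, dec_gal)))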
def GenerateJKRegions(ra, dec, njack, jfile, maxiter=200, tol=1.0e-5):
    """
    Generate k-means clusters from a set of data, using `kmeans_radec <https://github.com/esheldon/kmeans_radec>`_.
    If you're unfamiliar with the k-means algorithm, the `wikipedia page <https://en.wikipedia.org/wiki/K-means_clustering>`_ is helpful.
    For roughly uniform data, it generates N clusters of roughly equal cardinality. Here, distances are
    computed on the surface of the unit sphere, and coordinates are given as RA/DEC.

    Parameters
    ----------
    ra (float array)
        Right ascension values for each data point.
    dec (float array)
        Declination values for each data point.
    njack (int)
        Number of k-means clusters to generate, i.e. the number of JK regions.
    jfile (str)
        Output file name to save the regions.
    maxiter (int)
        Maximum number of iterations for the k-means generation, see `kmeans_radec documentation <https://github.com/esheldon/kmeans_radec>`_.
    tol (float)
        Tolerance level needed to be considered converged, see `kmeans_radec documentation <https://github.com/esheldon/kmeans_radec>`_.

    Returns
    -------
    None
    """
    rd = _np.zeros((len(ra), 2))
    rd[:, 0] = ra
    rd[:, 1] = dec

    km = kmeans_radec.kmeans_sample(rd, njack, maxiter=maxiter, tol=tol)
    if not km.converged:
        raise RuntimeError("k means did not converge")

    # Create the output directory if needed (guard against a bare file name)
    dir = os.path.dirname(jfile)
    if dir and not os.path.exists(dir):
        os.makedirs(dir)
    _np.savetxt(jfile, km.centers)
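# Hypothetical call to GenerateJKRegions (file name and sample are made up).
# The saved centers can later be reloaded with kmeans_radec.KMeans to assign
# the same regions to any other catalog:
import numpy as np
import kmeans_radec

ra = np.random.uniform(30.0, 60.0, 100000)
dec = np.degrees(np.arcsin(np.random.uniform(-0.5, 0.5, 100000)))
GenerateJKRegions(ra, dec, njack=100, jfile='jk/centers.txt')

km = kmeans_radec.KMeans(np.loadtxt('jk/centers.txt'))
labels = km.find_nearest(np.column_stack((ra, dec)))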
def kmeans(self, ncen=40):
    randoms = self.cat.copy()
    X = np.column_stack((self.mod(randoms[self.ra_col]), randoms[self.dec_col]))
    km = kmeans_sample(X, ncen, maxiter=100, tol=1.0e-5, nsample=len(X))
    jk_labels = km.labels + 1

    if self.do_3d:
        jk_labels, zedge = self.slice_kmeans(jk_labels, self.cat, self.z_col,
                                             zbound=self.zlims)
        rand_z, rand_r = self.cat[self.z_col], self.cat[self.r_col]
        zsort = np.argsort(rand_z)
        rand_z, rand_r = rand_z[zsort], rand_r[zsort]
        self.comoving = lambda z: np.interp(z, rand_z, rand_r)

    t = Table(self.cat)
    t['jackknife_ID'] = jk_labels
    t.write(self.catpath, overwrite=1)

    if hasattr(self, 'exports'):
        for cat in self.exports.keys():
            print('== exporting jackknife to', cat)
            racol, decol, zcol = self.exports[cat]
            t1 = Table.read(cat)
            X2 = np.column_stack((self.mod(t1[racol]), t1[decol]))
            jk_labels2 = km.find_nearest(X2) + 1
            if self.do_3d:
                jk_labels2, zedge = self.slice_kmeans(jk_labels2, t1, zcol,
                                                      zbound=self.zlims,
                                                      zedge=zedge)
            t1['jackknife_ID'] = jk_labels2
            t1.write(cat, overwrite=1)
            del X2, t1
            gc.collect()

    if self.plot:
        self.plot_jackknife()
def GenerateRegions(jarrs, jras, jdecs, jfile, njack, gindex, jtype):
    if jtype == 'generate':
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)

        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=200, tol=1.0e-5)
        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)

    elif jtype == 'read':
        centers = np.loadtxt(jfile)
        km = kmeans_radec.KMeans(centers)
        njack = len(centers)

    return [km, jfile]
def jk_kmeans(ra_sam, dec_sam, ra, dec, njk, plot=False):
    '''
    Function that takes RA and Dec from a given catalog and computes JK
    regions using the kmeans_radec module by Erin Sheldon.

    Parameters
    ----------
    ra_sam, dec_sam : numpy arrays of RA and Dec used to train the k-means centers.
    ra, dec : numpy arrays of RA and Dec. len(ra) = len(dec) = number of galaxies.
    njk : number of JK regions.

    Returns
    -------
    jk = JK region for each galaxy: integer ranging from 0 to njk-1.
         It is a numpy array with the same length as ra and dec.
    '''
    from astropy.coordinates import SkyCoord, Angle
    from astropy import units

    radec = np.zeros((len(ra), 2))
    radec_sam = np.zeros((len(ra_sam), 2))
    radec[:, 0] = ra
    radec_sam[:, 0] = ra_sam
    radec[:, 1] = dec
    radec_sam[:, 1] = dec_sam

    km = kmeans_radec.kmeans_sample(radec_sam, njk, maxiter=500, tol=1e-05)
    jk = km.find_nearest(radec)
    if not km.converged:
        print('k means did not converge')

    if plot:
        coords = SkyCoord(ra=ra, dec=dec, unit='degree')
        ra = coords.ra.wrap_at(180 * units.deg)
        dec = coords.dec
        plt.figure()
        plt.scatter(ra, dec, c=jk, lw=0, cmap='Paired', rasterized=True)
        plt.xlabel(r'RA', fontsize=12)
        plt.ylabel(r'Dec', fontsize=12)
        plt.tight_layout()
        plt.savefig('jk_kmeans.png')

    return jk
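# Minimal sketch for jk_kmeans: train the regions on a diluted sample and then
# label the full catalog (all arrays below are synthetic):
import numpy as np

ra = np.random.uniform(0.0, 50.0, 200000)
dec = np.random.uniform(-10.0, 10.0, 200000)

# Use every 10th object to define the regions, then label everything.
jk = jk_kmeans(ra[::10], dec[::10], ra, dec, njk=100)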
def get_patches(self, centers, verbose=False):
    """
    Obtains JK subpatches using a spherical k-means algorithm (from Erin)

    :param centers: JK center coordinates (RA, DEC) or number of centers
    :param verbose: passed to kmeans_radec
    """
    if not np.iterable(centers):  # if centers is a number
        self.ncen = centers
        nsample = self.pos.shape[0] // 2
        self.km = krd.kmeans_sample(self.pos, ncen=self.ncen, nsample=nsample,
                                    verbose=verbose)
        if not self.km.converged:
            self.km.run(self.pos, maxiter=100)
        self.centers = self.km.centers
    else:  # if centers is an array of RA, DEC pairs
        assert len(centers.shape) == 2  # shape should be (:, 2)
        self.km = krd.KMeans(centers)
        self.centers = centers
        self.ncen = len(centers)

    self.labels = self.km.find_nearest(self.pos).astype(int)
    self.sub_labels = np.unique(self.labels)

    # indexes of clusters for subsample i
    self.indexes = [np.where(self.labels != ind)[0]
                    for ind in self.sub_labels]

    # indexes of clusters not in subsample i
    self.non_indexes = [np.where(self.labels == ind)[0]
                        for ind in self.sub_labels]

    self.dsx_sub = np.zeros(shape=(self.nbin, self.ncen))
    self.dst_sub = np.zeros(shape=(self.nbin, self.ncen))
import numpy as np
import fitsio
from kmeans_radec import KMeans, kmeans_sample
from astropy.table import Table

lens = fitsio.read("lens.fits",
                   columns=["ra_gal", "dec_gal", "observed_redshift_gal"])
ra, dec = lens["ra_gal"], lens["dec_gal"]

ra_min, dec_min, ra_max, dec_max = 0, 0, 90, 90

# Uniform randoms on the sphere, cut to the footprint above
Nr = 50000000
ran_ra = np.random.uniform(0, 360, Nr)
ran_dec = np.degrees(np.arcsin(np.random.uniform(-1, 1, Nr)))
ran_mask = (ran_ra > ra_min) & (ran_ra < ra_max) & \
           (ran_dec > dec_min) & (ran_dec < dec_max)
ran_ra, ran_dec = ran_ra[ran_mask], ran_dec[ran_mask]
randoms = {'ra': ran_ra, 'dec': ran_dec}

# Run k-means on the randoms and save both the labels and the centers
coord = np.vstack([randoms['ra'], randoms['dec']]).T
ncen = 100
km = kmeans_sample(coord, ncen, maxiter=30, tol=1.0e-4)
labels = km.find_nearest(coord)

table = Table([coord[:, 0], coord[:, 1], labels],
              names=('RA', 'DEC', 'JK_LABEL'))
table.write('flagship_randoms_v2.fits', format='fits')
np.savetxt("flagship_jk_centers_v2.txt", km.centers)
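# The centers written above can be reused to give any other catalog the same
# jackknife labels; a sketch following the file names of the script above:
centers = np.loadtxt("flagship_jk_centers_v2.txt")
km_lens = KMeans(centers)
lens_labels = km_lens.find_nearest(np.column_stack((ra, dec)))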
def get_jkobj(radec_mat, njk):
    jkobj_map = kmeans_radec.kmeans_sample(radec_mat, njk, maxiter=200)
    return jkobj_map
def add_jackknife_both(lens_ds, rand_ds, njack, lens_ds_2=None, rand_ds_2=None):
    """Assign jackknife regions to random and lens catalogs.

    Parameters
    ----------
    lens_ds : numpy array
        Pre-compute results for lenses
    rand_ds : numpy array
        Pre-compute results for randoms
    njack : int
        Number of required jackknife fields
    lens_ds_2 : numpy array, optional
        Second pre-compute results for lenses. Default: None
    rand_ds_2 : numpy array, optional
        Second pre-compute results for randoms. Default: None

    Return
    ------
        Updated lens and random catalogs with `jk_field` information.
    """
    # Get field ID
    fields = get_field_id(lens_ds)

    # Make sure that `jk_field` key is available. If not, add one.
    try:
        lens_ds['jk_field']
    except ValueError:
        lens_ds = append_fields(lens_ds, 'jk_field',
                                np.zeros(len(lens_ds), int), usemask=False)

    # The same for the random catalog.
    try:
        rand_ds['jk_field']
    except ValueError:
        rand_ds = append_fields(rand_ds, 'jk_field',
                                np.zeros(len(rand_ds), int), usemask=False)

    # If njack == 1, just add 1 to everything, although that is a
    # bad idea for resampling... print out a warning.
    if njack == 1:
        lens_ds['jk_field'] = 1
        rand_ds['jk_field'] = 1
        print("# Only one Jackknife field? Seriously?")
        return lens_ds, rand_ds

    if (lens_ds_2 is not None) and (rand_ds_2 is not None):
        try:
            lens_ds_2['jk_field']
        except ValueError:
            lens_ds_2 = append_fields(lens_ds_2, 'jk_field',
                                      np.zeros(len(lens_ds_2), int), usemask=False)
        try:
            rand_ds_2['jk_field']
        except ValueError:
            rand_ds_2 = append_fields(rand_ds_2, 'jk_field',
                                      np.zeros(len(rand_ds_2), int), usemask=False)
        if njack == 1:
            lens_ds['jk_field'] = 1
            rand_ds['jk_field'] = 1
            lens_ds_2['jk_field'] = 1
            rand_ds_2['jk_field'] = 1
            print("# Only one Jackknife field? Seriously?")
            return lens_ds, rand_ds, lens_ds_2, rand_ds_2

    # Use the results with more objects as reference.
    # In principle, the random catalog should have many more objects than the lenses.
    # TODO: Still, when the number of lenses is smaller than a threshold,
    # we should do something else
    if len(rand_ds) > len(lens_ds):
        jk_fields_per_field = get_jk_regions_per_field(rand_ds, njack)
    else:
        jk_fields_per_field = get_jk_regions_per_field(lens_ds, njack)

    jk_next = 0
    if (lens_ds_2 is not None) and (rand_ds_2 is not None):
        # Make sure both pre-compute results share the same jackknife fields
        for i, field in enumerate(fields):
            rand_mask = rand_ds['field'] == field
            lens_mask = lens_ds['field'] == field
            rand_mask_2 = rand_ds_2['field'] == field
            lens_mask_2 = lens_ds_2['field'] == field
            if sum(rand_mask) == 0:
                continue

            rand_field = rand_ds[rand_mask]
            lens_field = lens_ds[lens_mask]
            rand_field_2 = rand_ds_2[rand_mask_2]
            lens_field_2 = lens_ds_2[lens_mask_2]

            # perform kmeans
            radec = np.column_stack((rand_field['ra'], rand_field['dec']))
            km = kmeans_radec.kmeans_sample(radec, jk_fields_per_field[i],
                                            maxiter=100, tol=1.0e-5, verbose=False)

            # assign jk_field, shifting up by jk_next as labels are 0-n
            rand_field['jk_field'] = km.labels + jk_next

            # kmeans centers
            ra_centers = np.array([k[0] for k in km.centers])
            dec_centers = np.array([k[1] for k in km.centers])

            # assign jackknife field in lens catalog based on nearest kmeans center
            for lens in lens_field:
                closest_jk = closest_point(lens['ra'], lens['dec'],
                                           ra_centers, dec_centers)
                lens['jk_field'] = closest_jk + jk_next
            for lens in lens_field_2:
                closest_jk = closest_point(lens['ra'], lens['dec'],
                                           ra_centers, dec_centers)
                lens['jk_field'] = closest_jk + jk_next
            for rand in rand_field_2:
                closest_jk = closest_point(rand['ra'], rand['dec'],
                                           ra_centers, dec_centers)
                rand['jk_field'] = closest_jk + jk_next

            # increment jk_next so that next field has higher jk numbers
            jk_next += jk_fields_per_field[i]

            # write back to the catalog.
            # Do this rather than recreating so that order is preserved
            rand_ds['jk_field'][rand_mask] = rand_field['jk_field']
            lens_ds['jk_field'][lens_mask] = lens_field['jk_field']
            rand_ds_2['jk_field'][rand_mask_2] = rand_field_2['jk_field']
            lens_ds_2['jk_field'][lens_mask_2] = lens_field_2['jk_field']

        return lens_ds, rand_ds, lens_ds_2, rand_ds_2

    else:
        for i, field in enumerate(fields):
            rand_mask = rand_ds['field'] == field
            lens_mask = lens_ds['field'] == field
            if sum(rand_mask) == 0 and sum(lens_mask) == 0:
                continue

            rand_field = rand_ds[rand_mask]
            lens_field = lens_ds[lens_mask]

            # perform kmeans
            radec = np.column_stack((rand_field['ra'], rand_field['dec']))
            km = kmeans_radec.kmeans_sample(radec, jk_fields_per_field[i],
                                            maxiter=100, tol=1.0e-5, verbose=False)

            # assign jk_field, shifting up by jk_next as labels are 0-n
            rand_field['jk_field'] = km.labels + jk_next

            # kmeans centers
            ra_centers = np.array([k[0] for k in km.centers])
            dec_centers = np.array([k[1] for k in km.centers])

            # assign jackknife field in lens catalog based on nearest kmeans center
            for lens in lens_field:
                closest_jk = closest_point(lens['ra'], lens['dec'],
                                           ra_centers, dec_centers)
                lens['jk_field'] = closest_jk + jk_next

            # increment jk_next so that next field has higher jk numbers
            jk_next += jk_fields_per_field[i]

            # write back to the catalog.
            # Do this rather than recreating so that order is preserved
            rand_ds['jk_field'][rand_mask] = rand_field['jk_field']
            lens_ds['jk_field'][lens_mask] = lens_field['jk_field']

        return lens_ds, rand_ds
def JackknifeOnSphere(jarrs, jras, jdecs, jfunc, jargs=[], jkwargs={},
                      jtype='generate', jfile=None, njack=24,
                      generateonly=False, gindex=0, varonly=False, save=None):
    jarrs = EnforceArray2D(jarrs)
    jras = EnforceArray2D(jras)
    jdecs = EnforceArray2D(jdecs)

    if jtype == 'generate':
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)

        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=200, tol=1.0e-5)
        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)

        if generateonly:
            return jfile

    elif jtype == 'read':
        centers = np.loadtxt(jfile)
        km = kmeans_radec.KMeans(centers)
        njack = len(centers)

    # Nearest-center JK label for every entry of every input array
    ind = []
    for i in range(len(jarrs)):
        rdi = np.zeros((len(jarrs[i]), 2))
        rdi[:, 0] = jarrs[i][jras[i]]
        rdi[:, 1] = jarrs[i][jdecs[i]]
        index = km.find_nearest(rdi)
        ind.append(index)

    # Measurement on the full sample
    full_j, full_other = jfunc(jarrs, *jargs, **jkwargs)
    full_j = EnforceArray2D(full_j)
    full_other = EnforceArray1D(full_other)

    it_j = []
    it_other = []
    frac = []
    for j in range(njack):
        print('JK %i' % (j))
        ja = []
        f = []
        for i in range(len(full_other)):
            if j == 0:
                it_other.append([])
        for i in range(len(full_j)):
            if j == 0:
                it_j.append([])
        for i in range(len(jarrs)):
            if j == 0:
                frac.append([])
            cut = (ind[i] == j)
            ja.append(jarrs[i][~cut])
            ff = np.sum(~cut) / float(len(cut))
            f.append(ff)

        i_j, i_other = jfunc(ja, *jargs, **jkwargs)
        i_j = EnforceArray2D(i_j)
        i_other = EnforceArray1D(i_other)

        for i in range(len(i_j)):
            it_j[i].append(np.copy(i_j[i]))
        for i in range(len(jarrs)):
            frac[i].append(f[i])
        for i in range(len(i_other)):
            it_other[i].append(np.copy(i_other[i]))

    for i in range(len(it_j)):
        it_j[i] = np.array(it_j[i])
    for i in range(len(frac)):
        frac[i] = np.array(frac[i])

    # Jackknife covariance of each measured vector
    cov_j = []
    for k in range(len(full_j)):
        if varonly:
            cov = np.power(np.std(it_j[k], axis=0), 2.0) * njack * float(njack - 1) / njack
            cov_j.append(cov)
        else:
            csize = len(full_j[k])
            cov = np.zeros((csize, csize))
            for i in range(csize):
                for j in range(i, csize):
                    cov[i, j] = np.sum((it_j[k][:, i] - full_j[k][i]) *
                                       (it_j[k][:, j] - full_j[k][j])) * float(njack - 1) / njack
                    if i != j:
                        cov[j, i] = cov[i, j]
            cov_j.append(cov)

    if save is not None:
        vec = os.path.join(save, 'vec')
        cov = os.path.join(save, 'cov')
        other = os.path.join(save, 'other')
        Write2Dir(vec, full_j)
        Write2Dir(cov, cov_j)
        Write2Dir(other, full_other)

    return [full_j, cov_j, full_other, it_other]
print "making JK's" mask_temp = (mask_sm > 0) #*(ra>10)*(ra<90) ra_temp = ra[mask_temp] dec_temp = dec[mask_temp] pix_temp = pix[mask_temp] RADEC = np.zeros((len(ra_temp), 2)) RADEC[:, 0] = ra_temp RADEC[:, 1] = dec_temp dilute_ids = np.arange(len(ra_temp)) np.random.seed(seed) np.random.shuffle(dilute_ids) dilute_ids = dilute_ids[:len(ra_temp) / 10] km = kmeans_radec.kmeans_sample(RADEC[dilute_ids], Njk, maxiter=500, tol=1e-05) JK = np.zeros(hp.nside2npix(nside)) JK[pix_temp] = km.find_nearest(RADEC) + 1 fits = fitsio.FITS(sim_dir + 'buzzard_y1_' + str(shear) + '_' + str(buzzid) + '_' + str(zbin) + '_mask_jk_' + str(Smooth[i]) + '.fits', 'rw', clobber=True) output = np.zeros(hp.nside2npix(nside), dtype=[('JK', 'f8')]) output['JK'] = JK fits.write(output) mask_temp = (mask_data_sm > 0) #*(ra>10)*(ra<90) ra_temp = ra[mask_temp]
def make_jk(ra_ran, dec_ran, ra, dec, N=100, dilute_factor=1, rand_out=1,
            large_mem=True, maxiter=500, tol=1e-05, seed=100, centers=False):
    """
    Given the coordinates of random points, generate JK indices for another
    catalog of positions. Includes the possibility of diluting the random
    catalog. Returns an array of JK indices with the same length as ra and dec.
    """
    RADEC_ran = np.zeros((len(ra_ran), 2))
    RADEC_ran[:, 0] = ra_ran
    RADEC_ran[:, 1] = dec_ran

    RADEC = np.zeros((len(ra), 2))
    RADEC[:, 0] = ra
    RADEC[:, 1] = dec

    # Dilute the random catalog by keeping a random 1/dilute_factor subset
    np.random.seed(seed)
    ids = np.arange(len(ra_ran))
    np.random.shuffle(ids)
    RADEC_ran_dilute = np.zeros((int(len(ra_ran) / dilute_factor), 2))
    RADEC_ran_dilute[:, 0] = ra_ran[ids[:int(len(ra_ran) / dilute_factor)]]
    RADEC_ran_dilute[:, 1] = dec_ran[ids[:int(len(ra_ran) / dilute_factor)]]

    km = kmeans_radec.kmeans_sample(RADEC_ran_dilute, N, maxiter=maxiter, tol=tol)
    print(np.unique(km.labels))

    if large_mem:
        # Assign labels in N chunks to limit memory use
        Ntotal = len(RADEC)
        Ntotal_ran = len(RADEC_ran)
        JK = np.array([])
        JK_ran = np.array([])
        for i in range(N - 1):
            JK = np.concatenate(
                (JK, km.find_nearest(RADEC[i * int(Ntotal / N):(i + 1) * int(Ntotal / N)])), axis=0)
            print(np.unique(JK))
            if rand_out == 1:
                print(i)
                JK_ran = np.concatenate(
                    (JK_ran, km.find_nearest(RADEC_ran[i * int(Ntotal_ran / N):(i + 1) * int(Ntotal_ran / N)])), axis=0)
        JK = np.concatenate((JK, km.find_nearest(RADEC[(N - 1) * int(Ntotal / N):])), axis=0)
        if rand_out == 1:
            JK_ran = np.concatenate(
                (JK_ran, km.find_nearest(RADEC_ran[(N - 1) * int(Ntotal_ran / N):])), axis=0)
            print('len of random', len(ra_ran))
            print('len of JK Random', len(JK_ran))
    else:
        JK = km.find_nearest(RADEC)
        if rand_out == 1:
            JK_ran = km.find_nearest(RADEC_ran)

    if centers:
        # Saving the kmeans centers
        assert km.converged > 0, 'Kmeans did not converge! Try more iterations.'
        print('Saving Jackknife Centers...')
        np.savetxt('/project2/chihway/sims/buzzard/y1_gal_member/jk_centers',
                   km.centers)

    if rand_out == 1:
        return JK_ran, JK
    else:
        return JK
def JackknifeOnSphere(jarrs, jras, jdecs, jfunc, jargs=[], jkwargs={},
                      jtype='generate', jfile=None, njack=24,
                      generateonly=False, gindex=0):
    jarrs = EnforceArray2D(jarrs)
    jras = EnforceArray2D(jras)
    jdecs = EnforceArray2D(jdecs)

    if jtype == 'generate':
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)

        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=100, tol=1.0e-5)
        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)

        if generateonly:
            return jfile

    elif jtype == 'read':
        centers = np.loadtxt(jfile)
        km = kmeans_radec.KMeans(centers)
        njack = len(centers)

    ind = []
    for i in range(len(jarrs)):
        rdi = np.zeros((len(jarrs[i]), 2))
        rdi[:, 0] = jarrs[i][jras[i]]
        rdi[:, 1] = jarrs[i][jdecs[i]]
        index = km.find_nearest(rdi)
        ind.append(index)

    full_j, full_other = jfunc(jarrs, *jargs, **jkwargs)
    full_j = EnforceArray2D(full_j)
    full_other = EnforceArray1D(full_other)

    it_j = []
    # use independent lists (a multiplied list would alias the same sublist)
    it_other = [[] for _ in range(len(full_other))]
    for j in range(njack):
        ja = []
        for i in range(len(jarrs)):
            cut = (ind[i] == j)
            ja.append(jarrs[i][~cut])
            if j == 0:
                it_j.append([])

        i_j, i_other = jfunc(ja, *jargs, **jkwargs)
        i_j = EnforceArray2D(i_j)
        i_other = EnforceArray1D(i_other)

        for i in range(len(i_j)):
            it_j[i].append(np.copy(i_j[i]))
        for i in range(len(i_other)):
            it_other[i].append(i_other[i])

    for i in range(len(it_j)):
        it_j[i] = np.array(it_j[i])

    cov_j = []
    for k in range(len(full_j)):
        csize = len(full_j[k])
        cov = np.zeros((csize, csize))
        for i in range(csize):
            for j in range(i, csize):
                cov[i, j] = np.sum((it_j[k][:, i] - full_j[k][i]) *
                                   (it_j[k][:, j] - full_j[k][j])) * float(njack - 1) / njack
                if i != j:
                    cov[j, i] = cov[i, j]
        cov_j.append(cov)

    return [full_j, cov_j, full_other, it_other]