Ejemplo n.º 1
0
def assign_kmeans_labels(pos, centers, verbose=False):
    """
    Label sky positions with the index of their nearest k-means center.

    Parameters
    ----------
    pos : np.ndarray
        Positions of the points as (RA, DEC) rows.
    centers : int or np.ndarray
        Either the number of centers to fit, or a 2-D array holding the
        (RA, DEC) coordinates of already known centers.
    verbose : bool
        Forwarded to ``krd.kmeans_sample`` when centers are fitted.

    Returns
    -------
    np.ndarray, np.ndarray
        * Integer label of every row of ``pos``
        * The centers held by the k-means object
    """
    if np.iterable(centers):
        # Explicit centers: wrap them without fitting anything.
        assert len(centers.shape) == 2  # shape should be (:, 2)
        km = krd.KMeans(centers)
    else:
        # A plain number: fit that many centers on a sample made of half
        # the points, then continue on the full set if not yet converged.
        km = krd.kmeans_sample(pos,
                               ncen=centers,
                               nsample=pos.shape[0] // 2,
                               verbose=verbose)
        if not km.converged:
            km.run(pos, maxiter=100)

    nearest = km.find_nearest(pos)
    return nearest.astype(int), km.centers
Ejemplo n.º 2
0
def find_kmeans_centers(ncen, maxiter, tol, points, centers=None, verbose=1):
    """
    Fit spherical k-means centers to a set of (RA, Dec) points.

    Parameters
    ----------
    ncen : int
        Number of centers to fit; only used when ``centers`` is None.
    maxiter : int
        Maximum number of iterations handed to kmeans_radec.
    tol : float
        Convergence tolerance handed to kmeans_radec.
    points : np.ndarray
        Array of shape (N, 2) or (2, N) holding RA and Dec.
    centers : np.ndarray, optional
        Initial guess for the centers, shape (K, 2) or (2, K). When given,
        ``kmeans_radec.kmeans`` is run from this guess; otherwise
        ``kmeans_radec.kmeans_sample`` is used with ``ncen`` centers.
    verbose : int
        Verbosity flag handed to kmeans_radec.

    Returns
    -------
    The ``centers`` attribute of the fitted k-means object.

    Notes
    -----
    Reseeds NumPy's global random generator with 0 on every call.
    """
    import numpy as np

    np.random.seed(0)

    # Make sure both RA and Dec are given, then bring the array to (N, 2).
    # Only transpose when the second axis is not already the coordinate
    # axis, so an (N, 2) input (including 2x2) is left untouched.
    assert points.shape[0] == 2 or points.shape[1] == 2, "Must have RA and Dec"
    if points.shape[1] != 2:
        points = points.T

    # Same check and orientation for the optional initial guess.
    if centers is not None:
        assert centers.shape[0] == 2 or centers.shape[
            1] == 2, "Centers must have RA and Dec if given"
        if centers.shape[1] != 2:
            centers = centers.T

    # Imported only once the inputs have been validated.
    import kmeans_radec as kmrd

    if centers is not None:
        km = kmrd.kmeans(points,
                         centers,
                         tol=tol,
                         maxiter=maxiter,
                         verbose=verbose)
    else:
        km = kmrd.kmeans_sample(points,
                                ncen,
                                maxiter=maxiter,
                                tol=tol,
                                verbose=verbose)

    return km.centers
Ejemplo n.º 3
0
def add_jackknife_field(catalog, njackfields):
    """Label every row of a lens or random catalog with a jackknife region.

    Parameters
    ----------
    catalog : numpy array
        Lens or random catalog; its `field`, `ra` and `dec` columns are read.
    njackfields : int
        Number of Jackknife resampling fields.

    Return
    ------
        Catalog with `jk_field` column.
    """
    field_ids = get_field_id(catalog)

    # Probe for the `jk_field` column; if the lookup raises ValueError,
    # append a zero-filled integer column under that name.
    try:
        catalog['jk_field']
    except ValueError:
        catalog = append_fields(catalog,
                                'jk_field',
                                np.zeros(len(catalog), int),
                                usemask=False)

    # A single region means everything gets the same label; warn because
    # that is useless for resampling.
    if njackfields == 1:
        catalog['jk_field'] = 1
        print("# Only one Jackknife field? Seriously?")
        return catalog

    # How many jackknife regions each field gets.
    regions_per_field = get_jk_regions_per_field(catalog, njackfields)

    offset = 0
    for idx, field in enumerate(field_ids):
        n_regions = regions_per_field[idx]
        rows = (catalog['field'] == field).nonzero()
        subset = catalog[rows]

        # k-means on the positions of this field only
        coords = np.column_stack((subset['ra'], subset['dec']))
        km = kmeans_radec.kmeans_sample(coords,
                                        n_regions,
                                        maxiter=100,
                                        tol=1.0e-5,
                                        verbose=False)

        # Labels start at 0 within each field; shift them by the running
        # offset and write them in place so that row order is preserved.
        catalog['jk_field'][rows] = km.labels + offset

        # The next field continues numbering after this one.
        offset += n_regions

    return catalog
Ejemplo n.º 4
0
    def run_kmeans(self):
        """
        Fit k-means regions and store the fitted object on ``self._km``.

        ``self._radec`` is clustered into ``self._njack`` regions using
        ``self._maxiter`` and ``self._tol``.

        Raises
        ------
        RuntimeError
            If the fitted object reports that it did not converge. The
            object is still stored on ``self._km`` in that case.
        """
        from kmeans_radec import kmeans_sample

        self._km = kmeans_sample(self._radec,
                                 self._njack,
                                 maxiter=self._maxiter,
                                 tol=self._tol)
        if self._km.converged:
            return
        raise RuntimeError("did not converge")
def _find_nearest_in_chunks(km, points, nchunks=100):
    """Label `points` with `km.find_nearest`, one slice at a time, to bound peak memory."""
    step = len(points) // nchunks
    pieces = []
    for i in range(nchunks):
        start = i * step
        # The last slice also takes whatever the integer division left over.
        stop = (i + 1) * step if i < nchunks - 1 else len(points)
        chunk = points[start:stop]
        if len(chunk):
            pieces.append(km.find_nearest(chunk))
    if not pieces:
        return np.array([], dtype=int)
    # One concatenation at the end instead of regrowing an array per slice.
    return np.concatenate(pieces, axis=0)


def make_jk(ra_ran, dec_ran, ra, dec, N=100, dilute_factor=1, rand_out=1, large_mem=True, maxiter=500, tol=1e-05, seed=100):
    """
    Given coordinates of random points, generate JK indices for another
    catalog of positions, optionally diluting the random catalog first.

    Parameters
    ----------
    ra_ran, dec_ran : array-like
        RA and Dec of the random points the k-means regions are fitted on.
    ra, dec : array-like
        RA and Dec of the catalog to be labelled.
    N : int
        Number of JK regions.
    dilute_factor : int
        Only ``len(ra_ran) // dilute_factor`` randomly chosen random points
        are used for the fit.
    rand_out : int
        When equal to 1 the labels of the full random catalog are returned
        as well.
    large_mem : bool
        When true, labels are assigned in 100 slices instead of in one call.
    maxiter, tol :
        Iteration limit and tolerance passed to ``kmeans_sample``.
    seed : int
        Seed for NumPy's global random generator, used for the dilution.

    Returns
    -------
    (JK_ran, JK) when ``rand_out == 1``, otherwise (JK, JK). ``JK`` has the
    same length as ``ra`` and ``dec``.
    """
    RADEC_ran = np.zeros((len(ra_ran), 2))
    RADEC_ran[:, 0] = ra_ran
    RADEC_ran[:, 1] = dec_ran

    RADEC = np.zeros((len(ra), 2))
    RADEC[:, 0] = ra
    RADEC[:, 1] = dec

    # Pick a random subset of the randoms. Floor division keeps the count
    # an integer under Python 3 as well.
    np.random.seed(seed)
    ids = np.arange(len(ra_ran))
    np.random.shuffle(ids)
    n_keep = int(len(ra_ran) // dilute_factor)
    RADEC_ran_dilute = RADEC_ran[ids[:n_keep]]

    km = kmeans_radec.kmeans_sample(RADEC_ran_dilute, N, maxiter=maxiter, tol=tol)
    print(np.unique(km.labels))

    if large_mem:
        JK = _find_nearest_in_chunks(km, RADEC)
        if rand_out == 1:
            JK_ran = _find_nearest_in_chunks(km, RADEC_ran)
        else:
            JK_ran = np.array([], dtype=int)
        print('len of random', len(ra_ran))
        print('len of JK', len(JK_ran))
    else:
        JK = km.find_nearest(RADEC)
        if rand_out == 1:
            JK_ran = km.find_nearest(RADEC_ran)

    if rand_out == 1:
        return JK_ran, JK
    return JK, JK
Ejemplo n.º 6
0
def make_jack_samples_simple(njack, cat):
    """Build leave-one-region-out index lists from a k-means split of `cat`.

    Parameters
    ----------
    njack : int
        Number of k-means regions, and length of the returned array.
    cat :
        Catalog whose `RA` and `DEC` entries are flattened into positions.

    Returns
    -------
    np.ndarray of dtype object
        Element ``i`` holds the row indices whose label differs from the
        i-th unique k-means label, i.e. the sample with region ``i`` removed.
    """
    radec = np.zeros((len(cat['RA']), 2))
    radec[:, 0] = cat['RA'].flatten()
    radec[:, 1] = cat['DEC'].flatten()

    km = kmeans_radec.kmeans_sample(radec, njack, maxiter=100, tol=1.0e-5)
    unique_labels = np.unique(km.labels)

    # The builtin `object` replaces the `np.object` alias, which newer
    # NumPy releases no longer provide.
    jacklist = np.empty(njack, dtype=object)
    for i in range(njack):
        jacklist[i] = np.where(km.labels != unique_labels[i])[0]

    return jacklist
Ejemplo n.º 7
0
def make_jk_id(N2d, edges, mask, ra_ref, ra_range, dec_range, ncen, maxiter,
               tol):
    """
    Return a grid, shaped like `N2d`, holding the jackknife label of each cell.

    Cell centres are computed from `edges` (index 0 along the first grid
    axis, used as Dec; index 1 along the second, used as RA). The RA offset
    from `ra_ref` is divided by cos(Dec). Only cells strictly inside
    `ra_range` and `dec_range` with a positive `mask` value enter the
    k-means fit; every other cell keeps the value 0.

    Raises RuntimeError if the k-means fit does not converge.
    """
    ncells = len(N2d.flatten())
    radec = np.zeros((ncells, 2))
    weight = np.zeros((ncells, 1))
    rows = []
    cols = []
    for i in range(len(N2d)):
        dec_mid = (edges[0][i] + edges[0][i + 1]) / 2
        for j in range(len(N2d[0])):
            ra_mid = (edges[1][j] + edges[1][j + 1]) / 2
            k = i * len(N2d[0]) + j
            weight[k][0] = mask[i][j]
            radec[k][0] = (ra_mid - ra_ref) / np.cos(
                dec_mid / 180 * np.pi) + ra_ref
            radec[k][1] = dec_mid
            rows.append(i)
            cols.append(j)

    print('Apply mask...')
    inside_ra = (radec[:, 0] > ra_range[0]) & (radec[:, 0] < ra_range[1])
    inside_dec = (radec[:, 1] > dec_range[0]) & (radec[:, 1] < dec_range[1])
    w, = np.where(inside_ra & inside_dec & (weight[:, 0] > 0))

    # k-means on the selected cells only
    km = kmeans_radec.kmeans_sample(radec[w, :],
                                    ncen,
                                    maxiter=maxiter,
                                    tol=tol)
    if not km.converged:
        raise RuntimeError("k means did not converge")

    # Scatter the labels back: first into a flat array, then onto the grid.
    labels_flat = np.zeros(len(rows))
    labels_flat[w] = km.labels
    labels_grid_2d = N2d * 0.0
    for k in w:
        labels_grid_2d[rows[k]][cols[k]] = labels_flat[k]

    return labels_grid_2d
def make_jk_from_random(ra_ran, dec_ran, N=100, dilute_factor=1, maxiter=500, tol=1e-05, seed=100):
    """
    Given coordinates of random points, fit the k-means object that defines
    the JK regions.

    Parameters
    ----------
    ra_ran, dec_ran : array-like
        RA and Dec of the random points.
    N : int
        Number of JK regions.
    dilute_factor : int
        Only ``len(ra_ran) // dilute_factor`` randomly chosen points are
        used for the fit.
    maxiter, tol :
        Iteration limit and tolerance passed to ``kmeans_sample``.
    seed : int
        Seed for NumPy's global random generator, used for the dilution.

    Returns
    -------
    km, ra, dec
        The fitted k-means object and the RA and Dec of the diluted sample
        it was fitted on.
    """
    RADEC_ran = np.zeros((len(ra_ran), 2))
    RADEC_ran[:, 0] = ra_ran
    RADEC_ran[:, 1] = dec_ran

    np.random.seed(seed)
    ids = np.arange(len(ra_ran))
    np.random.shuffle(ids)

    # Floor division keeps the sample size an integer under Python 3.
    n_keep = int(len(ra_ran) // dilute_factor)
    RADEC_ran_dilute = RADEC_ran[ids[:n_keep]]

    km = kmeans_radec.kmeans_sample(RADEC_ran_dilute, N, maxiter=maxiter, tol=tol)

    return km, RADEC_ran_dilute[:, 0], RADEC_ran_dilute[:, 1]
Ejemplo n.º 9
0
def GenerateJKRegions(ra, dec, njack, jfile, maxiter=200, tol=1.0e-5):
    """
    Generate k-means clusters from a set of data, using `kmeans_radec <https://github.com/esheldon/kmeans_radec>`_.
    If you're unfamiliar with the k-means algorithm, the `wikipedia page <https://en.wikipedia.org/wiki/K-means_clustering>`_ is helpful.
    For roughly uniform data, it generates N-clusters of roughly equal cardinality.
    Here, distances are computed on the surface of the unit sphere, and coordinates are given as RA/DEC.

    Parameters
    ----------
    ra (float array)
        Right ascension values for each data point.
    dec (float array)
        Declination values for each data point.
    njack (int)
        Number of k-means clusters to generate, i.e. the number of JK regions.
    jfile (str)
        Output file name; the fitted cluster centers are saved there as text.
        Missing parent directories are created.
    maxiter (int)
        Maximum number of iterations for the k-means generation, see `kmeans_radec documentation <https://github.com/esheldon/kmeans_radec>`_.
    tol (float)
        Tolerance level needed to be considered converged, see `kmeans_radec documentation <https://github.com/esheldon/kmeans_radec>`_.

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If the k-means fit did not converge; nothing is written in that case.

    """

    rd = _np.zeros((len(ra), 2))
    rd[:, 0] = ra
    rd[:, 1] = dec

    km = kmeans_radec.kmeans_sample(rd, njack, maxiter=maxiter, tol=tol)

    if not km.converged:
        raise RuntimeError("k means did not converge")

    # A bare file name has an empty dirname, and os.makedirs('') raises,
    # so only create the directory when there is one to create.
    outdir = os.path.dirname(jfile)
    if outdir and not os.path.exists(outdir):
        os.makedirs(outdir)

    _np.savetxt(jfile, km.centers)
Ejemplo n.º 10
0
def GenerateJKRegions(ra, dec, njack, jfile, maxiter=200, tol=1.0e-5):
    """
    Generate k-means clusters from a set of data, using `kmeans_radec <https://github.com/esheldon/kmeans_radec>`_.
    If you're unfamiliar with the k-means algorithm, the `wikipedia page <https://en.wikipedia.org/wiki/K-means_clustering>`_ is helpful.
    For roughly uniform data, it generates N-clusters of roughly equal cardinality.
    Here, distances are computed on the surface of the unit sphere, and coordinates are given as RA/DEC.

    Parameters
    ----------
    ra (float array)
        Right ascension values for each data point.
    dec (float array)
        Declination values for each data point.
    njack (int)
        Number of k-means clusters to generate, i.e. the number of JK regions.
    jfile (str)
        Output file name; the fitted cluster centers are saved there as text.
        Missing parent directories are created.
    maxiter (int)
        Maximum number of iterations for the k-means generation, see `kmeans_radec documentation <https://github.com/esheldon/kmeans_radec>`_.
    tol (float)
        Tolerance level needed to be considered converged, see `kmeans_radec documentation <https://github.com/esheldon/kmeans_radec>`_.

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If the k-means fit did not converge; nothing is written in that case.

    """

    rd = _np.zeros((len(ra), 2))
    rd[:, 0] = ra
    rd[:, 1] = dec

    km = kmeans_radec.kmeans_sample(rd, njack, maxiter=maxiter, tol=tol)

    if not km.converged:
        raise RuntimeError("k means did not converge")

    # A bare file name has an empty dirname, and os.makedirs('') raises,
    # so only create the directory when there is one to create.
    outdir = os.path.dirname(jfile)
    if outdir and not os.path.exists(outdir):
        os.makedirs(outdir)

    _np.savetxt(jfile, km.centers)
Ejemplo n.º 11
0
    def kmeans(self, ncen=40):
        """
        Assign jackknife IDs to ``self.cat`` and to every exported catalog.

        A k-means fit with ``ncen`` centers is run on the (wrapped RA, Dec)
        positions of the catalog; the IDs are ``km.labels + 1``. With
        ``self.do_3d`` set, the IDs are additionally passed through
        ``self.slice_kmeans`` and ``self.comoving`` is set to an
        interpolator between the catalog's ``z_col`` and ``r_col`` columns.
        The catalog is written to ``self.catpath`` with a ``jackknife_ID``
        column. If the instance has an ``exports`` attribute, each catalog
        listed there is read, labelled by nearest center, and written back
        in place. Finally ``self.plot_jackknife()`` is called when
        ``self.plot`` is set.
        """
        randoms = self.cat.copy()
        ra_dec = np.column_stack(
            (self.mod(randoms[self.ra_col]), randoms[self.dec_col]))

        # nsample equals the number of rows, so the sampling stage is
        # handed every point.
        km = kmeans_sample(ra_dec,
                           ncen,
                           maxiter=100,
                           tol=1.0e-5,
                           nsample=len(ra_dec))
        jk_labels = km.labels + 1

        if self.do_3d:
            jk_labels, zedge = self.slice_kmeans(jk_labels,
                                                 self.cat,
                                                 self.z_col,
                                                 zbound=self.zlims)
            # Sort by redshift so the pair of columns can feed np.interp.
            order = np.argsort(self.cat[self.z_col])
            z_sorted = self.cat[self.z_col][order]
            r_sorted = self.cat[self.r_col][order]
            self.comoving = lambda z: np.interp(z, z_sorted, r_sorted)

        labelled = Table(self.cat)
        labelled['jackknife_ID'] = jk_labels
        labelled.write(self.catpath, overwrite=1)

        if hasattr(self, 'exports'):
            for cat, (racol, decol, zcol) in self.exports.items():
                print('== exporting jackknife to', cat)
                other = Table.read(cat)
                other_radec = np.column_stack(
                    (self.mod(other[racol]), other[decol]))
                other_labels = km.find_nearest(other_radec) + 1
                if self.do_3d:
                    # Reuse the redshift edges found for the main catalog.
                    other_labels, zedge = self.slice_kmeans(other_labels,
                                                            other,
                                                            zcol,
                                                            zbound=self.zlims,
                                                            zedge=zedge)
                other['jackknife_ID'] = other_labels
                other.write(cat, overwrite=1)
                # Release the table before loading the next one.
                del other_radec, other
                gc.collect()

        if self.plot:
            self.plot_jackknife()
Ejemplo n.º 12
0
def GenerateRegions(jarrs, jras, jdecs, jfile, njack, gindex, jtype):
    """Build the k-means object that defines the jackknife regions.

    Parameters
    ----------
    jarrs, jras, jdecs :
        Catalogs and, per catalog, the names of their RA and Dec columns.
        Only the entries at position `gindex` are read, and only when
        `jtype` is 'generate'.
    jfile : str or None
        File holding the region centers. It is written in 'generate' mode
        (defaulting to 'JK-<njack>.txt' when None) and read in 'read' mode.
    njack : int
        Number of regions to generate; unused in 'read' mode.
    gindex : int
        Index of the catalog the regions are generated from.
    jtype : str
        'generate' to fit new regions, 'read' to load centers from `jfile`.

    Returns
    -------
    list
        ``[km, jfile]``.

    Raises
    ------
    RuntimeError
        If the k-means fit did not converge.
    ValueError
        If `jtype` is neither 'generate' nor 'read'.
    """
    if jtype == 'generate':
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)
        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=200, tol=1.0e-5)

        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)

    elif jtype == 'read':
        km = kmeans_radec.KMeans(np.loadtxt(jfile))

    else:
        # Without this branch an unknown mode left `km` unbound.
        raise ValueError(
            "jtype must be 'generate' or 'read', got {0!r}".format(jtype))

    return [km, jfile]
Ejemplo n.º 13
0
def GenerateRegions(jarrs, jras, jdecs, jfile, njack, gindex, jtype):
    """Build the k-means object that defines the jackknife regions.

    Parameters
    ----------
    jarrs, jras, jdecs :
        Catalogs and, per catalog, the names of their RA and Dec columns.
        Only the entries at position `gindex` are read, and only when
        `jtype` is 'generate'.
    jfile : str or None
        File holding the region centers. It is written in 'generate' mode
        (defaulting to 'JK-<njack>.txt' when None) and read in 'read' mode.
    njack : int
        Number of regions to generate; unused in 'read' mode.
    gindex : int
        Index of the catalog the regions are generated from.
    jtype : str
        'generate' to fit new regions, 'read' to load centers from `jfile`.

    Returns
    -------
    list
        ``[km, jfile]``.

    Raises
    ------
    RuntimeError
        If the k-means fit did not converge.
    ValueError
        If `jtype` is neither 'generate' nor 'read'.
    """
    if jtype == 'generate':
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)
        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=200, tol=1.0e-5)

        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)

    elif jtype == 'read':
        km = kmeans_radec.KMeans(np.loadtxt(jfile))

    else:
        # Without this branch an unknown mode left `km` unbound.
        raise ValueError(
            "jtype must be 'generate' or 'read', got {0!r}".format(jtype))

    return [km, jfile]
Ejemplo n.º 14
0
def jk_kmeans(ra_sam, dec_sam, ra, dec, njk, plot=False):
    '''
    Compute JK regions with the kmeans_radec module by Erin Sheldon and
    assign every object of a catalog to one of them.

    Parameters
    ----------
    ra_sam, dec_sam : numpy arrays of RA and Dec of the sample the regions
        are fitted on. len(ra_sam) = len(dec_sam).
    ra, dec : numpy arrays of RA and Dec of the catalog to be labelled.
        len(ra) = len(dec) = number of galaxies.
    njk : number of JK regions.
    plot : if true, a scatter plot of the labelled catalog is saved to
        'jk_kmeans.png'.

    Returns
    -------
    jk : JK region of each galaxy, as returned by `find_nearest`. It is a
        numpy array with the same length as ra and dec.

    Notes
    -----
    A fit that did not converge is only reported with a printed message;
    the labels are returned regardless.
    '''
    radec = np.zeros((len(ra), 2))
    radec[:, 0] = ra
    radec[:, 1] = dec

    radec_sam = np.zeros((len(ra_sam), 2))
    radec_sam[:, 0] = ra_sam
    radec_sam[:, 1] = dec_sam

    km = kmeans_radec.kmeans_sample(radec_sam, njk, maxiter=500, tol=1e-05)
    jk = km.find_nearest(radec)
    if not km.converged:
        print('k means did not converge')

    if plot:
        # astropy is only needed for the plot, so import it only here.
        from astropy.coordinates import SkyCoord
        from astropy import units

        coords = SkyCoord(ra=ra, dec=dec, unit='degree')
        ra = coords.ra.wrap_at(180 * units.deg)
        dec = coords.dec
        plt.figure()
        plt.scatter(ra, dec, c=jk, lw=0, cmap='Paired', rasterized=True)
        plt.xlabel(r'RA', fontsize=12)
        plt.ylabel(r'Dec', fontsize=12)
        plt.tight_layout()
        plt.savefig('jk_kmeans.png')
    return jk
Ejemplo n.º 15
0
    def get_patches(self, centers, verbose=False):
        """
        Split ``self.pos`` into JK subpatches with a spherical k-means
        algorithm and set up the per-patch bookkeeping on the instance.

        :param centers: either the number of patches to fit, or a 2-D array
            of patch center coordinates (RA, DEC) to use as they are

        :param verbose: passed to kmeans radec when patches are fitted
        """
        if np.iterable(centers):
            # Explicit centers: wrap them without fitting anything.
            assert len(centers.shape) == 2  # shape should be (:, 2)
            self.km = krd.KMeans(centers)
            self.centers = centers
            self.ncen = len(centers)
        else:
            # A plain number: fit that many centers on a sample made of
            # half the points, then continue on the full set if needed.
            self.ncen = centers
            self.km = krd.kmeans_sample(self.pos,
                                        ncen=self.ncen,
                                        nsample=self.pos.shape[0] // 2,
                                        verbose=verbose)
            if not self.km.converged:
                self.km.run(self.pos, maxiter=100)
            self.centers = self.km.centers

        self.labels = self.km.find_nearest(self.pos).astype(int)
        self.sub_labels = np.unique(self.labels)

        # Rows that remain when patch `ind` is left out ...
        self.indexes = [np.nonzero(self.labels != ind)[0]
                        for ind in self.sub_labels]

        # ... and the rows that belong to patch `ind` itself.
        self.non_indexes = [np.nonzero(self.labels == ind)[0]
                            for ind in self.sub_labels]

        # Zero-filled (nbin, ncen) result buffers.
        grid_shape = (self.nbin, self.ncen)
        self.dsx_sub = np.zeros(shape=grid_shape)
        self.dst_sub = np.zeros(shape=grid_shape)
Ejemplo n.º 16
0
from kmeans_radec import KMeans, kmeans_sample
from astropy.table import Table

# Script: draw uniform random points on the sky, keep those inside an
# RA/Dec box, split them into k-means jackknife regions and save both the
# labelled randoms and the region centers.

# Read the lens catalog columns.
# NOTE(review): `lens`, `ra` and `dec` are not used again in the visible
# part of this script - confirm whether later code needs them.
lens = fitsio.read("lens.fits",
                   columns=["ra_gal", "dec_gal", "observed_redshift_gal"])

ra, dec = lens["ra_gal"], lens["dec_gal"]

# Footprint limits, in the same units as the random coordinates (degrees).
ra_min, dec_min, ra_max, dec_max = 0, 0, 90, 90

# Draw Nr points: RA uniform in [0, 360) and Dec with sin(Dec) uniform in
# [-1, 1), i.e. uniform per unit area on the sphere.
Nr = 50000000
ran_ra = np.random.uniform(0, 360, Nr)
ran_dec = np.degrees(np.arcsin(np.random.uniform(-1, 1, Nr)))

# Keep only the points strictly inside the footprint box.
ran_mask = (ran_ra > ra_min) & (ran_ra < ra_max) & (ran_dec > dec_min) & (
    ran_dec < dec_max)
ran_ra, ran_dec = ran_ra[ran_mask], ran_dec[ran_mask]

randoms = {'ra': ran_ra, 'dec': ran_dec}

# (N, 2) array of (RA, Dec) rows, the layout passed to kmeans_sample.
coord = np.vstack([randoms['ra'], randoms['dec']]).T
ncen = 100
# NOTE(review): km.converged is not checked after this fit - confirm that
# 30 iterations at tol=1e-4 is enough for this sample.
km = kmeans_sample(coord, ncen, maxiter=30, tol=1.0e-4)

# Label every random point with its nearest center.
labels = km.find_nearest(coord)

# Save the labelled randoms and the centers.
table = Table([coord[:, 0], coord[:, 1], labels],
              names=('RA', 'DEC', 'JK_LABEL'))
table.write('flagship_randoms_v2.fits', format='fits')
np.savetxt("flagship_jk_centers_v2.txt", km.centers)
Ejemplo n.º 17
0
def get_jkobj(radec_mat, njk):
    """Fit `njk` k-means regions to `radec_mat` and return the fitted object.

    The fit is run with ``kmeans_radec.kmeans_sample`` and at most 200
    iterations. Convergence is not checked here.
    """
    return kmeans_radec.kmeans_sample(radec_mat, njk, maxiter=200)
Ejemplo n.º 18
0
def _ensure_jk_field(catalog):
    """Return `catalog` with a `jk_field` column, appending a zero-filled integer one if the lookup fails."""
    try:
        catalog['jk_field']
    except ValueError:
        catalog = append_fields(catalog,
                                'jk_field',
                                np.zeros(len(catalog), int),
                                usemask=False)
    return catalog


def _assign_nearest_jk(catalog, field, ra_centers, dec_centers, offset):
    """Label the rows of `catalog` lying in `field` with the nearest center index plus `offset`."""
    mask = catalog['field'] == field
    subset = catalog[mask]
    for row in subset:
        nearest = closest_point(row['ra'], row['dec'], ra_centers,
                                dec_centers)
        row['jk_field'] = nearest + offset
    # Write back through the mask so that the row order is preserved.
    catalog['jk_field'][mask] = subset['jk_field']


def add_jackknife_both(lens_ds,
                       rand_ds,
                       njack,
                       lens_ds_2=None,
                       rand_ds_2=None):
    """Assign jackknife regions to random and lens catalogs.

    Within every field the regions are fitted with k-means on the first
    random catalog; all other catalogs are labelled by their nearest
    k-means center, so that they share the same regions.

    Parameters
    ----------
    lens_ds : numpy array
        Pre-compute results for lenses
    rand_ds : numpy array
        Pre-compute results for randoms
    njack : int
        Number of required jackknife fields
    lens_ds_2 : numpy array, optional
        Second pre-compute results for lenses. Default: None
    rand_ds_2 : numpy array, optional
        Second pre-compute results for randoms. Default: None

    Return
    ------
        Updated lens and random catalogs with `jk_field` information:
        ``(lens_ds, rand_ds)``, or
        ``(lens_ds, rand_ds, lens_ds_2, rand_ds_2)`` when both second
        catalogs are given. This holds for ``njack == 1`` as well.
    """
    # Get field ID
    fields = get_field_id(lens_ds)

    # The second pair is only used when both of its catalogs are given.
    paired = (lens_ds_2 is not None) and (rand_ds_2 is not None)

    # Make sure that the `jk_field` key is available everywhere.
    lens_ds = _ensure_jk_field(lens_ds)
    rand_ds = _ensure_jk_field(rand_ds)
    if paired:
        lens_ds_2 = _ensure_jk_field(lens_ds_2)
        rand_ds_2 = _ensure_jk_field(rand_ds_2)

    # If njack = 1, just add 1 to everything, although that is a bad idea
    # for resampling...print out a warning. This check runs after the
    # second pair has been prepared, so that pair is labelled and returned
    # too.
    if njack == 1:
        lens_ds['jk_field'] = 1
        rand_ds['jk_field'] = 1
        if paired:
            lens_ds_2['jk_field'] = 1
            rand_ds_2['jk_field'] = 1
        print("# Only one Jackknife field? Seriously?")
        if paired:
            return lens_ds, rand_ds, lens_ds_2, rand_ds_2
        return lens_ds, rand_ds

    # Use the results with more objects as reference
    # In principle, the random catalog should have many more objects than the lenses
    # TODO: Still, when the number of lens is smaller than a threshold,
    # We should do something else
    if len(rand_ds) > len(lens_ds):
        jk_fields_per_field = get_jk_regions_per_field(rand_ds, njack)
    else:
        jk_fields_per_field = get_jk_regions_per_field(lens_ds, njack)

    # Catalogs that take over the regions fitted on `rand_ds`.
    followers = [lens_ds]
    if paired:
        followers += [lens_ds_2, rand_ds_2]

    jk_next = 0
    for i, field in enumerate(fields):
        rand_mask = rand_ds['field'] == field
        has_randoms = rand_mask.any()
        has_lenses = (lens_ds['field'] == field).any()

        # NOTE(review): the two modes skip differently. With a second pair
        # a field without randoms is always skipped; without one it is
        # only skipped when it has no lenses either, so a field with
        # lenses but no randoms still reaches the k-means call with an
        # empty input. Confirm which behavior is wanted.
        if not has_randoms and (paired or not has_lenses):
            continue

        # perform kmeans on the randoms of this field
        radec = np.column_stack(
            (rand_ds['ra'][rand_mask], rand_ds['dec'][rand_mask]))
        km = kmeans_radec.kmeans_sample(radec,
                                        jk_fields_per_field[i],
                                        maxiter=100,
                                        tol=1.0e-5,
                                        verbose=False)

        # assign jk_field, shifting up by jk_next as labels are 0-n
        rand_ds['jk_field'][rand_mask] = km.labels + jk_next

        # kmeans centers
        ra_centers = np.array([k[0] for k in km.centers])
        dec_centers = np.array([k[1] for k in km.centers])

        # assign jackknife field in the other catalogs based on the
        # nearest kmeans center
        for catalog in followers:
            _assign_nearest_jk(catalog, field, ra_centers, dec_centers,
                               jk_next)

        # increment jk_next so that next field has higher jk numbers
        jk_next += jk_fields_per_field[i]

    if paired:
        return lens_ds, rand_ds, lens_ds_2, rand_ds_2
    return lens_ds, rand_ds
Ejemplo n.º 19
0
def JackknifeOnSphere(jarrs, jras, jdecs, jfunc, jargs=[], jkwargs={}, jtype='generate', jfile=None, njack=24, generateonly=False, gindex=0, varonly=False, save=None):
    """Jackknife a statistic over k-means regions on the sphere.

    Parameters
    ----------
    jarrs, jras, jdecs :
        Catalog(s) and, per catalog, the names of the RA and Dec columns.
        All three are passed through ``EnforceArray2D`` first.
    jfunc : callable
        Called as ``jfunc(catalogs, *jargs, **jkwargs)``, once on the full
        catalogs and once per region with that region removed. It must
        return a pair ``(vectors, other)``.
    jargs, jkwargs :
        Extra positional and keyword arguments for `jfunc`. They are only
        unpacked, never modified.
    jtype : str
        'generate' to fit the regions on catalog `gindex` and save their
        centers to `jfile`, 'read' to load the centers from `jfile`.
    jfile : str or None
        Centers file; defaults to 'JK-<njack>.txt' in 'generate' mode.
    njack : int
        Number of regions. In 'read' mode it is replaced by the number of
        centers found in `jfile`.
    generateonly : bool
        In 'generate' mode, return `jfile` right after saving the centers.
    gindex : int
        Index of the catalog the regions are generated from.
    varonly : bool
        Return only the jackknife variances instead of full covariances.
    save : str or None
        When given, the full-sample vectors, the covariances and the
        `other` values are written below this directory with ``Write2Dir``.

    Returns
    -------
    list
        ``[full_j, cov_j, full_other, it_other]``: the full-sample
        vectors, one covariance (or variance) per vector, the full-sample
        `other` values, and the per-region `other` values.

    Raises
    ------
    ValueError
        If `jtype` is neither 'generate' nor 'read'.
    RuntimeError
        If the k-means fit did not converge.
    """
    # Reject unknown modes up front; they used to leave `km` unbound.
    if jtype not in ('generate', 'read'):
        raise ValueError(
            "jtype must be 'generate' or 'read', got {0!r}".format(jtype))

    jarrs = EnforceArray2D(jarrs)
    jras = EnforceArray2D(jras)
    jdecs = EnforceArray2D(jdecs)

    if jtype == 'generate':
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)
        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=200, tol=1.0e-5)
        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)
        if generateonly:
            return jfile
    else:
        centers = np.loadtxt(jfile)
        km = kmeans_radec.KMeans(centers)
        njack = len(centers)

    # Region index of every row, one array per catalog.
    ind = []
    for i, arr in enumerate(jarrs):
        rdi = np.zeros((len(arr), 2))
        rdi[:, 0] = arr[jras[i]]
        rdi[:, 1] = arr[jdecs[i]]
        ind.append(km.find_nearest(rdi))

    full_j, full_other = jfunc(jarrs, *jargs, **jkwargs)
    full_j = EnforceArray2D(full_j)
    full_other = EnforceArray1D(full_other)

    # One list of per-region results for every output of jfunc.
    it_j = [[] for _ in range(len(full_j))]
    it_other = [[] for _ in range(len(full_other))]

    for j in range(njack):
        print('JK %i' % j)

        # Leave region j out of every catalog. A `!=` comparison is used
        # because unary minus on a boolean mask is an error in current
        # NumPy.
        ja = [arr[ind[i] != j] for i, arr in enumerate(jarrs)]

        i_j, i_other = jfunc(ja, *jargs, **jkwargs)
        i_j = EnforceArray2D(i_j)
        i_other = EnforceArray1D(i_other)

        for i in range(len(i_j)):
            it_j[i].append(np.copy(i_j[i]))
        for i in range(len(i_other)):
            it_other[i].append(np.copy(i_other[i]))

    it_j = [np.array(samples) for samples in it_j]

    # Jackknife prefactor (njack - 1) / njack.
    scale = float(njack - 1) / njack

    cov_j = []
    for k in range(len(full_j)):
        if varonly:
            cov = np.power(np.std(it_j[k], axis=0), 2.0) * njack * scale
        else:
            # Deviations of each leave-one-out result from the full-sample
            # result; their outer products summed over regions.
            diff = it_j[k] - np.asarray(full_j[k])
            cov = np.dot(diff.T, diff) * scale
        cov_j.append(cov)

    if save is not None:
        Write2Dir(os.path.join(save, 'vec'), full_j)
        Write2Dir(os.path.join(save, 'cov'), cov_j)
        Write2Dir(os.path.join(save, 'other'), full_other)

    return [full_j, cov_j, full_other, it_other]
Ejemplo n.º 20
0
def JackknifeOnSphere(jarrs,
                      jras,
                      jdecs,
                      jfunc,
                      jargs=[],
                      jkwargs={},
                      jtype='generate',
                      jfile=None,
                      njack=24,
                      generateonly=False,
                      gindex=0,
                      varonly=False,
                      save=None):
    """
    Jackknife a statistic over k-means patches and return its covariance.

    Every input array is split into ``njack`` patches by assigning each row
    to its nearest k-means center.  ``jfunc`` is evaluated once on the full
    arrays and once per patch with that patch removed; the spread of the
    leave-one-out results around the full result gives the covariance.

    Parameters
    ----------
    jarrs : sequence of arrays
        Input arrays; the whole list is passed to ``jfunc`` as its first
        argument.
    jras, jdecs : sequence
        ``jras[i]`` / ``jdecs[i]`` are used to index ``jarrs[i]`` to obtain
        the two position columns (presumably RA/Dec field names -- confirm
        against callers).
    jfunc : callable
        Called as ``jfunc(arrays, *jargs, **jkwargs)``; must return a pair
        ``(vectors, others)``.
    jargs, jkwargs : list, dict
        Extra positional / keyword arguments for ``jfunc``.  They are only
        unpacked here, never mutated.
    jtype : {'generate', 'read'}
        'generate' fits k-means centers on ``jarrs[gindex]`` and writes them
        to ``jfile``; 'read' loads the centers from ``jfile`` and sets
        ``njack`` to the number of centers found.
    jfile : str or None
        Centers file.  Defaults to ``'JK-<njack>.txt'`` when generating.
    njack : int
        Number of jackknife patches (ignored for ``jtype='read'``).
    generateonly : bool
        With ``jtype='generate'``, return ``jfile`` right after writing it.
    gindex : int
        Index into ``jarrs`` of the array used to fit the centers.
    varonly : bool
        If true, store only a per-element variance instead of the full
        covariance matrix.
    save : str or None
        If given, directory under which ``vec``, ``cov`` and ``other`` are
        written with ``Write2Dir``.

    Returns
    -------
    list
        ``[full_j, cov_j, full_other, it_other]`` where ``it_other[i]`` is
        the list of leave-one-out values of ``full_other[i]``.  When
        ``generateonly`` is true the centers file name is returned instead.

    Raises
    ------
    RuntimeError
        If k-means does not converge while generating centers.
    ValueError
        If ``jtype`` is neither 'generate' nor 'read'.
    """
    jarrs = EnforceArray2D(jarrs)
    jras = EnforceArray2D(jras)
    # jdecs must be normalised exactly like jras because both are indexed
    # in lockstep below.
    jdecs = EnforceArray2D(jdecs)

    if jtype == 'generate':
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)
        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=200, tol=1.0e-5)
        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)
        if generateonly:
            return jfile

    elif jtype == 'read':
        centers = np.loadtxt(jfile)
        km = kmeans_radec.KMeans(centers)
        njack = len(centers)

    else:
        # Without this, an unknown jtype only surfaces later as a NameError.
        raise ValueError(
            "jtype must be 'generate' or 'read', got {0!r}".format(jtype))

    # Patch label of every row of every input array.
    ind = []
    for i, arr in enumerate(jarrs):
        rdi = np.zeros((len(arr), 2))
        rdi[:, 0] = arr[jras[i]]
        rdi[:, 1] = arr[jdecs[i]]
        ind.append(km.find_nearest(rdi))

    full_j, full_other = jfunc(jarrs, *jargs, **jkwargs)
    full_j = EnforceArray2D(full_j)
    full_other = EnforceArray1D(full_other)

    # One independent accumulator per output of jfunc.
    it_j = [[] for _ in range(len(full_j))]
    it_other = [[] for _ in range(len(full_other))]
    for j in range(njack):
        print('JK %i' % (j))

        # Leave patch j out.  ``!=`` is used rather than negating the mask
        # with unary minus, which NumPy rejects for boolean arrays.
        ja = [arr[ind[i] != j] for i, arr in enumerate(jarrs)]

        i_j, i_other = jfunc(ja, *jargs, **jkwargs)
        i_j = EnforceArray2D(i_j)
        i_other = EnforceArray1D(i_other)

        for i, vec in enumerate(i_j):
            it_j[i].append(np.copy(vec))
        for i, val in enumerate(i_other):
            it_other[i].append(np.copy(val))

    for i in range(len(it_j)):
        it_j[i] = np.array(it_j[i])

    cov_j = []
    for k in range(len(full_j)):
        if varonly:
            # Variance of the leave-one-out samples about their own mean,
            # scaled by (njack - 1).
            cov = np.power(np.std(it_j[k], axis=0),
                           2.0) * njack * float(njack - 1) / njack
        else:
            # cov[a, b] = (njack-1)/njack * sum_jk d[jk, a] * d[jk, b],
            # with d the deviation of each sample from the full-data value.
            diff = it_j[k] - np.asarray(full_j[k])
            cov = np.dot(diff.T, diff) * float(njack - 1) / njack
        cov_j.append(cov)

    if save is not None:
        vec_dir = os.path.join(save, 'vec')
        cov_dir = os.path.join(save, 'cov')
        other_dir = os.path.join(save, 'other')

        Write2Dir(vec_dir, full_j)
        Write2Dir(cov_dir, cov_j)
        Write2Dir(other_dir, full_other)

    return [full_j, cov_j, full_other, it_other]
Ejemplo n.º 21
0
        # NOTE(review): this is an excerpt from the middle of a larger
        # routine; mask_sm, ra, dec, pix, seed, Njk, nside, sim_dir, shear,
        # buzzid, zbin, Smooth and i are all defined outside the visible code.
        print "making JK's"

        # Keep only the entries where mask_sm is positive.
        mask_temp = (mask_sm > 0)  #*(ra>10)*(ra<90)
        ra_temp = ra[mask_temp]
        dec_temp = dec[mask_temp]
        pix_temp = pix[mask_temp]

        # Stack the selected positions into an (N, 2) array of (ra, dec).
        RADEC = np.zeros((len(ra_temp), 2))
        RADEC[:, 0] = ra_temp
        RADEC[:, 1] = dec_temp
        # Seeded shuffle, then keep the first tenth of the indices as the
        # sample on which the k-means centers are fitted.
        dilute_ids = np.arange(len(ra_temp))
        np.random.seed(seed)
        np.random.shuffle(dilute_ids)
        # NOTE(review): relies on Python 2 integer division; under Python 3
        # `/` yields a float and this slice raises TypeError.
        dilute_ids = dilute_ids[:len(ra_temp) / 10]
        km = kmeans_radec.kmeans_sample(RADEC[dilute_ids],
                                        Njk,
                                        maxiter=500,
                                        tol=1e-05)
        # Labels are shifted by +1 so that 0 is left for every pixel that is
        # not listed in pix_temp.
        JK = np.zeros(hp.nside2npix(nside))
        JK[pix_temp] = km.find_nearest(RADEC) + 1

        # Write the label map as a one-column ('JK') table to a FITS file;
        # clobber=True presumably overwrites an existing file -- confirm
        # against the fitsio docs.
        fits = fitsio.FITS(sim_dir + 'buzzard_y1_' + str(shear) + '_' +
                           str(buzzid) + '_' + str(zbin) + '_mask_jk_' +
                           str(Smooth[i]) + '.fits',
                           'rw',
                           clobber=True)
        output = np.zeros(hp.nside2npix(nside), dtype=[('JK', 'f8')])
        output['JK'] = JK
        fits.write(output)

        # Same selection again, this time driven by mask_data_sm (the
        # excerpt ends here).
        mask_temp = (mask_data_sm > 0)  #*(ra>10)*(ra<90)
        ra_temp = ra[mask_temp]
Ejemplo n.º 22
0
def _find_nearest_in_chunks(km, points, nchunks):
    """Label *points* with ``km.find_nearest`` in at most *nchunks* slices."""
    total = len(points)
    step = total // nchunks
    pieces = []
    for i in range(nchunks):
        start = i * step
        # The last slice absorbs the remainder left by the integer division.
        stop = (i + 1) * step if i < nchunks - 1 else total
        if stop > start:  # never hand an empty slice to find_nearest
            pieces.append(km.find_nearest(points[start:stop]))
    if not pieces:
        return np.array([])
    # A single concatenation keeps the label dtype returned by find_nearest
    # and avoids re-copying the growing result once per chunk.
    return np.concatenate(pieces, axis=0)


def make_jk(ra_ran, dec_ran, ra, dec, N=100, dilute_factor=1, rand_out=1, large_mem=True, maxiter=500, tol=1e-05, seed=100, centers=False, centers_file='/project2/chihway/sims/buzzard/y1_gal_member/jk_centers'):
    """
    Given coordinates of random points, generate jackknife (JK) indices
    for another catalog of positions. The random catalog can optionally
    be diluted before the k-means centers are fitted. Returns an array of
    JK indices with the same length as ra and dec.

    Parameters
    ----------
    ra_ran, dec_ran : array-like
        Coordinates of the random points used to fit the k-means centers.
    ra, dec : array-like
        Coordinates of the catalog to be labelled.
    N : int
        Number of k-means centers; also the number of slices used when
        ``large_mem`` is true.
    dilute_factor : number
        Only ``int(len(ra_ran) / dilute_factor)`` shuffled random points
        are used for the fit.
    rand_out : int
        If 1, the random points are labelled and returned as well.
    large_mem : bool
        If true, label the catalogs slice by slice instead of in one call.
    maxiter, tol :
        Forwarded to ``kmeans_radec.kmeans_sample``.
    seed : int
        Seed passed to ``np.random.seed`` before shuffling.
    centers : bool
        If true, write ``km.centers`` to ``centers_file``.
    centers_file : str
        Destination for the saved centers.

    Returns
    -------
    ``(JK_ran, JK)`` when ``rand_out == 1``, otherwise ``JK``.

    Raises
    ------
    AssertionError
        If ``centers`` is true and k-means did not converge.
    """

    RADEC_ran = np.zeros((len(ra_ran), 2))
    RADEC_ran[:, 0] = ra_ran
    RADEC_ran[:, 1] = dec_ran

    RADEC = np.zeros((len(ra), 2))
    RADEC[:, 0] = ra
    RADEC[:, 1] = dec

    # Seeded shuffle so the diluted subsample is reproducible.
    np.random.seed(seed)
    ids = np.arange(len(ra_ran))
    np.random.shuffle(ids)
    ndilute = int(len(ra_ran) / dilute_factor)
    RADEC_ran_dilute = RADEC_ran[ids[:ndilute]]

    # Honour the caller's maxiter/tol rather than fixed values.
    km = kmeans_radec.kmeans_sample(RADEC_ran_dilute, N, maxiter=maxiter, tol=tol)
    print(np.unique(km.labels))

    if large_mem:
        JK = _find_nearest_in_chunks(km, RADEC, N)
        if rand_out == 1:
            JK_ran = _find_nearest_in_chunks(km, RADEC_ran, N)
        else:
            JK_ran = np.array([])
        print('len of random', len(ra_ran))
        print('len of JK Random', len(JK_ran))
    else:
        JK = km.find_nearest(RADEC)
        if rand_out == 1:
            JK_ran = km.find_nearest(RADEC_ran)

    if centers:
        # Saving the kmeans centers
        assert km.converged > 0, 'Kmeans did not converge! Try more iterations.'
        print('Saving Jackknife Centers...')
        np.savetxt(centers_file, km.centers)

    if rand_out == 1:
        return JK_ran, JK
    else:
        return JK
Ejemplo n.º 23
0
def JackknifeOnSphere(jarrs, jras, jdecs, jfunc, jargs=[], jkwargs={}, jtype='generate', jfile=None, njack=24, generateonly=False, gindex=0):
    """
    Jackknife a statistic over k-means patches and return its covariance.

    Every input array is split into ``njack`` patches by assigning each row
    to its nearest k-means center.  ``jfunc`` is evaluated once on the full
    arrays and once per patch with that patch removed; the spread of the
    leave-one-out results around the full result gives the covariance.

    Parameters
    ----------
    jarrs : sequence of arrays
        Input arrays; the whole list is passed to ``jfunc`` as its first
        argument.
    jras, jdecs : sequence
        ``jras[i]`` / ``jdecs[i]`` are used to index ``jarrs[i]`` to obtain
        the two position columns (presumably RA/Dec field names -- confirm
        against callers).
    jfunc : callable
        Called as ``jfunc(arrays, *jargs, **jkwargs)``; must return a pair
        ``(vectors, others)``.
    jargs, jkwargs : list, dict
        Extra positional / keyword arguments for ``jfunc``.  They are only
        unpacked here, never mutated.
    jtype : {'generate', 'read'}
        'generate' fits k-means centers on ``jarrs[gindex]`` and writes them
        to ``jfile``; 'read' loads the centers from ``jfile`` and sets
        ``njack`` to the number of centers found.
    jfile : str or None
        Centers file.  Defaults to ``'JK-<njack>.txt'`` when generating.
    njack : int
        Number of jackknife patches (ignored for ``jtype='read'``).
    generateonly : bool
        With ``jtype='generate'``, return ``jfile`` right after writing it.
    gindex : int
        Index into ``jarrs`` of the array used to fit the centers.

    Returns
    -------
    list
        ``[full_j, cov_j, full_other, it_other]`` where ``it_other[i]`` is
        the list of leave-one-out values of ``full_other[i]``.  When
        ``generateonly`` is true the centers file name is returned instead.

    Raises
    ------
    RuntimeError
        If k-means does not converge while generating centers.
    ValueError
        If ``jtype`` is neither 'generate' nor 'read'.
    """
    jarrs = EnforceArray2D(jarrs)
    jras = EnforceArray2D(jras)
    # jdecs must be normalised exactly like jras because both are indexed
    # in lockstep below.
    jdecs = EnforceArray2D(jdecs)

    if jtype == 'generate':
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)
        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=100, tol=1.0e-5)
        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)
        if generateonly:
            return jfile

    elif jtype == 'read':
        centers = np.loadtxt(jfile)
        km = kmeans_radec.KMeans(centers)
        njack = len(centers)

    else:
        # Without this, an unknown jtype only surfaces later as a NameError.
        raise ValueError(
            "jtype must be 'generate' or 'read', got {0!r}".format(jtype))

    # Patch label of every row of every input array.
    ind = []
    for i, arr in enumerate(jarrs):
        rdi = np.zeros((len(arr), 2))
        rdi[:, 0] = arr[jras[i]]
        rdi[:, 1] = arr[jdecs[i]]
        ind.append(km.find_nearest(rdi))

    full_j, full_other = jfunc(jarrs, *jargs, **jkwargs)
    full_j = EnforceArray2D(full_j)
    full_other = EnforceArray1D(full_other)

    # One *independent* list per output of jfunc.  ``[[]] * n`` would repeat
    # a single shared list, so every append would show up in all slots.
    # it_j is sized by the number of vectors jfunc returns, not by the
    # number of input arrays.
    it_j = [[] for _ in range(len(full_j))]
    it_other = [[] for _ in range(len(full_other))]
    for j in range(njack):
        # Leave patch j out.  ``!=`` is used rather than negating the mask
        # with unary minus, which NumPy rejects for boolean arrays.
        ja = [arr[ind[i] != j] for i, arr in enumerate(jarrs)]

        i_j, i_other = jfunc(ja, *jargs, **jkwargs)
        i_j = EnforceArray2D(i_j)
        i_other = EnforceArray1D(i_other)
        for i, vec in enumerate(i_j):
            it_j[i].append(np.copy(vec))
        for i, val in enumerate(i_other):
            it_other[i].append(val)

    for i in range(len(it_j)):
        it_j[i] = np.array(it_j[i])

    cov_j = []
    for k in range(len(full_j)):
        # cov[a, b] = (njack-1)/njack * sum_jk d[jk, a] * d[jk, b],
        # with d the deviation of each sample from the full-data value.
        diff = it_j[k] - np.asarray(full_j[k])
        cov_j.append(np.dot(diff.T, diff) * float(njack - 1) / njack)

    return [full_j, cov_j, full_other, it_other]