    def align(self, tabref, taba):
        """Align table ``taba`` to the epochs of ``tabref``.

        Uses ``join``, ``Table`` and ``unique`` from ``astropy.table``.
        """
        # Left join on 'time' keeps every epoch of the reference table;
        # astropy renames conflicting columns with _1/_2 suffixes.
        tabajoin = join(tabref, taba, keys=['time'], join_type='left')

        # Select the '_2' (taba) columns, skipping index and band bookkeeping
        csel = []
        for c in tabajoin.columns:
            if '_2' in c and 'index' not in c and 'band' not in c:
                csel.append(c)

        csel.append('time')
        csel.append('band_1')

        # Epochs missing from taba get zero flux
        tabanew = Table(tabajoin[csel])
        tabanew['flux_2'].fill_value = 0.0
        tabanew = tabanew.filled()

        # Strip the join suffixes to restore the original column names
        for vv in csel:
            if '_2' in vv or '_1' in vv:
                tabanew[vv].name = '_'.join(vv.split('_')[:-1])

        return unique(tabanew, keys=['time', 'band'])
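
# A minimal usage sketch (an assumption, not part of the original source):
# the heart of align() is a left join on 'time' with missing fluxes filled
# by zero.
#
#     from astropy.table import Table, join
#     tabref = Table({'time': [1, 2, 3], 'band': ['g', 'g', 'r'],
#                     'flux': [1.0, 2.0, 3.0]})
#     taba = Table({'time': [1, 3], 'band': ['g', 'r'], 'flux': [4.0, 5.0]})
#     joined = join(tabref, taba, keys=['time'], join_type='left')
#     joined['flux_2'].fill_value = 0.0   # epoch 2 is absent from taba
#     print(joined.filled())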
Example #2
    def get_catalog(self,
                    query=None,
                    query_fields=None,
                    print_query=False,
                    exclude_gaia=False,
                    **kwargs):
        """
        Grab a catalog of sources around the input coordinate, out to the
        search radius.

        Args:
            query (str, optional): SQL query to run instead of the default.
            query_fields (list, optional): Override the list of items to query.
            print_query (bool, optional): Print the SQL query generated.
            exclude_gaia (bool, optional): If the field 'gaia_pointsource' is
                present and is 1, remove those objects from the output catalog.

        Returns:
            astropy.table.Table: Catalog of sources returned.

        """
        # Query
        main_cat = super(DECaL_Survey,
                         self).get_catalog(query=query,
                                           query_fields=query_fields,
                                           print_query=print_query,
                                           **kwargs)
        main_cat = Table(main_cat, masked=True)
        # Mask NaNs in the floating-point columns
        for col in main_cat.colnames:
            if main_cat[col].dtype.kind == 'f':
                main_cat[col].mask = np.isnan(main_cat[col])
        # Convert SNR to magnitude error values: sigma_m = 2.5*log10(1 + 1/SNR)
        snr_cols = [
            colname for colname in main_cat.colnames if "snr" in colname
        ]
        for col in snr_cols:
            main_cat[col].mask = main_cat[col] < 0
            main_cat[col] = 2.5 * np.log10(1 + 1 / main_cat[col])

        main_cat = main_cat.filled(-99.0)
        # Remove Gaia point sources if requested
        if exclude_gaia:
            self.catalog = main_cat[main_cat['gaia_pointsource'] == 0]
        else:
            self.catalog = main_cat
        # Clean the catalog that is actually returned
        self.catalog = catalog_utils.clean_cat(self.catalog, photom['DECaL'])
        self.validate_catalog()
        # Return
        return self.catalog
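
# Hedged aside (not from the original source): the conversion above implements
# sigma_m = 2.5*log10(1 + 1/SNR), which tends to 2.5/ln(10)/SNR ~ 1.0857/SNR
# for large SNR.  Quick check with plain numpy:
#
#     import numpy as np
#     snr = np.array([5.0, 20.0, 100.0])
#     print(2.5 * np.log10(1 + 1 / snr))   # -> [0.198, 0.053, 0.011]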
Example #3

    def save_on_txt(self, fileName, survey="DES"):
        t = Table(masked=True)
        # The OBS column reproduces the original SNPhotCC file layout; it can
        # be dropped provided the init method of the Supernova class is changed.
        colNames = [["OBS", "{:4s}"],
                    ["MJD", "{0:9.3f}"], ["BAND", "{:s}"], ["FIELD", "{:6s}"]]
        if self.r.magFlag:
            colNames.extend([["MAG", "{0:7.3f}"], ["MAG_ERR", "{0:7.3f}"]])
        else:
            colNames.extend([["FLUX", "{0:10.5f}"], ["FLUX_ERR", "{0:10.5f}"]])

        # Gather epochs, fluxes and errors from every good light curve
        bandArr = list()
        mjd = list()
        flux = list()
        fluxErr = list()

        for b in self.lcsDict.keys():
            if self.lcsDict[b].badCurve:
                continue

            # extend() from the start avoids aliasing the first band's lists
            bandArr.extend([b] * len(self.lcsDict[b].mjd))
            mjd.extend(self.lcsDict[b].mjd)
            flux.extend(self.lcsDict[b].flux)
            fluxErr.extend(self.lcsDict[b].fluxErr)

        mjdArgsort = np.argsort(mjd)

        mjd = [mjd[i] for i in mjdArgsort]
        flux = [flux[i] for i in mjdArgsort]
        fluxErr = [fluxErr[i] for i in mjdArgsort]
        bandArr = [bandArr[i] for i in mjdArgsort]

        # Add the columns to the table
        for name, fmt in colNames:
            if name in ("OBS", "BAND", "FIELD"):
                col = MaskedColumn(np.full(len(mjd), '', dtype='U10'),
                                   name=name, format=fmt,
                                   fill_value='-',
                                   mask=np.zeros(len(mjd), dtype=bool))
            else:
                col = MaskedColumn(np.zeros(len(mjd)),
                                   name=name, format=fmt,
                                   dtype=float, fill_value=-9,
                                   mask=np.zeros(len(mjd), dtype=bool))
            t.add_column(col)

        # Initialize the columns; slice assignment keeps the column formats
        t["OBS"][:] = "OBS:"
        t["FIELD"][:] = "NULL"
        t["MJD"][:] = mjd
        t["BAND"][:] = bandArr
        # Write under the column names chosen above
        if self.r.magFlag:
            t["MAG"][:] = flux
            t["MAG_ERR"][:] = fluxErr
        else:
            t["FLUX"][:] = flux
            t["FLUX_ERR"][:] = fluxErr

        t = t.filled()

        fOut = open(fileName, 'w')
        now = time.gmtime()
        fOut.write("# File produced by Miniature Adventure on " +
            "{:02d}/{:02d}/{:4d} at {:02d}:{:02d}:{:02d} GMT\n".format(
                now.tm_mday, now.tm_mon, now.tm_year,
                now.tm_hour, now.tm_min, now.tm_sec))
        fOut.write("SURVEY:  {:<}\n".format(survey))
        fOut.write("SNID:  {:<d}\n".format(self.SNID))
        fOut.write("GPKERNEL: {:<}\n".format(self.kern))
        fOut.write("SNTYPE: {:>d}\n".format(self.SNTypeInt))
        fOut.write("RA:     {:>9.6f} deg\n".format(self.RADeg))
        fOut.write("DECL:   {:>9.6f} deg\n".format(self.decDeg))
        fOut.write("MWEBV:  {:>6.4f}\n".format(self.MWEBV))
        if hasattr(self, "zSpec"):
            if self.zSpec:
                fOut.write("REDSHIFT_SPEC:  {:>6.4f} +- {:>6.4f}\n".format(
                    self.zSpec, self.zSpecErr
                    ))
            else:
                fOut.write("REDSHIFT_SPEC: -9.0000 +- 9.0000\n")
        if hasattr(self, "hostGalaxyID"):
            fOut.write("HOST_GALAXY_GALID: {:>d}\n".format(self.hostGalaxyID))
        if hasattr(self, "zPhotHost"):
            fOut.write("HOST_GALAXY_PHOTO-Z:  {:>6.4f} +- {:>6.4f}\n".format(
                self.zPhotHost, self.zPhotHostErr
                ))
        fOut.write("MJD_MAX_FLUX-CCF:  {:>9.3f}\n".format(self.ccMjdMaxFlux))
        fOut.write("\n\n\n")
        fOut.write("# ======================================\n")
        fOut.write("# LIGHT CURVE FIT USING GAUSSIAN PROCESS\n")
        fOut.write("#\n")
        fOut.write("# NOBS: {:<}\n".format(len(mjd)))

        ascii.write(t, output=fOut, delimiter='  ',
            format='fixed_width_two_line')

        fOut.close()
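
# Hedged illustration (the values are hypothetical, not produced by the
# original code): the header written by save_on_txt() looks roughly like
#
#     SURVEY:  DES
#     SNID:  1234
#     GPKERNEL: RBF
#     SNTYPE: 1
#     RA:     52.500000 deg
#     DECL:   -27.500000 deg
#     MWEBV:  0.0070
#     MJD_MAX_FLUX-CCF:  56250.125
#
# followed by the fixed-width light-curve table written by astropy's ascii
# writer.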
Example #4

import re
from contextlib import redirect_stderr, redirect_stdout
from io import StringIO

import numpy as np
import astropy.units as u
from astropy.coordinates import Distance, SkyCoord
from astropy.table import Table
from astropy.time import Time
from astroquery.gaia import Gaia

# _query and SpTypeToGminusV are module-level objects from the original
# source: the ADQL cone-search string and a spectral-type -> (G-V) lookup.
def _GaiaDR2Match(row,
                  fC,
                  match_radius=1,
                  gaia_mag_tolerance=0.5,
                  id_check=True):

    flags = 0

    coo = SkyCoord(row['_RAJ2000'],
                   row['_DEJ2000'],
                   frame='icrs',
                   unit=(u.hourangle, u.deg))
    # Run the Gaia cone search, silencing astroquery's console output
    s = coo.to_string('decimal', precision=5).split()
    _ = StringIO()
    with redirect_stdout(_), redirect_stderr(_):
        job = Gaia.launch_job(_query.format(s[0], s[1]))
    DR2Table = job.get_results()

    # Replace missing values for pmra, pmdec, parallax
    DR2Table['pmra'].fill_value = 0.0
    DR2Table['pmdec'].fill_value = 0.0
    DR2Table['parallax'].fill_value = 0.0
    DR2Table = Table(DR2Table.filled(), masked=True)
    # Avoid problems with small/negative parallaxes
    DR2Table['parallax'].mask = DR2Table['parallax'] <= 0.1
    DR2Table['parallax'].fill_value = 0.0999
    DR2Table = DR2Table.filled()
    # Fix units for proper motion columns
    DR2Table['pmra'].unit = 'mas / yr'
    DR2Table['pmdec'].unit = 'mas / yr'

    cat = SkyCoord(DR2Table['ra'],
                   DR2Table['dec'],
                   frame='icrs',
                   distance=Distance(parallax=DR2Table['parallax'].quantity),
                   pm_ra_cosdec=DR2Table['pmra'],
                   pm_dec=DR2Table['pmdec'],
                   obstime=Time(2015.5,
                                format='decimalyear')).apply_space_motion(
                                    new_obstime=Time('2000-01-01 00:00:00.0'))
    idx, d2d, _ = coo.match_to_catalog_sky(cat)
    if d2d > match_radius * u.arcsec:
        raise ValueError('No Gaia DR2 source within specified match radius')

    try:
        # Estimate (G-V) from the spectral type, e.g. 'G2'
        key = re.match('[AFGKM][0-9]', row['SpTy'])[0]
        GV = SpTypeToGminusV[key]
    except TypeError:
        # No usable spectral type; flag it and fall back on a default (G-V)
        flags += 1024
        GV = -0.15
    try:
        Gmag = float(row['Gmag'])
    except ValueError:
        raise ValueError('Invalid Gmag value ', row['Gmag'])
    except KeyError:
        # No G magnitude provided; estimate it from V and the spectral type
        Gmag = row['Vmag'] + GV

    if abs(Gmag - DR2Table['phot_g_mean_mag'][idx]) > gaia_mag_tolerance:
        if 'Gmag' in row.colnames:
            print("Input value: G = ", Gmag)
        else:
            print("Input values: V = {:5.2f}, SpTy = {} -> G_est = {:5.2f}".
                  format(row['Vmag'], row['SpTy'], Gmag))
        print("Catalogue values: G = {:5.2f}, Source = {}".format(
            DR2Table['phot_g_mean_mag'][idx], DR2Table['source_id'][idx]))
        raise ValueError('Nearest Gaia source does not match estimated G mag')

    if (str(row['Old_Gaia_DR2']) != str(DR2Table['source_id'][idx])):
        if id_check:
            raise ValueError('Nearest Gaia DR2 source does not match input ID')
        flags += 32768

    # Flag brighter neighbours near the target
    gmag = np.array(DR2Table['phot_g_mean_mag'])
    sep = coo.separation(cat)
    if any((sep <= 51 * u.arcsec) & (gmag < gmag[idx])):
        flags += 16384
    if any((sep > 51 * u.arcsec) & (sep < 180 * u.arcsec)
           & (gmag < gmag[idx])):
        flags += 8192

    # Contamination estimate: each neighbour's flux relative to the target,
    # weighted by the contamination function fC of its separation
    gflx = np.ma.array(10**(-0.4 * (gmag - gmag[idx])),
                       mask=False,
                       fill_value=0.0)
    gflx.mask[idx] = True  # exclude the target itself
    contam = np.nansum(gflx.filled() * fC(cat.separation(cat[idx]).arcsec))

    if contam > 1:
        flags += 4096
    elif contam > 0.1:
        flags += 2048

    return DR2Table[idx], contam, flags, cat[idx]
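
# Hedged aside (an assumption, not part of the original source): the epoch
# propagation above follows the standard astropy pattern, e.g.
#
#     import astropy.units as u
#     from astropy.coordinates import Distance, SkyCoord
#     from astropy.time import Time
#
#     c = SkyCoord(10.0 * u.deg, 20.0 * u.deg, frame='icrs',
#                  distance=Distance(parallax=10 * u.mas),
#                  pm_ra_cosdec=50 * u.mas / u.yr,
#                  pm_dec=-30 * u.mas / u.yr,
#                  obstime=Time(2015.5, format='decimalyear'))
#     c2000 = c.apply_space_motion(new_obstime=Time('2000-01-01 00:00:00.0'))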
Example #5
extnvnox = Table(data, copy=True)
extnvx = Table(data, copy=True)
qsov = Table(data, copy=True)
count = [0, 0, 0]
# Split the catalogue into X-ray detected galaxies (extnvx), X-ray undetected
# galaxies (extnvnox) and everything else (qsov).  Each copy starts with all
# rows; the rows that do not belong are removed, with count[k] tracking how
# many rows have been kept in table k so far.
for i in range(0, len(data)):
    if data['class'][i] == 'GALAXY' or data['class'][i] == 'NELG':
        # Soft X-ray flux threshold of 8e-15
        if data['SOFT_FLUX'][i] > 8e-15:
            extnvnox.remove_row(count[0])
            qsov.remove_row(count[2])
            count[1] = count[1] + 1
        else:
            extnvx.remove_row(count[1])
            qsov.remove_row(count[2])
            count[0] = count[0] + 1
    else:
        extnvnox.remove_row(count[0])
        extnvx.remove_row(count[1])
        count[2] = count[2] + 1

ascii.write(data.filled(), output, format='tab')
ascii.write(extnvnox.filled(), output + '_extnvnox', format='tab')
ascii.write(extnvx.filled(), output + '_extnvx', format='tab')
ascii.write(qsov.filled(), output + '_qsov', format='tab')
ascii.write(data3.filled(), output + '_allbands', format='tab')
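
# Hedged alternative sketch (an assumption, not in the original source): the
# same three-way split can be written with boolean masks instead of
# row-by-row removal:
#
#     is_gal = (data['class'] == 'GALAXY') | (data['class'] == 'NELG')
#     has_xray = data['SOFT_FLUX'] > 8e-15
#     extnvx = data[is_gal & has_xray]
#     extnvnox = data[is_gal & ~has_xray]
#     qsov = data[~is_gal]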
	
Example #6
import os
import numpy as np
import healpy as hp
from astropy.table import Table, vstack
from make_mocks import read_buzzard_catalog, ra_dec_in_region

region = 1
nside = 8

# Select the HEALPix pixels (nested ordering) whose centres fall in the region
pixel = np.arange(hp.nside2npix(nside))
ra_pixel, dec_pixel = hp.pix2ang(nside, pixel, nest=True, lonlat=True)
pixel_use = pixel[ra_dec_in_region(ra_pixel, dec_pixel, region)]

# Stack every tenth row of each pixel's catalogue
table_b = Table()
for pixel in pixel_use:
    table_b = vstack([table_b, read_buzzard_catalog(pixel)[::10]])
table_b = table_b.filled()
table_b.meta['bands'] = ['g', 'r', 'i', 'z', 'y', 'w1', 'w2']
table_b.keep_columns(['ra', 'dec'])
table_b.write(os.path.join('region_{}'.format(region), 'ra_dec_sample.hdf5'),
              overwrite=True,
              path='data')
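
# Hedged read-back sketch (an assumption, not part of the original source):
#
#     from astropy.table import Table
#     t = Table.read('region_1/ra_dec_sample.hdf5', path='data')
#     print(t['ra', 'dec'][:5])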
Example #7
def main(args):

    output = 'region_{}'.format(args.region)

    if not os.path.isdir(output):
        os.makedirs(output)

    print('Reading raw buzzard catalog...')
    nside = 8
    pixel = np.arange(hp.nside2npix(nside))
    ra_pixel, dec_pixel = hp.pix2ang(nside, pixel, nest=True, lonlat=True)
    pixel_use = pixel[ra_dec_in_region(ra_pixel, dec_pixel, args.region)]

    table_b = Table()
    for pixel in pixel_use:
        table_b = vstack([
            table_b,
            read_buzzard_catalog(pixel, mag_lensed=(args.stage >= 2))
        ])
    table_b = table_b.filled()
    table_b.meta['area'] = hp.nside2pixarea(nside,
                                            degrees=True) * len(pixel_use)
    table_b.meta['bands'] = ['g', 'r', 'i', 'z', 'y', 'w1', 'w2']
    np.random.seed(0)
    table_b['random_1'] = np.random.random(size=len(table_b))
    table_b['random_2'] = np.random.random(size=len(table_b))
    table_b['randint'] = np.random.randint(3, size=len(table_b))

    if args.stage in [0, 3]:

        # Lens samples: two BGS-like and two LRG-like redshift bins
        sample = ['BGS', 'BGS', 'LRG', 'LRG']
        z_min = [0.1, 0.3, 0.5, 0.7]
        z_max = [0.3, 0.5, 0.7, 0.9]

        for lens_bin in range(4):

            print('Reading lens catalog for z-bin {}...'.format(lens_bin))
            if lens_bin <= 1:
                table_l = table_b[is_BGS(table_b)]
            else:
                table_l = table_b[is_LRG(table_b)]
            table_l.rename_column('z_true', 'z')
            table_l = table_l[(z_min[lens_bin] <= table_l['z'])
                              & (table_l['z'] < z_max[lens_bin])]
            table_l['w_sys'] = 1.0
            if args.stage == 0:
                table_l['w_sys'] = table_l['w_sys'] * table_l['mu']
            print('Writing lens catalog for z-bin {}...'.format(lens_bin))
            table_l.keep_columns(['z', 'ra', 'dec', 'mag', 'w_sys'])
            fname = 'l{}_nofib'.format(lens_bin)
            if args.stage == 0:
                fname = fname + '_nomag'
            fname = fname + '.hdf5'
            table_l.write(os.path.join(output, fname),
                          overwrite=args.overwrite,
                          path='catalog',
                          serialize_meta=True)

            if args.stage != 0:
                continue

            print('Reading random catalog for z-bin {}...'.format(lens_bin))
            table_r = read_random_catalog(args.region, sample[lens_bin])
            table_r = table_r[(z_min[lens_bin] <= table_r['z'])
                              & (table_r['z'] < z_max[lens_bin])]
            print('Writing random catalog for z-bin {}...'.format(lens_bin))
            table_r.write(os.path.join(output, 'r{}.hdf5'.format(lens_bin)),
                          overwrite=args.overwrite,
                          path='catalog')

    if args.stage in [0, 1, 2]:

        if args.stage == 0:
            print('Making tailored source catalog...')
            table_s = subsample_source_catalog(table_b)
            table_s = apply_observed_shear(table_s)
            table_s = apply_shape_noise(table_s, 0.28)
            table_s = apply_photometric_redshift(table_s, None)
            table_s['w'] = table_s['mu']

            z_bins = [0.5, 0.7, 0.9, 1.1, 1.5]

            for source_bin in range(len(z_bins) - 1):
                print('Writing source catalog for z-bin {}...'.format(
                    source_bin))
                use = ((z_bins[source_bin] <= table_s['z']) &
                       (table_s['z'] < z_bins[source_bin + 1]))
                table_s_z_bin = table_s[use]
                table_s_z_bin.write(os.path.join(
                    output, 's{}_gen_nomag.hdf5'.format(source_bin)),
                                    overwrite=args.overwrite,
                                    serialize_meta=True,
                                    path='catalog')
                table_c = table_s_z_bin[np.random.randint(len(table_s_z_bin),
                                                          size=100000)]
                table_c.meta = {}
                table_c.write(os.path.join(
                    output, 'c{}_gen_nomag.hdf5'.format(source_bin)),
                              overwrite=args.overwrite,
                              path='catalog',
                              serialize_meta=True)

        else:
            for survey in ['des', 'hsc', 'kids']:

                z_bins = z_source_bins[survey]

                print('Making {}-like source catalog...'.format(survey))

                print('Reading in reference catalogs...')
                table_s_ref = read_real_source_catalog(survey)
                table_c_ref = read_real_calibration_catalog(survey)

                print('Assigning photometric redshifts...')
                table_s = apply_photometric_redshift(table_b, table_c_ref)

                print('Downsampling to target density...')
                table_s = subsample_source_catalog(table_s,
                                                   table_s_ref=table_s_ref,
                                                   survey=survey)

                print('Calculating observed shear...')
                table_s = apply_observed_shear(table_s,
                                               table_s_ref=table_s_ref,
                                               survey=survey)

                print('Applying shape noise...')
                if survey in ['des', 'kids']:

                    if survey == 'des':
                        sigma = np.array([0.26, 0.29, 0.27, 0.29])
                    else:
                        sigma = np.array([0.276, 0.269, 0.290, 0.281, 0.294])

                    sigma = sigma[np.digitize(table_s['z'], z_bins) - 1]

                else:

                    sigma = 1.0 / np.sqrt(table_s['w'])

                table_s = apply_shape_noise(table_s, sigma)

                if args.stage < 2:
                    table_s['w'] = table_s['w'] * table_s['mu']

                for source_bin in range(len(z_bins) - 1):

                    print('Writing source catalog for z-bin {}...'.format(
                        source_bin))
                    use = ((z_bins[source_bin] <= table_s['z']) &
                           (table_s['z'] < z_bins[source_bin + 1]))
                    table_s_z_bin = table_s[use]
                    fname = 's{}_{}'.format(source_bin, survey)
                    if args.stage == 1:
                        fname = fname + '_nomag'
                    fname = fname + '.hdf5'
                    table_s_z_bin.write(os.path.join(output, fname),
                                        overwrite=args.overwrite,
                                        path='catalog',
                                        serialize_meta=True)
                    fname = 'c' + fname[1:]
                    table_c = table_s_z_bin[np.random.randint(
                        len(table_s_z_bin), size=1000000)]
                    table_c.meta = {'bands': table_c.meta['bands']}
                    table_c.write(os.path.join(output, fname),
                                  overwrite=args.overwrite,
                                  path='catalog',
                                  serialize_meta=True)

    print('Finished!')

    return
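
# Hedged entry-point sketch (the argument names are inferred from the usage
# above; types and defaults are assumptions):
#
#     if __name__ == '__main__':
#         import argparse
#         parser = argparse.ArgumentParser()
#         parser.add_argument('region', type=int)
#         parser.add_argument('stage', type=int, choices=[0, 1, 2, 3])
#         parser.add_argument('--overwrite', action='store_true')
#         main(parser.parse_args())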
Example #8

import pickle

import numpy as np
from astropy.io import fits
from astropy.table import Table, join
from scipy.sparse import lil_matrix
from sklearn.cluster import DBSCAN

# get_friends and iterator_dist are helpers from the original project: a
# neighbour search in chemical-velocity space and an iterator over distances
# between the neighbour lists.
def run():
    EPS = 0.35
    MIN_SAMPLES = 8
    N_ELEM = 9
    N_CHEM = 500
    N_RV = 300
    N_CUT = 1
    DATAFILE_NAME = 'results-unregularized-matched.fits'
    FEATURE_NAMES = ['APOGEE_ID', 'GLON', 'GLAT', 'RA', 'DEC', 'VHELIO_AVG',
                     'LOGG', 'TEFF', 'PMRA', 'PMDEC',
                     'AL_H', 'NA_H', 'O_H', 'MG_H', 'C_H', 'N_H', 'V_H',
                     'TI_H', 'CA_H', 'FE_H', 'K_H', 'MN_H',
                     'NI_H', 'SI_H', 'S_H', 'SNR']
    ELEMENT_NAMES = ['V_H', 'TI_H', 'CA_H', 'FE_H', 'K_H', 'MN_H', 'NI_H',
                     'SI_H', 'S_H']
    MEMBERFILE_NAME = 'table4.dat'

    ## load data from APOGEE
    ap_file = fits.open(DATAFILE_NAME)
    ap_data = ap_file[1].data
    feature_names = np.array(FEATURE_NAMES)
    element_names = np.array(ELEMENT_NAMES)
    elements = np.array([name.replace('_H', '').title() for name in element_names])
    print("The following elements are used for clustering: ")
    print(elements)

    ## append data into columns
    ap_cols = []
    for name in feature_names:
        ap_cols.append(ap_data.field(name))
    ap_cols = np.array(ap_cols)
    ap_cols = ap_cols.T

    ## create a table with the columns
    dtype = ['float' for n in range(len(feature_names))]
    dtype[0] = 'str'
    ap_table = Table(data=ap_cols, names=feature_names, dtype=dtype)

    ## load membership file
    known_clusters = np.loadtxt(MEMBERFILE_NAME, usecols=(0, 1), dtype=str, unpack=True)
    member_IDs = known_clusters[0]
    member_names = known_clusters[1]
    labels = np.zeros(len(member_IDs))-1
    cluster_names = list(set(member_names))
    print("The following clusters are in the dataset: ")
    print(cluster_names)

    ## add membership and numerical label to table
    k = 0
    for name in cluster_names:
        index = np.where(member_names == name)[0]
        labels[index] = k
        k += 1
    names = ['APOGEE_ID', 'cluster_name', 'label']
    dtype = ['str', 'str', 'int']
    member_table = Table(data=[member_IDs, member_names, labels], names=names, dtype=dtype)
    ap_table = join(ap_table, member_table, keys='APOGEE_ID', join_type='left')

    ## fill missing values
    ap_table['cluster_name'].fill_value = 'background'
    ap_table['label'].fill_value = -1
    for element in element_names:
        ap_table[element].mask = np.isnan(ap_table[element])
        ap_table[element].fill_value = -9999.
    ap_table = ap_table.filled()

    ## get stars with valid values for all elements
    ap_stars = np.arange(len(ap_table))
    for element in element_names:
        ap_stars = np.intersect1d(ap_stars, np.where(ap_table[element] > -9999.)[0])
    ap_table = ap_table[ap_stars]
    print("There are %i stars with valid values for all elements." % len(ap_table))
    halo_index = np.where((ap_table['GLAT'] < -10.) | (ap_table['GLAT'] > 10.))[0]
    ap_table_halo = ap_table[halo_index]
    print("In the halo, there are %i stars with 15 elements." % len(ap_table_halo))

    ## get globular cluster members with valid values for all elements
    globular_names = np.array(['M107', 'M53', 'M92', 'M67', 'M5', 'M13', 'M3', 'M2', 'M15', 'N5466'])
    globular_members = np.array([], dtype='int')
    globular_labels = np.array([], dtype='int')
    k = 0
    save_list = []
    for name in globular_names:
        cluster_members = np.where(ap_table_halo['cluster_name'] == name)[0]
        if len(cluster_members) <= 0:
            print("%s has %i members" % (name, 0))
        else:
            cluster_labels = ap_table_halo['label'][cluster_members][0]
            globular_members = np.append(globular_members, cluster_members)
            globular_labels = np.append(globular_labels, cluster_labels)
            save_list.append(k)
            print("%s has %i members" % (name, len(cluster_members)))
        k += 1
    globular_names = np.array([globular_names[i] for i in save_list])
    print("The following globular clusters are in the dataset: ")
    print(globular_names)
    print("The numerical labels for globular clusters are as follows: ")
    print(globular_labels)

    ## compose a matrix that contains chemical abundances and radial velocity
    Fe_index = np.where(element_names == 'FE_H')[0][0]
    chem = [ap_table_halo[element] - ap_table_halo['FE_H'] for element in element_names]
    chem[Fe_index] = ap_table_halo['FE_H']  # keep [Fe/H] itself
    chem.append(ap_table_halo['VHELIO_AVG'])
    chem_RV = np.array(chem).T
    chem = np.delete(chem_RV, -1, 1)  # abundances only, radial velocity dropped
    print("The shape of the matrix is", chem_RV.shape)
    
    ## get the nearest neighbors in chemical-velocity space
    indices = get_friends(chem_RV, len(elements), N_CHEM, N_RV)

    ## histogram over numbers of neighbors to find min_samples
    lengths = np.array([len(indices[n]) for n in range(len(indices))])
    H, edges = np.histogram(lengths)
    print("Number of Stars/ Threshold")
    for n in range(len(H)):
        print(H[n], edges[n+1])

    ## select stars with more than N_cut neighbors
    non_noise = np.where(lengths > N_CUT)[0]
    print("%i stars will be used for clustering." % len(non_noise))

    ## show remaining globular clusters
    for name in globular_names:
        members_gc = np.where(ap_table_halo['cluster_name'] == name)[0]
        remain = np.intersect1d(members_gc, non_noise)
        if len(remain) <= 0:
            print("%s has %i members left" % (name, 0))
        else:
            print("%s has %i members, %.2f percent remaining"
                  % (name, len(remain), len(remain) * 100.0 / len(members_gc)))

    ## compose distance matrix
    S = lil_matrix((len(non_noise), len(non_noise)))
    for (n, m, dist) in iterator_dist(indices[non_noise]):
        S[n, m] = dist
        S[m, n] = dist

    ## DBSCAN clustering; the neighbour counts are used as sample weights
    db = DBSCAN(eps=EPS, min_samples=MIN_SAMPLES, metric='precomputed',
                n_jobs=-1).fit(S, sample_weight=lengths[non_noise])
    labels = db.labels_
    n_clumps = np.amax(labels) + 1
    print("%i clusters found" % n_clumps)
    print("#Categorized as Member/ Ratio of Member")
    print(len(np.where(labels != -1)[0]),
          len(np.where(labels != -1)[0]) * 1.0 / len(labels))

    ap_table_halo[non_noise].write('ap_table_halo_nn.csv')
    pickle.dump(labels, open("SNN_DBSCAN_labels.p", "wb"))
    pickle.dump(S, open("SNN_distance_matrix.p", "wb"))
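
# Hedged post-processing sketch (an assumption, not part of the original
# source): reload the saved labels and tally the recovered clump sizes.
#
#     import pickle
#     import numpy as np
#
#     labels = pickle.load(open("SNN_DBSCAN_labels.p", "rb"))
#     for clump in range(np.amax(labels) + 1):
#         print(clump, np.sum(labels == clump))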