def cat_from_coords(self, coords, toler=0.5*u.arcsec, **kwargs):
    """ Return a cut-out of the catalog matched to input coordinates
    within a tolerance.  Ordered by the input coordinate list.
    Entries without a match are Null with ID<0.

    Parameters
    ----------
    coords : SkyCoord
        Single or array
    toler : Angle, optional
    verbose : bool, optional

    Returns
    -------
    matched_cat : Table
    """
    # Generate the dummy table
    if len(coords.shape) == 0:
        ncoord = 1
    else:
        ncoord = coords.shape[0]
    matched_cat = Table(np.repeat(np.zeros_like(self.cat[0]), ncoord))
    # Grab IDs
    IDs = self.match_coord(coords, toler=toler, **kwargs)
    # Find rows in catalog
    rows = match_ids(IDs, self.cat[self.idkey], require_in_match=False)
    # Fill
    gd_rows = rows >= 0
    matched_cat[np.where(gd_rows)] = self.cat[rows[gd_rows]]
    # Null the rest
    matched_cat[self.idkey][np.where(~gd_rows)] = IDs[~gd_rows]
    # Return
    return matched_cat

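# A minimal usage sketch for cat_from_coords (not from the original source).
# It assumes `qcat` is an already-loaded source-catalog object exposing this
# method (accessed elsewhere in this module as `.qcat`); the coordinates are
# illustrative only.
def _example_cat_from_coords(qcat):
    from astropy.coordinates import SkyCoord
    import astropy.units as u
    coords = SkyCoord(ra=[12.5, 150.1], dec=[-1.2, 2.3], unit='deg')
    # Rows align with the input coords; unmatched entries come back with ID < 0
    matched_cat = qcat.cat_from_coords(coords, toler=1.0*u.arcsec)
    return matched_cat
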
def chk_in_group(self, IDs, group):
    """ Check whether a set of IDs are in a specified group

    Parameters
    ----------
    IDs : int ndarray
    group : str

    Returns
    -------
    answer : bool
        True if all IDs are in the group
    in_out : bool ndarray
        True/False for each ID
    """
    # Find rows in catalog
    cat_rows = match_ids(IDs, self.cat[self.idkey].data)
    # Flags
    sflag = self.group_dict[group]
    flags = self.cat['flag_group'][cat_rows]
    # Query on binary flag
    query = (flags % (sflag*2)) >= sflag
    # Answer
    answer = np.sum(query) == IDs.size
    # Return
    return answer, query

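# Illustration of the binary group-flag test used in chk_in_group (a sketch,
# not from the original source).  Each group is assigned a power-of-two flag,
# and (flags % (sflag*2)) >= sflag tests whether that bit is set in the
# summed flag_group value.
def _example_group_flag_test():
    import numpy as np
    sflag = 4                      # hypothetical bit value for one group
    flags = np.array([4, 6, 3])    # summed flag_group values for three sources
    in_group = (flags % (sflag*2)) >= sflag
    # in_group -> [True, True, False]: only the first two have the 4-bit set
    return in_group
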
def groups_containing_IDs(self, IDs, igroup=None):
    """ Return a list of all groups that contain all of the input IDs

    Parameters
    ----------
    IDs : int or ndarray
    igroup : list, optional
        List of groups to consider
        Default is the full list of DB groups

    Returns
    -------
    gd_groups : list
        List of groups containing all of the input IDs
    """
    if isinstance(IDs, int):
        IDs = np.array([IDs])
    nIDs = IDs.size
    if igroup is None:
        igroup = self.groups
    # Find rows in catalog
    cat_rows = match_ids(IDs, self.cat[self.idkey])
    flags = self.cat['flag_group'][cat_rows]
    gd_groups = []
    for group in igroup:
        sflag = self.group_dict[group]
        # In the group?
        query = (flags & sflag).astype(bool)
        if np.sum(query) == nIDs:
            gd_groups.append(group)
    # Return
    return gd_groups

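# Usage sketch for groups_containing_IDs (not from the original source).
# Assumes `qcat` is a loaded source-catalog object; the IDs and group names
# are illustrative only.
def _example_groups_containing_ids(qcat):
    import numpy as np
    IDs = np.array([1, 5, 23])
    # Which of the two candidate groups contain *all* three sources?
    return qcat.groups_containing_IDs(IDs, igroup=['BOSS-DR12', 'SDSS_DR7'])
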
def grab_specmeta(self, rows, verbose=None, masking='edges', use_XSpec=True,
                  **kwargs):
    """ Grab the spectra and meta data for an input set of rows
    Aligned to the rows input

    Parameters
    ----------
    rows : int or ndarray
    verbose
    kwargs

    Returns
    -------
    spec : XSpectrum1D or ndarray
        Spectra requested, ordered by the input rows
    meta : Table  -- THIS MAY BE DEPRECATED
        Meta table, ordered by the input rows
    """
    if isinstance(rows, (int, np.int64)):
        rows = np.array([rows])  # Ensures meta and other arrays are proper
    if verbose is None:
        verbose = self.verbose
    # Check spectra even exist! (can be only meta data)
    if 'spec' not in list(self.hdf[self.group].keys()):
        warnings.warn("No spectra in group: {:s}".format(self.group))
        return None, None
    # Check memory
    if self.stage_data(rows, **kwargs):
        if verbose:
            print("Loaded spectra")
        # Load
        msk = np.array([False] * len(self.meta))
        msk[rows] = True
        tmp_data = self.hdf[self.group]['spec'][msk]
        # Replicate and sort according to input rows
        idx = match_ids(rows, np.where(msk)[0])
        data = tmp_data[idx]
    else:
        print("Staging failed..  Not returning spectra")
        return None, None
    # Generate XSpectrum1D
    if 'co' in data.dtype.names:
        co = data['co']
    else:
        co = None
    if use_XSpec:
        spec = XSpectrum1D(data['wave'], data['flux'], sig=data['sig'], co=co,
                           masking=masking)
    else:
        spec = data
    # Return
    return spec, self.meta[rows]

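# Usage sketch for grab_specmeta (not from the original source).  Assumes
# `igroup` is a loaded interface to a single spectral group, e.g. obtained by
# indexing the database with a group name as in tpe_stack_lris (qpq['LRIS']);
# the row numbers are illustrative only.
def _example_grab_specmeta(igroup):
    import numpy as np
    rows = np.array([0, 10, 42])
    spec, meta = igroup.grab_specmeta(rows, use_XSpec=True)
    # spec is an XSpectrum1D ordered like `rows`; meta is the matching Table
    return spec, meta
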
def add_ids(maindb, meta, flag_g, tkeys, idkey, first=False, **kwargs):
    """ Add IDs to the main catalog

    The input meta table has its CAT_ID values set in place

    Parameters
    ----------
    maindb : Table
        Main catalog
    meta : Table
        Meta table being added
    flag_g : int
        Flag for the new group
    tkeys : list
        List of main keys for the catalog
    idkey : str
        ID key
    first : bool, optional
        First call to the routine?

    Returns
    -------
    maindb : Table
        Updated catalog table
    """
    newcut, new, ids = set_new_ids(maindb, meta, idkey, first=first, **kwargs)
    # If new sources
    if np.sum(new) > 0:
        newcut['flag_group'] = flag_g
        newcut.rename_column('RA_GROUP', 'RA')
        newcut.rename_column('DEC_GROUP', 'DEC')
        newcut.rename_column('zem_GROUP', 'zem')
        cat_meta = newcut[tkeys]
    # Set or append
    if first:
        maindb = cat_meta
    else:
        # Update group flags
        old_ids = ids[~new]
        midx = match_ids(old_ids, maindb[idkey].data)  # np.array(maindb[idkey][ids[~new]])
        maindb['flag_group'][midx] += flag_g  # ASSUMES NOT SET ALREADY
        if np.sum(new) > 0:
            # Catalog
            assert chk_maindb_join(maindb, cat_meta)
            # Append
            maindb = vstack([maindb, cat_meta], join_type='exact')
    # Return
    return maindb

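# Sketch of how a new group might be folded into the catalog with add_ids
# (not from the original source).  The group flag is assumed to be a unique
# power of two so that summed flag_group values stay separable; the idkey and
# tkeys column names here are illustrative only.
def _example_add_ids(maindb, new_meta):
    flag_g = 2**3                                          # hypothetical bit for the new group
    tkeys = ['CAT_ID', 'RA', 'DEC', 'zem', 'flag_group']   # illustrative key list
    return add_ids(maindb, new_meta, flag_g, tkeys, 'CAT_ID', first=False)
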
def find_ids_in_groups(self, groups, IDs=None, in_all=True):
    """ Return a list of IDs of sources located in one or more groups.

    If IDs is input, the subset that are within the input groups is returned.
    Default is to require the source occur in all of the input groups.
    Use in_all=False to only require the source be in at least one of the groups.

    Parameters
    ----------
    groups : list
        List of groups to consider, e.g. ['BOSS-DR12', 'SDSS_DR7']
    IDs : ndarray, optional
        If not input, use the entire catalog of IDs
    in_all : bool, optional
        Require that the source(s) be within *all* of the input groups
        If False, only require membership in at least one group

    Returns
    -------
    gdIDs : int array
        IDs in the group(s)
    good : bool array
        True/False for ID within group(s)
        Mainly useful if the user inputs a set of IDs
    """
    # Init
    ngroup = len(groups)
    if IDs is None:
        IDs = self.cat[self.idkey].data
    # Flags
    cat_rows = match_ids(IDs, self.cat[self.idkey].data, require_in_match=True)
    fs = self.cat['flag_group'][cat_rows].data
    msk = np.zeros_like(fs).astype(int)
    for group in groups:
        flag = self.group_dict[group]
        # In the group?
        query = (fs % (flag*2)) >= flag
        msk[query] += 1
    if in_all:
        good = msk == ngroup
    else:
        good = msk >= 1
    gdIDs = IDs[good]
    # Return
    return gdIDs, good

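# Usage sketch for find_ids_in_groups (not from the original source).  Assumes
# `qcat` is a loaded source-catalog object; the group names are the illustrative
# examples from the docstring above.
def _example_find_ids_in_groups(qcat):
    # Sources present in *both* groups
    gdIDs, good = qcat.find_ids_in_groups(['BOSS-DR12', 'SDSS_DR7'])
    # Sources present in at least one of the two groups
    anyIDs, _ = qcat.find_ids_in_groups(['BOSS-DR12', 'SDSS_DR7'], in_all=False)
    return gdIDs, anyIDs
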
def cat_from_ids(self, IDs):
    """ Return the catalog rows matching the input IDs

    Parameters
    ----------
    IDs : ndarray
        IDKEY values

    Returns
    -------
    matched_cat : Table
        Catalog entries matching the input IDs
    """
    # Find rows in catalog
    rows = match_ids(IDs, self.cat[self.idkey], require_in_match=True)
    # Fill
    matched_cat = self.cat[rows]
    # Return
    return matched_cat

def groupids_to_rows(self, group_IDs):
    """ Convert GROUP_ID values to rows in the meta table

    Mainly used to then grab the corresponding spectra

    Parameters
    ----------
    group_IDs : int or ndarray

    Returns
    -------
    rows : ndarray
    """
    # Checking
    if isinstance(group_IDs, int):
        group_IDs = np.array([group_IDs])  # Ensures meta and other arrays are proper
    # Find rows
    rows = match_ids(group_IDs, self.meta['GROUP_ID'])
    # Return
    return rows

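# Usage sketch chaining groupids_to_rows with grab_specmeta (not from the
# original source).  Assumes `igroup` is a loaded interface to a single
# spectral group; the GROUP_ID values are illustrative only.
def _example_groupids_to_spectra(igroup):
    import numpy as np
    group_IDs = np.array([3, 7, 11])
    rows = igroup.groupids_to_rows(group_IDs)
    spec, meta = igroup.grab_specmeta(rows)
    return spec, meta
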
def meta_from_coords(self, coords, cat_query=None, meta_query=None, groups=None,
                     first=True, **kwargs):
    """ Return meta data for an input set of coordinates

    Parameters
    ----------
    coords : SkyCoord
        Expecting an array of coordinates
    cat_query : dict, optional
        Query the catalog
    meta_query : dict, optional
        Query the meta tables
    groups : list, optional
        If provided, the meta data of the groups are searched in the list order
    first : bool, optional
        Only provide the first entry found for the source
    kwargs

    Returns
    -------
    matches : bool array
        True if the coordinate + query matches in database
    final_meta : masked Table or list
        If first=True (default), the method returns a masked Table
        with each row aligned to the input coordinates.  Entries that do not
        match are fully masked.  The entry is the first one found (looping
        over groups).
        If first=False, this is a list of bool arrays that point to the
        entries in the stacked table (which follows).  This avoids generating
        N Tables, which is very slow.
    stack : Table, optional
        Only returned if first=False
    """
    from specdb.cat_utils import match_ids
    # Cut down using the source catalog
    matches, matched_cat, IDs = self.qcat.query_coords(coords, query_dict=cat_query,
                                                       groups=groups, **kwargs)
    gdIDs = np.where(IDs >= 0)[0]
    # Setup
    if meta_query is None:
        query_dict = {}
    else:
        query_dict = meta_query.copy()
    query_dict[self.idkey] = IDs[gdIDs].tolist()
    # Generate sub_groups for looping -- One by one is too slow for N > 100
    #   This just requires a bit more book-keeping
    all_fgroup = np.unique(matched_cat['flag_group'])
    sub_groups = []
    for group, bit in self.group_dict.items():
        if np.sum(all_fgroup & bit) > 0:
            sub_groups.append(group)
    # If groups was input, cut down and order by groups
    if groups is not None:
        new_sub = []
        for group in groups:
            if group in sub_groups:
                new_sub.append(group)
        # Replace
        sub_groups = new_sub
    # Loop on sub_groups
    meta_list = []
    meta_groups = []
    for sub_group in sub_groups:
        # Need to call query_meta to add in the GROUP name
        meta = self.query_meta(query_dict, groups=[sub_group], **kwargs)
        if meta is not None:
            meta_list.append(meta)
            meta_groups.append(sub_group)
    # Stack
    if len(meta_list) == 0:
        matches[:] = False
        if first:
            return matches, None
        else:
            return matches, [None]*matches.size
    elif len(meta_list) == 1:
        stack = meta_list[0]
    else:
        stack = spdbu.clean_vstack(meta_list, meta_groups)
    # Book-keeping
    if first:
        final_meta = Table(np.repeat(np.zeros_like(stack[0]), len(IDs)), masked=True)
        # Find good IDs in the stacked Table
        rows = match_ids(IDs[gdIDs], stack[self.idkey], require_in_match=False)
        gd_rows = rows >= 0
        # Fill
        final_meta[gdIDs[gd_rows]] = stack[rows[gd_rows]]
        # Mask bad rows but fill in IDs -- Faster to work on columns
        matches[gdIDs[~gd_rows]] = False
        msk_rows = np.where(~matches)[0]
        for key in final_meta.keys():
            final_meta[key].mask[msk_rows] = True
        #for row in np.where(~matches)[0]:
        #    final_meta.mask[row] = [True]*len(final_meta.mask[row])
        final_meta[self.idkey][np.where(~matches)] = IDs[~matches]
        print("Final query yielded {:d} matches with group meta data.".format(np.sum(matches)))
        # Return
        return matches, final_meta
    else:
        final_list = [None]*matches.size
        # Loop on coords
        gdI = np.where(matches)[0]
        for ii, jj in enumerate(gdI):
            if self.verbose and ((ii % 100) == 0):
                print('Done with {:d} of {:d}'.format(ii, len(gdI)))
            gd_rows = stack[self.idkey] == IDs[jj]
            final_list[jj] = gd_rows
        return matches, final_list, stack

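# Usage sketch for meta_from_coords (not from the original source).  Assumes
# `sdb` is a loaded specdb-style database object exposing this method; the
# coordinates and group name are illustrative only.
def _example_meta_from_coords(sdb):
    from astropy.coordinates import SkyCoord
    coords = SkyCoord(ra=[12.5, 150.1], dec=[-1.2, 2.3], unit='deg')
    # first=True (default): one masked row per input coordinate, first match wins
    matches, meta = sdb.meta_from_coords(coords, groups=['BOSS-DR12'])
    # first=False: boolean arrays into the stacked meta table of all matches
    matches, match_list, stack = sdb.meta_from_coords(coords, first=False)
    return matches, meta
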
def tpe_stack_lris(dv=100*u.km/u.s):
    """ Testing stacks with LRIS
    """
    # Load sample
    ipos = this_file.rfind('/')
    if ipos == -1:
        path = './'
    else:
        path = this_file[0:ipos]
    tpe = Table.read(path + '/../TPE_DR12_31.2_spec.fits')
    # Load spectra
    # Coordinates
    b_coords = SkyCoord(ra=tpe['BG_RA'], dec=tpe['BG_DEC'], unit='deg')
    f_coords = SkyCoord(ra=tpe['FG_RA'], dec=tpe['FG_DEC'], unit='deg')
    # Cut on impact parameter and LRIS
    kpc_amin = cosmo.kpc_comoving_per_arcmin(tpe['FG_Z'])  # kpc per arcmin
    ang_seps = b_coords.separation(f_coords)
    rho = ang_seps.to('arcmin') * kpc_amin / (1 + tpe['FG_Z'])
    cut_Rlris = (rho.to('Mpc').value < 4) & (tpe['BG_LYA_INSTRUMENT'] == 'LRIS')
    # & (tpe['FG_Z'] > 2.)  # Some of these have too low z (just barely)
    # Cut
    gd_b_coords = b_coords[cut_Rlris]
    gd_tpe = tpe[cut_Rlris]
    # Grab these spectra from QPQ
    #   We are ok taking the first entry of each
    #   The returned set is aligned with the input coords
    qpq = IgmSpec(db_file=qpq_file, skip_test=True)
    IDs = qpq.qcat.match_coord(gd_b_coords, group='LRIS')
    meta = qpq['LRIS'].meta
    gcut = meta['GRATING'] == '1200/3400'  # There is one with B400
    B1200 = np.in1d(IDs, meta['PRIV_ID'][gcut])
    print("There are {:d} sources without B1200".format(np.sum(~B1200)))
    # Cut again
    gd_b_coords = gd_b_coords[B1200]
    gd_tpe = gd_tpe[B1200]
    gd_IDs = IDs[B1200]
    # Find the rows
    idx = cat_utils.match_ids(gd_IDs, meta['PRIV_ID'])
    rows = meta['GROUP_ID'][idx]
    pdb.set_trace()
    spec, meta = qpq.coords_to_spectra(gd_b_coords, 'LRIS', all_spec=False)
    # Check for continua
    has_co = np.array([True] * spec.nspec)
    for ii in range(spec.nspec):
        # Select
        spec.select = ii
        # Match to Lya
        lya = (1 + gd_tpe['FG_Z'][ii]) * 1215.67 * u.AA
        iwave = np.argmin(np.abs(spec.wavelength - lya))
        # Check for co
        #coval = spec.co[iwave]
        #print('spec: {:d} with co={:g}'.format(ii, coval))
        if np.isclose(spec.co[iwave], 0.) or np.isclose(spec.co[iwave], 1.):
            has_co[ii] = False
    # Slice to good co
    print("{:d} LRIS spectra with a continuum".format(np.sum(has_co)))
    co_spec = spec[has_co]
    co_spec.normed = True  # Apply continuum
    # NEED TO ZERO OUT REGIONS WITHOUT CONTINUUM
    #   May also wish to isolate in wavelength to avoid rejected pixels
    for ii in range(co_spec.nspec):
        co_spec.select = ii
        co = co_spec.co.value
        bad_pix = np.any([(co == 0.), (co == 1.)], axis=0)
        co_spec.add_to_mask(bad_pix, compressed=True)
    # Rebin to rest
    zarr = gd_tpe['FG_Z'][has_co]
    rebin_spec = lspu.rebin_to_rest(co_spec, zarr, dv)
    # Stack
    stack = lspu.smash_spectra(rebin_spec)
    # Plot
    plot_stack(stack, 'LRIS_stack.pdf')
    return stack