def chk_vstack(hdf):
    """ Check whether the meta data in a specdb database can be stacked
    with specdb.utils.clean_vstack

    Parameters
    ----------
    hdf : HDF5 pointer
        Open HDF5 file whose top-level groups each may contain a 'meta' dataset

    Returns
    -------
    chk : bool
        True if the one-row meta snippets from all usable groups stack
        cleanly; False if clean_vstack raises
    """
    meta_tables = []
    labels = []
    for key in hdf.keys():
        try:
            # NOTE(review): .value is the legacy h5py accessor (removed in
            # h5py>=3) -- presumably this file targets h5py<3; confirm
            meta = Table(hdf[key]['meta'].value)
        except (KeyError, ValueError):
            # Group has no usable 'meta' dataset -- skip it
            print("Skipping data group {:s}".format(key))
        else:
            # Save a snippet (first row only) to keep the stack cheap
            meta_tables.append(meta[0:1])
            labels.append(key)
    # Try to stack; any failure here means the database is not stackable.
    try:
        # Result is discarded -- only success/failure matters here.
        clean_vstack(meta_tables, labels)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate instead of being reported as a failed check.
        chk = False
    else:
        print("Passing chk_vstack...")
        chk = True
    # Return
    return chk
def run_tst(sdb):
    """Stack one-row meta snippets from every group of *sdb*.

    Parameters
    ----------
    sdb : specdb database object
        Must expose ``.groups`` and per-group ``.meta`` tables.
    """
    # One-row snippet per group, gathered in group order
    snippets = [sdb[group].meta[0:1] for group in sdb.groups]
    # Stack them; raises if the meta tables are incompatible
    stack = utils.clean_vstack(snippets, sdb.groups)
def meta_from_coords(self, coords, cat_query=None, meta_query=None, groups=None,
                     first=True, **kwargs):
    """ Return meta data for an input set of coordinates

    Parameters
    ----------
    coords : SkyCoord
        Expecting an array of coordinates
    cat_query : dict, optional
        Query the catalog
    meta_query : dict, optional
        Query the meta tables
    groups : list, optional
        If provided, the meta data of the groups are searched in the list order
    first : bool, optional
        Only provide the first entry found for the source
    kwargs

    Returns
    -------
    matches : bool array
        True if the coordinate + query matches in database
    final_meta : masked Table or list
        If first=True (default), the method returns a masked Table
        with each row aligned to the input coordinates.  Entries that
        do not match are fully masked.  The entry is the first one
        found (looping over groups).
        If first=False, this is a list of bool arrays that point to
        the entries in the stack table (which follows).  This avoids
        generating N Tables which is very slow
    stack : Table, optional
        Only returned if first=False
    """
    from specdb.cat_utils import match_ids
    # Cut down using source catalog
    matches, matched_cat, IDs = self.qcat.query_coords(coords, query_dict=cat_query,
                                                       groups=groups, **kwargs)
    # Indices of coordinates with a valid catalog ID (negative IDs = no match)
    gdIDs = np.where(IDs >= 0)[0]
    # Setup the meta query dict; copy so the caller's dict is not mutated
    if meta_query is None:
        query_dict = {}
    else:
        query_dict = meta_query.copy()
    # Restrict the meta query to the matched catalog IDs
    query_dict[self.idkey] = IDs[gdIDs].tolist()
    # Generate sub_groups for looping -- One by one is too slow for N > 100
    # This just requires a bit more book-keeping
    # flag_group is a bitmask of group membership; keep only groups whose
    # bit appears among the matched sources
    all_fgroup = np.unique(matched_cat['flag_group'])
    sub_groups = []
    for group, bit in self.group_dict.items():
        if np.sum(all_fgroup & bit) > 0:
            sub_groups.append(group)
    # If groups was input, cut down and order by groups
    if groups is not None:
        new_sub = []
        for group in groups:
            if group in sub_groups:
                new_sub.append(group)
        # Replace
        sub_groups = new_sub
    # Loop on sub_groups, collecting one meta Table per group that matched
    meta_list = []
    meta_groups = []
    for sub_group in sub_groups:
        # Need to call this query_meta to add in GROUP name
        meta = self.query_meta(query_dict, groups=[sub_group], **kwargs)
        if meta is not None:
            meta_list.append(meta)
            meta_groups.append(sub_group)
    # Stack
    if len(meta_list) == 0:
        # No group yielded meta -- report zero matches in both return shapes
        matches[:] = False
        if first:
            return matches, None
        else:
            return matches, [None]*matches.size
    elif len(meta_list) == 1:
        # Single group: no stacking needed
        stack = meta_list[0]
    else:
        stack = spdbu.clean_vstack(meta_list, meta_groups)
    # Book-keeping
    if first:
        # Build a masked Table with one row per input coordinate, using a
        # zeroed copy of the first stacked row as the template for all rows
        final_meta = Table(np.repeat(np.zeros_like(stack[0]), len(IDs)), masked=True)
        # Find good IDs in stacked Table
        rows = match_ids(IDs[gdIDs], stack[self.idkey], require_in_match=False)
        gd_rows = rows >= 0
        # Fill
        final_meta[gdIDs[gd_rows]] = stack[rows[gd_rows]]
        # Mask bad rows but fill in IDs -- Faster to work on columns
        matches[gdIDs[~gd_rows]] = False
        msk_rows = np.where(~matches)[0]
        for key in final_meta.keys():
            final_meta[key].mask[msk_rows] = True
        #for row in np.where(~matches)[0]:
        #    final_meta.mask[row] = [True]*len(final_meta.mask[row])
        # Unmatched rows still carry their (negative) input ID
        final_meta[self.idkey][np.where(~matches)] = IDs[~matches]
        print("Final query yielded {:d} matches with group meta data.".format(np.sum(matches)))
        # Return
        return matches, final_meta
    else:
        # first=False: return boolean row-selectors into `stack` instead of
        # materializing one Table per coordinate (much faster for large N)
        final_list = [None]*matches.size
        # Loop on coords
        gdI = np.where(matches)[0]
        for ii, jj in enumerate(gdI):
            # NOTE(review): bitwise & on a bool and a comparison works here,
            # but `and` would be the conventional operator -- confirm intent
            if self.verbose & ((ii % 100) == 0):
                print('Done with {:d} of {:d}'.format(ii, len(gdI)))
            gd_rows = stack[self.idkey] == IDs[jj]
            final_list[jj] = gd_rows
        return matches, final_list, stack