def remove_filters_from_files(catfile, physgrid, obsgrid, outbase, rm_filters):
    """
    Remove filters from a catalog, physics model grid, and obsmodel grid.

    Three files are written, all prefixed with `outbase`:
    `<outbase>_cat.fits`, `<outbase>_sed.grid.hd5`, and
    `<outbase>_noisemodel.grid.hd5`.

    Parameters
    ----------
    catfile : string
        file name of the photometry catalog

    physgrid : string
        file name of the physics model (SED) grid

    obsgrid : string
        file name of the obsmodel (noise model) grid

    outbase : string
        prefix prepended to all output file names

    rm_filters : string or list of strings
        Filters to remove.  Each name is used as-is to build the
        `<name>_rate` catalog column name, and is compared to the lowercased
        last "_"-separated token of every filter name in the grid header.
    """
    # a single filter passed as a bare string must not be iterated by character
    rm_list = [str(cfilter) for cfilter in np.atleast_1d(rm_filters)]

    # remove the requested filters from the catalog file
    cat = Table.read(catfile)
    for cfilter in rm_list:
        colname = "{}_rate".format(cfilter)
        if colname in cat.colnames:
            cat.remove_column(colname)
        else:
            print("{} not in catalog file".format(colname))
    cat.write("{}_cat.fits".format(outbase), overwrite=True)

    # get the sed grid and process
    g0 = FileSEDGrid(physgrid, backend="cache")
    filters = g0.header["filters"].split(" ")
    shortfilters = [(cfilter.split("_"))[-1].lower() for cfilter in filters]

    nlamb = []
    nfilters = []
    rindxs = []
    # Record the position of every removed filter via enumerate.  Looking the
    # position up with list.index() returns the first match only, so two
    # filters sharing a short name would delete the same column twice.
    for k, (csfilter, clamb, cfilter) in enumerate(
        zip(shortfilters, g0.lamb, filters)
    ):
        if csfilter in rm_list:
            rindxs.append(k)
        else:
            nlamb.append(clamb)
            nfilters.append(cfilter)
    nseds = np.delete(g0.seds, rindxs, 1)

    print("orig filters: {}".format(" ".join(filters)))
    print(" new filters: {}".format(" ".join(nfilters)))

    g = SpectralGrid(np.array(nlamb), seds=nseds, grid=g0.grid, backend="memory")
    g.grid.header["filters"] = " ".join(nfilters)
    g.writeHDF("{}_sed.grid.hd5".format(outbase))

    # get and process the observation model
    # (the same column indices are dropped from every noise model array)
    obsgrid = noisemodel.get_noisemodelcat(obsgrid)
    with tables.open_file("{}_noisemodel.grid.hd5".format(outbase), "w") as outfile:
        outfile.create_array(
            outfile.root, "bias", np.delete(obsgrid.root.bias, rindxs, 1)
        )
        outfile.create_array(
            outfile.root, "error", np.delete(obsgrid.root.error, rindxs, 1)
        )
        outfile.create_array(
            outfile.root,
            "completeness",
            np.delete(obsgrid.root.completeness, rindxs, 1),
        )
def remove_filters_from_files(catfile, physgrid, obsgrid, outbase, rm_filters):
    """
    Remove filters from a catalog, physics model grid, and obsmodel grid.

    Three files are written, all prefixed with `outbase`:
    `<outbase>_cat.fits`, `<outbase>_sed.grid.hd5`, and
    `<outbase>_noisemodel.grid.hd5`.

    Parameters
    ----------
    catfile : string
        file name of the photometry catalog

    physgrid : string
        file name of the physics model (SED) grid

    obsgrid : string
        file name of the obsmodel (noise model) grid

    outbase : string
        prefix prepended to all output file names

    rm_filters : string or list of strings
        Filters to remove.  Each name is used as-is to build the
        `<name>_rate` catalog column name, and is compared to the lowercased
        last "_"-separated token of every filter name in the grid header.
    """
    # a single filter passed as a bare string must not be iterated by character
    rm_list = [str(cfilter) for cfilter in np.atleast_1d(rm_filters)]

    # remove the requested filters from the catalog file
    cat = Table.read(catfile)
    for cfilter in rm_list:
        colname = '{}_rate'.format(cfilter)
        if colname in cat.colnames:
            cat.remove_column(colname)
        else:
            print('{} not in catalog file'.format(colname))
    cat.write('{}_cat.fits'.format(outbase), overwrite=True)

    # get the sed grid and process
    g0 = FileSEDGrid(physgrid, backend='cache')
    filters = g0.header['filters'].split(' ')
    shortfilters = [(cfilter.split('_'))[-1].lower() for cfilter in filters]

    nlamb = []
    nfilters = []
    rindxs = []
    # Record the position of every removed filter via enumerate.  Looking the
    # position up with list.index() returns the first match only, so two
    # filters sharing a short name would delete the same column twice.
    for k, (csfilter, clamb, cfilter) in enumerate(
        zip(shortfilters, g0.lamb, filters)
    ):
        if csfilter in rm_list:
            rindxs.append(k)
        else:
            nlamb.append(clamb)
            nfilters.append(cfilter)
    nseds = np.delete(g0.seds, rindxs, 1)

    print('orig filters: {}'.format(' '.join(filters)))
    print(' new filters: {}'.format(' '.join(nfilters)))

    g = SpectralGrid(np.array(nlamb), seds=nseds, grid=g0.grid, backend='memory')
    g.grid.header['filters'] = ' '.join(nfilters)
    g.writeHDF('{}_sed.grid.hd5'.format(outbase))

    # get and process the observation model
    # (the same column indices are dropped from every noise model array)
    obsgrid = noisemodel.get_noisemodelcat(obsgrid)
    with tables.open_file('{}_noisemodel.grid.hd5'.format(outbase), 'w') as outfile:
        outfile.create_array(
            outfile.root, 'bias', np.delete(obsgrid.root.bias, rindxs, 1)
        )
        outfile.create_array(
            outfile.root, 'error', np.delete(obsgrid.root.error, rindxs, 1)
        )
        outfile.create_array(
            outfile.root,
            'completeness',
            np.delete(obsgrid.root.completeness, rindxs, 1),
        )
def _find_filters_to_remove(filters, rm_filters=None, cat_filters=None, beast_filt=None):
    """Return the indices into `filters` of the grid filters to delete."""
    # short name -> BEAST filter name that must be used for that short name
    # (first entry wins, matching a list.index() lookup)
    preferred = {}
    if beast_filt is not None:
        for full_name in beast_filt:
            preferred.setdefault(full_name.split("_")[-1].upper(), full_name)

    if rm_filters is not None:
        # grid short names are upper-cased, so compare case-insensitively
        rm_set = {str(f).upper() for f in np.atleast_1d(rm_filters)}
    else:
        cat_set = set(cat_filters)

    rindxs = []
    for k, cfilter in enumerate(filters):
        csfilter = cfilter.split("_")[-1].upper()
        # True unless beast_filt assigns this short name to a different
        # instrument+filter than the current grid filter
        matches_ref = preferred.get(csfilter, cfilter) == cfilter
        if rm_filters is not None:
            # user-chosen filters: delete the requested ones, but keep a
            # same-named filter belonging to another instrument
            remove = csfilter in rm_set and matches_ref
        else:
            # catalog-driven: keep only filters present in the catalog (and,
            # when ambiguous, only the instrument named in beast_filt)
            remove = not (csfilter in cat_set and matches_ref)
        if remove:
            rindxs.append(k)
    return rindxs


def remove_filters_from_files(
    catfile,
    physgrid=None,
    obsgrid=None,
    outbase=None,
    physgrid_outfile=None,
    rm_filters=None,
    beast_filt=None,
):
    """
    Remove filters from catalog, physics grid, and/or obsmodel grid.  This has
    two primary use cases:

    1. When making simulated observations, you want to test how your fit
       quality changes with different combinations of filters.  In that case,
       put in files for both `physgrid` and `obsgrid`.  Set `rm_filters` to the
       filter(s) you wish to remove, and they will be removed both from those
       and from the catalog file.  The three new files will be output with the
       name prefix set in `outbase`.

    2. When running the BEAST, you have a master physics model grid with all
       filters present in the survey, but some fields don't have observations
       in all of those filters.  In that case, put the master grid in
       `physgrid` and set `rm_filters` to None.  The catalog will be used to
       determine the filters to remove (if any).  `obsgrid` should be left as
       None, because in this use case, the obsmodel grid has not yet been
       generated.  The output physics model grid will be named using the
       filename in `physgrid_outfile` (if given) or with the prefix in
       `outbase`.

    Parameters
    ----------
    catfile : string
        file name of photometry catalog

    physgrid : string (default=None)
        If set, remove filters from this physics model grid

    obsgrid : string (default=None)
        If set, remove filters from this obsmodel grid.  Requires `physgrid`
        (its filter list determines which columns are deleted) and `outbase`.

    outbase : string (default=None)
        Path+file to prepend to all output file names.  Useful for case 1
        above.

    physgrid_outfile : string (default=None)
        Path+name of the output physics model grid.  Useful for case 2 above.

    rm_filters : string or list of strings (default=None)
        If set, these are the filters to remove from all of the files
        (matched case-insensitively).  If not set, only the filters present in
        catfile will be retained in physgrid and/or obsgrid.

    beast_filt : list of strings
        Sometimes there is ambiguity in the filter name (e.g., the grid has
        both HST_ACS_WFC_F475W and HST_WFC3_F475W, and the filter name is
        F475W).  Set this to the BEAST filter name to resolve any
        ambiguities.  For example, ['HST_WFC3_F475W', 'HST_WFC3_F814W'] ensures
        that these are the names used for F475W and F814W.

    Raises
    ------
    ValueError
        If `physgrid` is set but neither `outbase` nor `physgrid_outfile` is,
        or if `obsgrid` is set without both `physgrid` and `outbase`.
    """
    # check the argument combinations before any file is written
    if physgrid is not None and physgrid_outfile is None and outbase is None:
        raise ValueError("Need to set either outbase or physgrid_outfile")
    if obsgrid is not None and physgrid is None:
        raise ValueError("Need to set physgrid to remove filters from obsgrid")
    if obsgrid is not None and outbase is None:
        raise ValueError("Need to set outbase to write the obsmodel grid")

    # read in the photometry catalog
    cat = Table.read(catfile)

    cat_filters = None
    if rm_filters is not None:
        # remove the requested filters from the catalog
        for cfilter in np.atleast_1d(rm_filters):
            colname = "{}_rate".format(cfilter)
            if colname.upper() in cat.colnames:
                cat.remove_column(colname.upper())
            elif colname.lower() in cat.colnames:
                cat.remove_column(colname.lower())
            else:
                print("{} not in catalog file".format(colname))
        # NOTE(review): with outbase=None this writes "None_cat.fits"; kept
        # as-is so existing callers that rely on physgrid_outfile still run
        cat.write("{}_cat.fits".format(outbase), overwrite=True)
    else:
        # extract the filter names that are present in the catalog
        cat_filters = [
            f[:-5].upper() for f in cat.colnames if f[-4:].lower() == "rate"
        ]

    # if physgrid set, process the SED grid
    if physgrid is not None:

        # read in the sed grid
        g0 = FileSEDGrid(physgrid, backend="cache")

        # extract info and determine what needs deleting
        filters = g0.header["filters"].split(" ")
        rindxs = _find_filters_to_remove(
            filters,
            rm_filters=rm_filters,
            cat_filters=cat_filters,
            beast_filt=beast_filt,
        )

        # grid columns whose name contains a removed filter name
        # (each column is listed once so it is never deleted twice)
        rgridcols = []
        for k in rindxs:
            for grid_col in g0.grid.colnames:
                if filters[k] in grid_col and grid_col not in rgridcols:
                    rgridcols.append(grid_col)

        # delete column(s)
        nseds = np.delete(g0.seds, rindxs, 1)
        nlamb = np.delete(g0.lamb, rindxs, 0)
        nfilters = np.delete(filters, rindxs, 0)
        for rcol in rgridcols:
            g0.grid.delCol(rcol)

        print("orig filters: {}".format(" ".join(filters)))
        print(" new filters: {}".format(" ".join(nfilters)))

        # save the modified grid
        g = SpectralGrid(np.array(nlamb), seds=nseds, grid=g0.grid, backend="memory")
        g.grid.header["filters"] = " ".join(nfilters)
        if physgrid_outfile is not None:
            g.writeHDF(physgrid_outfile)
        else:
            g.writeHDF("{}_seds.grid.hd5".format(outbase))

    # if obsgrid set, process the observation model
    # (rindxs is always defined here: obsgrid requires physgrid, checked above)
    if obsgrid is not None:
        obsgrid = noisemodel.get_noisemodelcat(obsgrid)
        with tables.open_file("{}_noisemodel.grid.hd5".format(outbase), "w") as outfile:
            outfile.create_array(
                outfile.root, "bias", np.delete(obsgrid["bias"], rindxs, 1)
            )
            outfile.create_array(
                outfile.root, "error", np.delete(obsgrid["error"], rindxs, 1)
            )
            outfile.create_array(
                outfile.root,
                "completeness",
                np.delete(obsgrid["completeness"], rindxs, 1),
            )
def trim_models(
    sedgrid,
    sedgrid_noisemodel,
    obsdata,
    sed_outname,
    noisemodel_outname,
    sigma_fac=3.0,
    n_detected=4,
    inFlux=True,
    trunchen=False,
):
    """
    For a given set of observations, there will be models that are so
    bright or faint that they will always have ~0 probability of fitting
    the data.  This program trims those models out of the SED grid
    so that time is not spent calculating model points that are always
    zero probability.

    The trimmed SED grid is written to `sed_outname` (with an extra
    `fullgrid_idx` column holding each model's index in the full grid) and the
    matching noise model rows are written to `noisemodel_outname`.

    Parameters
    ----------
    sedgrid: grid.SEDgrid instance
        model grid

    sedgrid_noisemodel: beast noisemodel instance
        noise model data

    obsdata: Observation object instance
        observation catalog

    sed_outname: str
        name for output sed file

    noisemodel_outname: str
        name for output noisemodel file

    sigma_fac: float
        factor for trimming the upper and lower range of grid so that
        the model range cuts off sigma_fac above and below the brightest
        and faintest models, respectively (default: 3.)

    n_detected: int
        minimum number of bands where ASTs yielded a detection for
        a given model, if fewer detections than n_detected this model
        gets eliminated (default: 4)

    inFlux: boolean
        if true data are in fluxes (default: True)

    trunchen: boolean
        if true use the trunchen noise model (default: False)

    Raises
    ------
    ValueError
        If no model has a positive noise model error in at least
        `n_detected` bands.
    """
    # Store the brightest and faintest fluxes of the data in each band
    n_filters = len(obsdata.filters)
    min_data = np.zeros(n_filters)
    max_data = np.zeros(n_filters)
    for k, filtername in enumerate(obsdata.filters):
        sfiltname = obsdata.data.resolve_alias(filtername)
        if inFlux:
            band_flux = obsdata.data[sfiltname] * obsdata.vega_flux[k]
        else:
            # catalog values are magnitudes: convert before scaling
            band_flux = 10 ** (-0.4 * obsdata.data[sfiltname]) * obsdata.vega_flux[k]
        min_data[k] = np.amin(band_flux)
        max_data[k] = np.amax(band_flux)

    # first remove all models that have any band with fluxes below the
    # faintest ASTs run
    # when the noisemodel was computed, models with fluxes below the
    # faintest ASTs were tagged with a negative error/uncertainty
    # identify the models that have been detected in enough bands
    #   the idea here is that if the ASTs are not measured that means
    #   that *none* were recovered and this implies
    #   that no model with these values would be recovered and thus the
    #   probability should always be zero
    signed_unc = sedgrid_noisemodel.root.error[:]
    sum_above_ast = np.sum(signed_unc > 0, axis=1)
    (indxs,) = np.where(sum_above_ast >= n_detected)

    # cache the noisemodel values
    model_bias = sedgrid_noisemodel.root.bias[:]
    model_unc = np.fabs(signed_unc)
    model_compl = sedgrid_noisemodel.root.completeness[:]
    if trunchen:
        model_q_norm = sedgrid_noisemodel.root.q_norm[:]
        model_icov_diag = sedgrid_noisemodel.root.icov_diag[:]
        model_icov_offdiag = sedgrid_noisemodel.root.icov_offdiag[:]

    if len(indxs) <= 0:
        raise ValueError("no models are brighter than the minimum ASTs run")

    n_ast_indxs = len(indxs)

    # Find models with fluxes (with margin) between faintest and brightest data
    for k in range(n_filters):
        print("working on filter # = ", k)

        # Get upper and lower values for the models given the noise model
        #  sigma_fac defaults to 3.
        model_val = sedgrid.seds[indxs, k] + model_bias[indxs, k]
        model_down = model_val - sigma_fac * model_unc[indxs, k]
        model_up = model_val + sigma_fac * model_unc[indxs, k]

        (nindxs,) = np.where((model_up >= min_data[k]) & (model_down <= max_data[k]))
        # a band that would exclude every remaining model is skipped
        if len(nindxs) > 0:
            indxs = indxs[nindxs]

    # NOTE(review): indxs is only ever replaced by a non-empty selection, so
    # this check cannot trigger as written -- confirm the intended behavior
    if len(indxs) == 0:
        raise ValueError("no models that are within the data range")

    print("number of original models = ", len(sedgrid.seds[:, 0]))
    print("number of ast trimmed models = ", n_ast_indxs)
    print("number of trimmed models = ", len(indxs))

    # Save the grid
    print("Writing trimmed sedgrid to disk into {0:s}".format(sed_outname))
    cols = {key: sedgrid.grid[key][indxs] for key in list(sedgrid.grid.keys())}

    # New column to save the index of the model in the full grid
    cols["fullgrid_idx"] = indxs.astype(int)

    g = SpectralGrid(
        sedgrid.lamb, seds=sedgrid.seds[indxs], grid=Table(cols), backend="memory"
    )
    filternames = obsdata.filters
    g.grid.header["filters"] = " ".join(filternames)

    # trimmed grid name
    g.writeHDF(sed_outname)

    # save the trimmed noise model
    print("Writing trimmed noisemodel to disk into {0:s}".format(noisemodel_outname))
    with tables.open_file(noisemodel_outname, "w") as outfile:
        outfile.create_array(outfile.root, "bias", model_bias[indxs])
        outfile.create_array(outfile.root, "error", model_unc[indxs])
        outfile.create_array(outfile.root, "completeness", model_compl[indxs])
        if trunchen:
            outfile.create_array(outfile.root, "q_norm", model_q_norm[indxs])
            outfile.create_array(outfile.root, "icov_diag", model_icov_diag[indxs])
            outfile.create_array(
                outfile.root, "icov_offdiag", model_icov_offdiag[indxs]
            )