    def test_merge_pdf1d_stats(self):
        """
        Using cached versions of the observations, SED grid, and noise model,
        split the grids and do the fitting on the subgrids and the original
        grid.  Merge the results from the subgrids and compare them to the
        results from fitting the full grid.
        """
        ######################################
        # STEP 1: GET SOME DATA TO WORK WITH #
        ######################################

        # read in the observed data
        obsdata = Observations(
            self.obs_fname_cache, self.settings.filters, self.settings.obs_colnames
        )

        #########################################################################################
        # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
        #########################################################################################
        num_subgrids = 3

        # Split SED grid
        sub_seds_trim_fnames = subgridding_tools.split_grid(
            self.seds_trim_fname_cache, num_subgrids, overwrite=True
        )

        # Split noise grid (a standardized function does not exist)
        sub_noise_trim_fnames = []

        noisemodel_vals = noisemodel.get_noisemodelcat(self.noise_trim_fname_cache)
        slices = subgridding_tools.uniform_slices(
            len(noisemodel_vals["bias"]), num_subgrids
        )
        for i, slc in enumerate(slices):
            outname = self.noise_trim_fname_cache.replace(".hd5", "sub{}.hd5".format(i))
            with tables.open_file(outname, "w") as outfile:
                outfile.create_array(
                    outfile.root, "bias", noisemodel_vals["bias"][slc]
                )
                outfile.create_array(
                    outfile.root, "error", noisemodel_vals["error"][slc]
                )
                outfile.create_array(
                    outfile.root, "completeness", noisemodel_vals["completeness"][slc]
                )
            sub_noise_trim_fnames.append(outname)

        # Collect information about the parameter ranges, to make the pdf1d bins
        # consistent between subgrids
        grid_info_dict = subgridding_tools.reduce_grid_info(
            sub_seds_trim_fnames, sub_noise_trim_fnames, nprocs=1, cap_unique=100
        )

        ##################################################
        # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
        ##################################################
        def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
            return [
                base_fname.replace(extension, "gridsub{}{}".format(i, extension))
                for i in range(num_subgrids)
            ]

        stats_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
        pdf1d_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
        lnp_fname = tempfile.NamedTemporaryFile(suffix=".hd5").name

        subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
        subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
        subgrid_lnp_fnames = make_gridsub_fnames(
            lnp_fname, num_subgrids, extension=".hd5"
        )

        for i in range(num_subgrids):
            sub_noisemodel_vals = noisemodel.get_noisemodelcat(
                sub_noise_trim_fnames[i]
            )
            fit.summary_table_memory(
                obsdata,
                sub_noisemodel_vals,
                sub_seds_trim_fnames[i],
                threshold=-40.0,
                save_every_npts=100,
                lnp_npts=500,
                stats_outname=subgrid_stats_fnames[i],
                pdf1d_outname=subgrid_pdf1d_fnames[i],
                lnp_outname=subgrid_lnp_fnames[i],
                grid_info_dict=grid_info_dict,
                do_not_normalize=True,
            )
            # The do_not_normalize option is absolutely crucial!
        # Now merge the results
        merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
            subgrid_pdf1d_fnames, subgrid_stats_fnames
        )

        # Do a full fit also
        normal_stats = tempfile.NamedTemporaryFile(suffix=".fits").name
        normal_pdf1d = tempfile.NamedTemporaryFile(suffix=".fits").name
        normal_lnp = tempfile.NamedTemporaryFile(suffix=".hd5").name
        fit.summary_table_memory(
            obsdata,
            noisemodel_vals,
            self.seds_trim_fname_cache,
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=500,
            stats_outname=normal_stats,
            pdf1d_outname=normal_pdf1d,
            lnp_outname=normal_lnp,
            do_not_normalize=True,
        )
        # Here, we also need to use do_not_normalize, otherwise Pmax will be
        # different by a factor

        # CHECKS
        tolerance = 1e-6
        fits_normal = fits.open(normal_pdf1d)
        fits_new = fits.open(merged_pdf1d_fname)

        if not len(fits_new) == len(fits_normal):
            raise AssertionError()

        # A similar problem to the above will also occur here
        for k in range(1, len(fits_new)):
            qname = fits_new[k].header["EXTNAME"]
            np.testing.assert_allclose(
                fits_new[k].data,
                fits_normal[qname].data,
                rtol=tolerance,
                atol=tolerance,
            )

        table_normal = Table.read(normal_stats)
        table_new = Table.read(merged_stats_fname)

        if not len(table_normal) == len(table_new):
            raise AssertionError()

        # An exact comparison would normally fail, as the merging process can
        # not be made bit-correct due to floating point math (exacerbated by
        # exponentials), so the numeric columns are compared within a tolerance
        for c in table_new.colnames:
            if c == "Name" or c == "RA" or c == "DEC":
                np.testing.assert_equal(
                    table_normal[c],
                    table_new[c],
                    err_msg="column {} is not equal".format(c),
                )
            else:
                np.testing.assert_allclose(
                    table_normal[c],
                    table_new[c],
                    rtol=tolerance,
                    equal_nan=True,
                    err_msg="column {} is not close enough".format(c),
                )
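
# The noise-grid splitting loop above (and its near-copy in the variant below)
# could be factored into a shared helper, since the in-code comment notes that
# no standardized splitting function exists.  A minimal sketch follows; the
# name split_noise_grid is hypothetical (it is not part of subgridding_tools)
# and it assumes the trimmed noise model only needs its "bias", "error", and
# "completeness" arrays carried over to each per-subgrid file.
def split_noise_grid(noise_trim_fname, num_subgrids):
    """Split a trimmed noise model into one HDF5 file per subgrid (sketch)."""
    noisemodel_vals = noisemodel.get_noisemodelcat(noise_trim_fname)
    slices = subgridding_tools.uniform_slices(
        len(noisemodel_vals["bias"]), num_subgrids
    )
    sub_fnames = []
    for i, slc in enumerate(slices):
        outname = noise_trim_fname.replace(".hd5", "sub{}.hd5".format(i))
        with tables.open_file(outname, "w") as outfile:
            # copy only the slice of each array that belongs to this subgrid
            for key in ("bias", "error", "completeness"):
                outfile.create_array(outfile.root, key, noisemodel_vals[key][slc])
        sub_fnames.append(outname)
    return sub_fnames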
def test_merge_pdf1d_stats():
    """
    Download the example observations, trimmed SED grid, and trimmed noise
    model, split the grids, fit the subgrids and the full grid, then merge
    the subgrid results and compare them to the full-grid results.
    """
    ######################################
    # STEP 1: GET SOME DATA TO WORK WITH #
    ######################################
    vega_fname = download_rename("vega.hd5")
    obs_fname = download_rename("b15_4band_det_27_A.fits")
    noise_trim_fname = download_rename("beast_example_phat_noisemodel_trim.grid.hd5")
    seds_trim_fname = download_rename("beast_example_phat_seds_trim.grid.hd5")

    # download cached version of fitting results
    # stats_fname_cache = download_rename('beast_example_phat_stats.fits')
    # pdf1d_fname_cache = download_rename('beast_example_phat_pdf1d.fits')

    # read in the observed data
    filters = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_ACS_WFC_F475W",
        "HST_ACS_WFC_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]
    basefilters = ["F275W", "F336W", "F475W", "F814W", "F110W", "F160W"]
    obs_colnames = [f.lower() + "_rate" for f in basefilters]

    obsdata = Observations(obs_fname, filters, obs_colnames, vega_fname=vega_fname)

    #########################################################################################
    # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
    #########################################################################################
    num_subgrids = 3

    # Split SED grid
    sub_seds_trim_fnames = subgridding_tools.split_grid(
        seds_trim_fname, num_subgrids, overwrite=True
    )

    # Split noise grid (a standardized function does not exist)
    sub_noise_trim_fnames = []

    noisemodel_vals = get_noisemodelcat(noise_trim_fname)
    slices = subgridding_tools.uniform_slices(
        len(noisemodel_vals["bias"]), num_subgrids
    )
    for i, slc in enumerate(slices):
        outname = noise_trim_fname.replace(".hd5", "sub{}.hd5".format(i))
        with tables.open_file(outname, "w") as outfile:
            outfile.create_array(outfile.root, "bias", noisemodel_vals["bias"][slc])
            outfile.create_array(outfile.root, "error", noisemodel_vals["error"][slc])
            outfile.create_array(
                outfile.root, "completeness", noisemodel_vals["completeness"][slc]
            )
        sub_noise_trim_fnames.append(outname)

    # Collect information about the parameter ranges, to make the pdf1d bins
    # consistent between subgrids
    grid_info_dict = subgridding_tools.reduce_grid_info(
        sub_seds_trim_fnames, sub_noise_trim_fnames, nprocs=1, cap_unique=100
    )

    ##################################################
    # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
    ##################################################
    def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
        return [
            base_fname.replace(extension, "gridsub{}{}".format(i, extension))
            for i in range(num_subgrids)
        ]

    stats_fname = "/tmp/beast_example_phat_stats.fits"
    pdf1d_fname = "/tmp/beast_example_phat_pdf1d.fits"
    lnp_fname = "/tmp/beast_example_phat_lnp.hd5"

    subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
    subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
    subgrid_lnp_fnames = make_gridsub_fnames(lnp_fname, num_subgrids, extension=".hd5")

    for i in range(num_subgrids):
        sub_noisemodel_vals = get_noisemodelcat(sub_noise_trim_fnames[i])
        fit.summary_table_memory(
            obsdata,
            sub_noisemodel_vals,
            sub_seds_trim_fnames[i],
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=60,
            stats_outname=subgrid_stats_fnames[i],
            pdf1d_outname=subgrid_pdf1d_fnames[i],
            lnp_outname=subgrid_lnp_fnames[i],
            grid_info_dict=grid_info_dict,
            do_not_normalize=True,
        )
        # The do_not_normalize option is absolutely crucial!
    # Now merge the results
    merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
        subgrid_pdf1d_fnames, subgrid_stats_fnames
    )

    # Do a full fit also
    normal_stats = "normal_stats.fits"
    normal_pdf1d = "normal_pdf1d.fits"
    normal_lnp = "normal_lnp.hd5"
    fit.summary_table_memory(
        obsdata,
        noisemodel_vals,
        seds_trim_fname,
        threshold=-40.0,
        save_every_npts=100,
        lnp_npts=60,
        stats_outname=normal_stats,
        pdf1d_outname=normal_pdf1d,
        lnp_outname=normal_lnp,
        do_not_normalize=True,
    )
    # Here, we also need to use do_not_normalize, otherwise Pmax will be
    # different by a factor

    # CHECKS
    tolerance = 1e-6
    print("comparing pdf1d")
    # fits_cache = fits.open(pdf1d_fname_cache)
    fits_normal = fits.open(normal_pdf1d)
    fits_new = fits.open(merged_pdf1d_fname)

    if not len(fits_new) == len(fits_normal):
        raise AssertionError()

    # A similar problem to the above will also occur here
    for k in range(1, len(fits_new)):
        qname = fits_new[k].header["EXTNAME"]
        print(qname)
        np.testing.assert_allclose(
            fits_new[k].data, fits_normal[qname].data, rtol=tolerance, atol=tolerance
        )

    print("comparing stats")
    # table_cache = Table.read(stats_fname_cache)
    table_normal = Table.read(normal_stats)
    table_new = Table.read(merged_stats_fname)

    if not len(table_normal) == len(table_new):
        raise AssertionError()

    # An exact comparison would normally fail, as the merging process can not
    # be made bit-correct due to floating point math (exacerbated by
    # exponentials), so the numeric columns are compared within a tolerance
    for c in table_new.colnames:
        print(c)
        if c == "Name" or c == "RA" or c == "DEC":
            np.testing.assert_equal(
                table_normal[c],
                table_new[c],
                err_msg="column {} is not equal".format(c),
            )
        else:
            np.testing.assert_allclose(
                table_normal[c],
                table_new[c],
                rtol=tolerance,
                equal_nan=True,
                err_msg="column {} is not close enough".format(c),
            )