Example #1
0
    def test_merge_pdf1d_stats(self):
        """
        Using cached versions of the observations, sed grid, and noise model,
        split the grids and do the fitting on the subgrids and original
        grid.  Merge the results from the subgrids and compare to the results
        from fitting the full grid.
        """
        ######################################
        # STEP 1: GET SOME DATA TO WORK WITH #
        ######################################

        # read in the observed data
        obsdata = Observations(self.obs_fname_cache, self.settings.filters,
                               self.settings.obs_colnames)

        #########################################################################################
        # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
        #########################################################################################
        num_subgrids = 3

        # Split SED grid
        sub_seds_trim_fnames = subgridding_tools.split_grid(
            self.seds_trim_fname_cache, num_subgrids, overwrite=True)

        # Split noise grid (a standardized function does not exist)
        sub_noise_trim_fnames = []

        noisemodel_vals = noisemodel.get_noisemodelcat(
            self.noise_trim_fname_cache)
        slices = subgridding_tools.uniform_slices(len(noisemodel_vals["bias"]),
                                                  num_subgrids)
        for i, slc in enumerate(slices):
            outname = self.noise_trim_fname_cache.replace(
                ".hd5", "sub{}.hd5".format(i))
            with tables.open_file(outname, "w") as outfile:
                outfile.create_array(outfile.root, "bias",
                                     noisemodel_vals["bias"][slc])
                outfile.create_array(outfile.root, "error",
                                     noisemodel_vals["error"][slc])
                outfile.create_array(outfile.root, "completeness",
                                     noisemodel_vals["completeness"][slc])
            sub_noise_trim_fnames.append(outname)

        # Collect information about the parameter rangers, to make the pdf1d bins
        # consistent between subgrids
        grid_info_dict = subgridding_tools.reduce_grid_info(
            sub_seds_trim_fnames,
            sub_noise_trim_fnames,
            nprocs=1,
            cap_unique=100)

        ##################################################
        # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
        ##################################################
        def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
            return [
                base_fname.replace(extension,
                                   "gridsub{}{}".format(i, extension))
                for i in range(num_subgrids)
            ]

        stats_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
        pdf1d_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
        lnp_fname = tempfile.NamedTemporaryFile(suffix=".hd5").name

        subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
        subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
        subgrid_lnp_fnames = make_gridsub_fnames(lnp_fname,
                                                 num_subgrids,
                                                 extension=".hd5")

        for i in range(num_subgrids):
            sub_noisemodel_vals = noisemodel.get_noisemodelcat(
                sub_noise_trim_fnames[i])
            fit.summary_table_memory(
                obsdata,
                sub_noisemodel_vals,
                sub_seds_trim_fnames[i],
                threshold=-40.0,
                save_every_npts=100,
                lnp_npts=500,
                stats_outname=subgrid_stats_fnames[i],
                pdf1d_outname=subgrid_pdf1d_fnames[i],
                lnp_outname=subgrid_lnp_fnames[i],
                grid_info_dict=grid_info_dict,
                do_not_normalize=True,
            )
            # The do_not_normalize option is absolutely crucial!

        # Now merge the results
        merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
            subgrid_pdf1d_fnames, subgrid_stats_fnames)

        # Do a full fit also
        normal_stats = tempfile.NamedTemporaryFile(suffix=".fits").name
        normal_pdf1d = tempfile.NamedTemporaryFile(suffix=".fits").name
        normal_lnp = tempfile.NamedTemporaryFile(suffix=".hd5").name
        fit.summary_table_memory(
            obsdata,
            noisemodel_vals,
            self.seds_trim_fname_cache,
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=500,
            stats_outname=normal_stats,
            pdf1d_outname=normal_pdf1d,
            lnp_outname=normal_lnp,
            do_not_normalize=True,
        )
        # Here, we also need to use do_not_normalize, otherwise Pmax will be
        # different by a factor

        # CHECKS
        tolerance = 1e-6
        fits_normal = fits.open(normal_pdf1d)
        fits_new = fits.open(merged_pdf1d_fname)

        if not len(fits_new) == len(fits_normal):
            raise AssertionError()

        # A similar problem to the above will also occur here
        for k in range(1, len(fits_new)):
            qname = fits_new[k].header["EXTNAME"]
            np.testing.assert_allclose(
                fits_new[k].data,
                fits_normal[qname].data,
                rtol=tolerance,
                atol=tolerance,
            )

        table_normal = Table.read(normal_stats)
        table_new = Table.read(merged_stats_fname)

        if not len(table_normal) == len(table_new):
            raise AssertionError()

        # These will normally fail, as the merging process can not be made
        # bit-correct due do floating point math (exacerbated by exponentials)
        for c in table_new.colnames:
            if c == "Name" or c == "RA" or c == "DEC":
                np.testing.assert_equal(
                    table_normal[c],
                    table_new[c],
                    err_msg="column {} is not equal".format(c),
                )
            else:
                np.testing.assert_allclose(
                    table_normal[c],
                    table_new[c],
                    rtol=tolerance,
                    equal_nan=True,
                    err_msg="column {} is not close enough".format(c),
                )
Example #2
0
def test_merge_pdf1d_stats():
    ######################################
    # STEP 1: GET SOME DATA TO WORK WITH #
    ######################################
    vega_fname = download_rename("vega.hd5")
    obs_fname = download_rename("b15_4band_det_27_A.fits")
    noise_trim_fname = download_rename(
        "beast_example_phat_noisemodel_trim.grid.hd5")
    seds_trim_fname = download_rename("beast_example_phat_seds_trim.grid.hd5")

    # download cached version of fitting results
    # stats_fname_cache = download_rename('beast_example_phat_stats.fits')
    # pdf1d_fname_cache = download_rename('beast_example_phat_pdf1d.fits')

    # read in the observed data
    filters = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_ACS_WFC_F475W",
        "HST_ACS_WFC_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]
    basefilters = ["F275W", "F336W", "F475W", "F814W", "F110W", "F160W"]
    obs_colnames = [f.lower() + "_rate" for f in basefilters]

    obsdata = Observations(obs_fname,
                           filters,
                           obs_colnames,
                           vega_fname=vega_fname)

    #########################################################################################
    # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
    #########################################################################################
    num_subgrids = 3

    # Split SED grid
    sub_seds_trim_fnames = subgridding_tools.split_grid(seds_trim_fname,
                                                        num_subgrids,
                                                        overwrite=True)

    # Split noise grid (a standardized function does not exist)
    sub_noise_trim_fnames = []

    noisemodel_vals = get_noisemodelcat(noise_trim_fname)
    slices = subgridding_tools.uniform_slices(len(noisemodel_vals["bias"]),
                                              num_subgrids)
    for i, slc in enumerate(slices):
        outname = noise_trim_fname.replace(".hd5", "sub{}.hd5".format(i))
        with tables.open_file(outname, "w") as outfile:
            outfile.create_array(outfile.root, "bias",
                                 noisemodel_vals["bias"][slc])
            outfile.create_array(outfile.root, "error",
                                 noisemodel_vals["error"][slc])
            outfile.create_array(outfile.root, "completeness",
                                 noisemodel_vals["completeness"][slc])
        sub_noise_trim_fnames.append(outname)

    # Collect information about the parameter rangers, to make the pdf1d bins
    # consistent between subgrids
    grid_info_dict = subgridding_tools.reduce_grid_info(sub_seds_trim_fnames,
                                                        sub_noise_trim_fnames,
                                                        nprocs=1,
                                                        cap_unique=100)

    ##################################################
    # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
    ##################################################
    def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
        return [
            base_fname.replace(extension, "gridsub{}{}".format(i, extension))
            for i in range(num_subgrids)
        ]

    stats_fname = "/tmp/beast_example_phat_stats.fits"
    pdf1d_fname = "/tmp/beast_example_phat_pdf1d.fits"
    lnp_fname = "/tmp/beast_example_phat_lnp.hd5"

    subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
    subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
    subgrid_lnp_fnames = make_gridsub_fnames(lnp_fname,
                                             num_subgrids,
                                             extension=".hd5")

    for i in range(num_subgrids):
        sub_noisemodel_vals = get_noisemodelcat(sub_noise_trim_fnames[i])
        fit.summary_table_memory(
            obsdata,
            sub_noisemodel_vals,
            sub_seds_trim_fnames[i],
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=60,
            stats_outname=subgrid_stats_fnames[i],
            pdf1d_outname=subgrid_pdf1d_fnames[i],
            lnp_outname=subgrid_lnp_fnames[i],
            grid_info_dict=grid_info_dict,
            do_not_normalize=True,
        )
        # The do_not_normalize option is absolutely crucial!

    # Now merge the results
    merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
        subgrid_pdf1d_fnames, subgrid_stats_fnames)

    # Do a full fit also
    normal_stats = "normal_stats.fits"
    normal_pdf1d = "normal_pdf1d.fits"
    normal_lnp = "normal_lnp.hd5"
    fit.summary_table_memory(
        obsdata,
        noisemodel_vals,
        seds_trim_fname,
        threshold=-40.0,
        save_every_npts=100,
        lnp_npts=60,
        stats_outname=normal_stats,
        pdf1d_outname=normal_pdf1d,
        lnp_outname=normal_lnp,
        do_not_normalize=True,
    )
    # Here, we also need to use do_not_normalize, otherwise Pmax will be
    # different by a factor

    # CHECKS
    tolerance = 1e-6
    print("comparing pdf1d")
    # fits_cache = fits.open(pdf1d_fname_cache)
    fits_normal = fits.open(normal_pdf1d)
    fits_new = fits.open(merged_pdf1d_fname)

    if not len(fits_new) == len(fits_normal):
        raise AssertionError()

    # A similar problem to the above will also occur here
    for k in range(1, len(fits_new)):
        qname = fits_new[k].header["EXTNAME"]
        print(qname)
        np.testing.assert_allclose(fits_new[k].data,
                                   fits_normal[qname].data,
                                   rtol=tolerance,
                                   atol=tolerance)

    print("comparing stats")
    # table_cache = Table.read(stats_fname_cache)
    table_normal = Table.read(normal_stats)
    table_new = Table.read(merged_stats_fname)

    if not len(table_normal) == len(table_new):
        raise AssertionError()

    # These will normally fail, as the merging process can not be made
    # bit-correct due do floating point math (exacerbated by exponentials)
    for c in table_new.colnames:
        print(c)
        if c == "Name" or c == "RA" or c == "DEC":
            np.testing.assert_equal(
                table_normal[c],
                table_new[c],
                err_msg="column {} is not equal".format(c),
            )
        else:
            np.testing.assert_allclose(
                table_normal[c],
                table_new[c],
                rtol=tolerance,
                equal_nan=True,
                err_msg="column {} is not close enough".format(c),
            )