Code example #1
    def test_reduce_grid_info(self):
        """
        Split a cached version of a sed grid and check that [not quite
        sure what this is checking - details needed].
        """
        sub_fnames = subgridding_tools.split_grid(self.seds_trim_fname_cache,
                                                  3)

        complete_g_info = subgridding_tools.subgrid_info(
            self.seds_trim_fname_cache)
        cap_unique = 50
        sub_g_info = subgridding_tools.reduce_grid_info(sub_fnames,
                                                        nprocs=3,
                                                        cap_unique=cap_unique)

        for q in complete_g_info:
            if q not in sub_g_info:
                raise AssertionError()
            if complete_g_info[q]["min"] != sub_g_info[q]["min"]:
                raise AssertionError()
            if complete_g_info[q]["max"] != sub_g_info[q]["max"]:
                raise AssertionError()
            num_unique = len(complete_g_info[q]["unique"])
            if num_unique > cap_unique:
                # Cpan still be larger if one of the sub results during the
                # reduction is larger. This is as intended.
                if not sub_g_info[q]["num_unique"] >= cap_unique:
                    raise AssertionError()
            else:
                if not sub_g_info[q]["num_unique"] == num_unique:
                    raise AssertionError()
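
All three variants of this test assert the same contract: after the reduction, the per-quantity "min" and "max" must match the full grid exactly, and "num_unique" must equal the full-grid count unless that count exceeds cap_unique. A minimal, self-contained sketch of such a reduction (a plain-Python stand-in that assumes the dict layout read by the assertions; the real subgridding_tools implementation may differ):

import numpy as np

def reduce_grid_info_sketch(sub_infos, cap_unique=50):
    """Merge per-subgrid dicts of the form
    {quantity: {"min": .., "max": .., "unique": sorted array}}."""
    reduced = {}
    for info in sub_infos:
        for q, d in info.items():
            r = reduced.setdefault(
                q, {"min": np.inf, "max": -np.inf, "unique": np.array([])})
            r["min"] = min(r["min"], d["min"])  # exact across subgrids
            r["max"] = max(r["max"], d["max"])  # exact across subgrids
            merged = np.union1d(r["unique"], d["unique"])
            r["unique"] = merged[:cap_unique]  # cap the tracked unique values
    for r in reduced.values():
        r["num_unique"] = len(r["unique"])
    return reduced
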
Code example #2
def test_reduce_grid_info():
    seds_trim_fname = download_rename("beast_example_phat_seds_trim.grid.hd5")
    sub_fnames = subgridding_tools.split_grid(seds_trim_fname, 3)

    complete_g_info = subgridding_tools.subgrid_info(seds_trim_fname)
    cap_unique = 50
    sub_g_info = subgridding_tools.reduce_grid_info(sub_fnames,
                                                    nprocs=3,
                                                    cap_unique=cap_unique)

    for q in complete_g_info:
        if q not in sub_g_info:
            raise AssertionError()
        if complete_g_info[q]["min"] != sub_g_info[q]["min"]:
            raise AssertionError()
        if complete_g_info[q]["max"] != sub_g_info[q]["max"]:
            raise AssertionError()
        num_unique = len(complete_g_info[q]["unique"])
        if num_unique > cap_unique:
            # Cpan still be larger if one of the sub results during the
            # reduction is larger. This is as intended.
            if not sub_g_info[q]["num_unique"] >= cap_unique:
                raise AssertionError()
        else:
            if not sub_g_info[q]["num_unique"] == num_unique:
                raise AssertionError()
Code example #3
def test_reduce_grid_info():
    seds_trim_fname = download_rename('beast_example_phat_seds_trim.grid.hd5')
    sub_fnames = subgridding_tools.split_grid(seds_trim_fname, 3)

    complete_g_info = subgridding_tools.subgrid_info(seds_trim_fname)
    cap_unique = 50
    sub_g_info = subgridding_tools.reduce_grid_info(
        sub_fnames, nprocs=3, cap_unique=cap_unique)

    for q in complete_g_info:
        assert q in sub_g_info
        assert complete_g_info[q]['min'] == sub_g_info[q]['min']
        assert complete_g_info[q]['max'] == sub_g_info[q]['max']
        num_unique = len(complete_g_info[q]['unique'])
        if num_unique > cap_unique:
            # Can still be larger if one of the sub results during the
            # reduction is larger. This is as intended.
            assert sub_g_info[q]['num_unique'] >= cap_unique
        else:
            assert sub_g_info[q]['num_unique'] == num_unique
Code example #4
            s.replace("seds", "noisemodel") for s in trimmed_modelsedgridfiles
        ]

        # File where the ranges and number of unique values for the grid
        # will be stored (this can take a while to calculate)
        grid_info_pkl = "grid_info_dict.pkl"

        if args.dens_bin is not None:
            # Use the right subfolder
            trimmed_modelsedgridfiles = [
                os.path.join(bin_subfolder, f) for f in trimmed_modelsedgridfiles
            ]
            trimmed_noisemodelfiles = [
                os.path.join(bin_subfolder, f) for f in trimmed_noisemodelfiles
            ]
            grid_info_pkl = os.path.join(bin_subfolder, grid_info_pkl)

        if not os.path.isfile(grid_info_pkl):
            grid_info_dict = subgridding_tools.reduce_grid_info(
                trimmed_modelsedgridfiles, trimmed_noisemodelfiles, nprocs=4)

            with open(grid_info_pkl, "wb") as p:
                pickle.dump(grid_info_dict, p)
            print("wrote grid_info_dict to " + grid_info_pkl)
        else:
            print("loading grid_info_dict from " + grid_info_pkl)
            with open(grid_info_pkl, "rb") as p:
                grid_info_dict = pickle.load(p)

        # perform fits for the subgrids individually
        def fit_submodel(modelsedgridfile):
            # input files
            trimmed_modelsedgridfile = modelsedgridfile.replace(
                "seds", "seds_trim")
            trimmed_noisemodelfile = trimmed_modelsedgridfile.replace(
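
The excerpt above guards a slow reduce_grid_info call behind a pickle cache: compute once, dump to disk, and reload on later runs. That compute-or-load pattern generalizes; a minimal sketch (cached_pickle is a hypothetical helper, not part of BEAST):

import os
import pickle

def cached_pickle(path, compute, *args, **kwargs):
    # reload a cached result if present, otherwise compute and cache it
    if os.path.isfile(path):
        print("loading cached result from " + path)
        with open(path, "rb") as p:
            return pickle.load(p)
    result = compute(*args, **kwargs)
    with open(path, "wb") as p:
        pickle.dump(result, p)
    print("wrote result to " + path)
    return result

# hypothetical usage, mirroring the excerpt:
# grid_info_dict = cached_pickle(grid_info_pkl,
#                                subgridding_tools.reduce_grid_info,
#                                trimmed_modelsedgridfiles,
#                                trimmed_noisemodelfiles, nprocs=4)
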
Code example #5
    def test_merge_pdf1d_stats(self):
        """
        Using cached versions of the observations, SED grid, and noise model,
        split the grids and do the fitting on the subgrids and original
        grid.  Merge the results from the subgrids and compare to the results
        from fitting the full grid.
        """
        ######################################
        # STEP 1: GET SOME DATA TO WORK WITH #
        ######################################

        # read in the observed data
        obsdata = Observations(self.obs_fname_cache, self.settings.filters,
                               self.settings.obs_colnames)

        #########################################################################################
        # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
        #########################################################################################
        num_subgrids = 3

        # Split SED grid
        sub_seds_trim_fnames = subgridding_tools.split_grid(
            self.seds_trim_fname_cache, num_subgrids, overwrite=True)

        # Split noise grid (a standardized function does not exist)
        sub_noise_trim_fnames = []

        noisemodel_vals = noisemodel.get_noisemodelcat(
            self.noise_trim_fname_cache)
        slices = subgridding_tools.uniform_slices(len(noisemodel_vals["bias"]),
                                                  num_subgrids)
        for i, slc in enumerate(slices):
            outname = self.noise_trim_fname_cache.replace(
                ".hd5", "sub{}.hd5".format(i))
            with tables.open_file(outname, "w") as outfile:
                outfile.create_array(outfile.root, "bias",
                                     noisemodel_vals["bias"][slc])
                outfile.create_array(outfile.root, "error",
                                     noisemodel_vals["error"][slc])
                outfile.create_array(outfile.root, "completeness",
                                     noisemodel_vals["completeness"][slc])
            sub_noise_trim_fnames.append(outname)

        # Collect information about the parameter ranges, to make the pdf1d
        # bins consistent between subgrids
        grid_info_dict = subgridding_tools.reduce_grid_info(
            sub_seds_trim_fnames,
            sub_noise_trim_fnames,
            nprocs=1,
            cap_unique=100)

        ##################################################
        # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
        ##################################################
        def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
            return [
                base_fname.replace(extension,
                                   "gridsub{}{}".format(i, extension))
                for i in range(num_subgrids)
            ]

        stats_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
        pdf1d_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
        lnp_fname = tempfile.NamedTemporaryFile(suffix=".hd5").name

        subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
        subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
        subgrid_lnp_fnames = make_gridsub_fnames(lnp_fname,
                                                 num_subgrids,
                                                 extension=".hd5")

        for i in range(num_subgrids):
            sub_noisemodel_vals = noisemodel.get_noisemodelcat(
                sub_noise_trim_fnames[i])
            fit.summary_table_memory(
                obsdata,
                sub_noisemodel_vals,
                sub_seds_trim_fnames[i],
                threshold=-40.0,
                save_every_npts=100,
                lnp_npts=500,
                stats_outname=subgrid_stats_fnames[i],
                pdf1d_outname=subgrid_pdf1d_fnames[i],
                lnp_outname=subgrid_lnp_fnames[i],
                grid_info_dict=grid_info_dict,
                do_not_normalize=True,
            )
            # The do_not_normalize option is absolutely crucial!

        # Now merge the results
        merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
            subgrid_pdf1d_fnames, subgrid_stats_fnames)

        # Do a full fit also
        normal_stats = tempfile.NamedTemporaryFile(suffix=".fits").name
        normal_pdf1d = tempfile.NamedTemporaryFile(suffix=".fits").name
        normal_lnp = tempfile.NamedTemporaryFile(suffix=".hd5").name
        fit.summary_table_memory(
            obsdata,
            noisemodel_vals,
            self.seds_trim_fname_cache,
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=500,
            stats_outname=normal_stats,
            pdf1d_outname=normal_pdf1d,
            lnp_outname=normal_lnp,
            do_not_normalize=True,
        )
        # Here, we also need to use do_not_normalize, otherwise Pmax will be
        # different by a factor

        # CHECKS
        tolerance = 1e-6
        fits_normal = fits.open(normal_pdf1d)
        fits_new = fits.open(merged_pdf1d_fname)

        if len(fits_new) != len(fits_normal):
            raise AssertionError()

        # As with the stats table below, the merge is not bit-exact, so
        # compare within a tolerance
        for k in range(1, len(fits_new)):
            qname = fits_new[k].header["EXTNAME"]
            np.testing.assert_allclose(
                fits_new[k].data,
                fits_normal[qname].data,
                rtol=tolerance,
                atol=tolerance,
            )

        table_normal = Table.read(normal_stats)
        table_new = Table.read(merged_stats_fname)

        if len(table_normal) != len(table_new):
            raise AssertionError()

        # Exact equality would normally fail here: the merging process cannot
        # be made bit-identical due to floating-point math (exacerbated by
        # exponentials), so compare within a tolerance instead
        for c in table_new.colnames:
            if c == "Name" or c == "RA" or c == "DEC":
                np.testing.assert_equal(
                    table_normal[c],
                    table_new[c],
                    err_msg="column {} is not equal".format(c),
                )
            else:
                np.testing.assert_allclose(
                    table_normal[c],
                    table_new[c],
                    rtol=tolerance,
                    equal_nan=True,
                    err_msg="column {} is not close enough".format(c),
                )
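
This test and the standalone variant that follows both split the noise model by hand (no standardized splitter exists), using subgridding_tools.uniform_slices to cut the arrays into contiguous pieces. A plausible pure-Python sketch of what such a slicer does (an assumption; the real uniform_slices may distribute any remainder differently):

def uniform_slices_sketch(n_items, n_slices):
    """Split range(n_items) into n_slices contiguous, nearly equal slices."""
    q, r = divmod(n_items, n_slices)
    slices, start = [], 0
    for i in range(n_slices):
        stop = start + q + (1 if i < r else 0)  # spread the remainder
        slices.append(slice(start, stop))
        start = stop
    return slices

# e.g. uniform_slices_sketch(10, 3) -> [slice(0, 4), slice(4, 7), slice(7, 10)]
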
Code example #6
def test_merge_pdf1d_stats():
    ######################################
    # STEP 1: GET SOME DATA TO WORK WITH #
    ######################################
    vega_fname = download_rename("vega.hd5")
    obs_fname = download_rename("b15_4band_det_27_A.fits")
    noise_trim_fname = download_rename(
        "beast_example_phat_noisemodel_trim.grid.hd5")
    seds_trim_fname = download_rename("beast_example_phat_seds_trim.grid.hd5")

    # download cached version of fitting results
    # stats_fname_cache = download_rename('beast_example_phat_stats.fits')
    # pdf1d_fname_cache = download_rename('beast_example_phat_pdf1d.fits')

    # read in the observed data
    filters = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_ACS_WFC_F475W",
        "HST_ACS_WFC_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]
    basefilters = ["F275W", "F336W", "F475W", "F814W", "F110W", "F160W"]
    obs_colnames = [f.lower() + "_rate" for f in basefilters]

    obsdata = Observations(obs_fname,
                           filters,
                           obs_colnames,
                           vega_fname=vega_fname)

    #########################################################################################
    # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
    #########################################################################################
    num_subgrids = 3

    # Split SED grid
    sub_seds_trim_fnames = subgridding_tools.split_grid(seds_trim_fname,
                                                        num_subgrids,
                                                        overwrite=True)

    # Split noise grid (a standardized function does not exist)
    sub_noise_trim_fnames = []

    noisemodel_vals = get_noisemodelcat(noise_trim_fname)
    slices = subgridding_tools.uniform_slices(len(noisemodel_vals["bias"]),
                                              num_subgrids)
    for i, slc in enumerate(slices):
        outname = noise_trim_fname.replace(".hd5", "sub{}.hd5".format(i))
        with tables.open_file(outname, "w") as outfile:
            outfile.create_array(outfile.root, "bias",
                                 noisemodel_vals["bias"][slc])
            outfile.create_array(outfile.root, "error",
                                 noisemodel_vals["error"][slc])
            outfile.create_array(outfile.root, "completeness",
                                 noisemodel_vals["completeness"][slc])
        sub_noise_trim_fnames.append(outname)

    # Collect information about the parameter ranges, to make the pdf1d
    # bins consistent between subgrids
    grid_info_dict = subgridding_tools.reduce_grid_info(sub_seds_trim_fnames,
                                                        sub_noise_trim_fnames,
                                                        nprocs=1,
                                                        cap_unique=100)

    ##################################################
    # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
    ##################################################
    def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
        return [
            base_fname.replace(extension, "gridsub{}{}".format(i, extension))
            for i in range(num_subgrids)
        ]

    stats_fname = "/tmp/beast_example_phat_stats.fits"
    pdf1d_fname = "/tmp/beast_example_phat_pdf1d.fits"
    lnp_fname = "/tmp/beast_example_phat_lnp.hd5"

    subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
    subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
    subgrid_lnp_fnames = make_gridsub_fnames(lnp_fname,
                                             num_subgrids,
                                             extension=".hd5")

    for i in range(num_subgrids):
        sub_noisemodel_vals = get_noisemodelcat(sub_noise_trim_fnames[i])
        fit.summary_table_memory(
            obsdata,
            sub_noisemodel_vals,
            sub_seds_trim_fnames[i],
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=60,
            stats_outname=subgrid_stats_fnames[i],
            pdf1d_outname=subgrid_pdf1d_fnames[i],
            lnp_outname=subgrid_lnp_fnames[i],
            grid_info_dict=grid_info_dict,
            do_not_normalize=True,
        )
        # The do_not_normalize option is absolutely crucial!

    # Now merge the results
    merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
        subgrid_pdf1d_fnames, subgrid_stats_fnames)

    # Do a full fit also
    normal_stats = "normal_stats.fits"
    normal_pdf1d = "normal_pdf1d.fits"
    normal_lnp = "normal_lnp.hd5"
    fit.summary_table_memory(
        obsdata,
        noisemodel_vals,
        seds_trim_fname,
        threshold=-40.0,
        save_every_npts=100,
        lnp_npts=60,
        stats_outname=normal_stats,
        pdf1d_outname=normal_pdf1d,
        lnp_outname=normal_lnp,
        do_not_normalize=True,
    )
    # Here, we also need to use do_not_normalize, otherwise Pmax will be
    # different by a factor

    # CHECKS
    tolerance = 1e-6
    print("comparing pdf1d")
    # fits_cache = fits.open(pdf1d_fname_cache)
    fits_normal = fits.open(normal_pdf1d)
    fits_new = fits.open(merged_pdf1d_fname)

    if len(fits_new) != len(fits_normal):
        raise AssertionError()

    # As with the stats table below, the merge is not bit-exact, so
    # compare within a tolerance
    for k in range(1, len(fits_new)):
        qname = fits_new[k].header["EXTNAME"]
        print(qname)
        np.testing.assert_allclose(fits_new[k].data,
                                   fits_normal[qname].data,
                                   rtol=tolerance,
                                   atol=tolerance)

    print("comparing stats")
    # table_cache = Table.read(stats_fname_cache)
    table_normal = Table.read(normal_stats)
    table_new = Table.read(merged_stats_fname)

    if len(table_normal) != len(table_new):
        raise AssertionError()

    # Exact equality would normally fail here: the merging process cannot
    # be made bit-identical due to floating-point math (exacerbated by
    # exponentials), so compare within a tolerance instead
    for c in table_new.colnames:
        print(c)
        if c == "Name" or c == "RA" or c == "DEC":
            np.testing.assert_equal(
                table_normal[c],
                table_new[c],
                err_msg="column {} is not equal".format(c),
            )
        else:
            np.testing.assert_allclose(
                table_normal[c],
                table_new[c],
                rtol=tolerance,
                equal_nan=True,
                err_msg="column {} is not close enough".format(c),
            )
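
Unlike the class-based variant above, this standalone version hardcodes its output paths under /tmp, which is not portable. A small helper sketch for generating unique, portable output names (temp_output is illustrative, not a BEAST utility):

import tempfile

def temp_output(suffix):
    # create a unique temp path; delete=False keeps the file on disk so
    # the fitting code can write to it afterwards
    f = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    f.close()
    return f.name

# stats_fname = temp_output(".fits")
# pdf1d_fname = temp_output(".fits")
# lnp_fname = temp_output(".hd5")
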
Code example #7
File: run_fitting.py  Project: dthilker/beast
def run_fitting(
    use_sd=True,
    nsubs=1,
    nprocs=1,
    choose_sd_sub=None,
    choose_subgrid=None,
    pdf2d_param_list=['Av', 'Rv', 'f_A', 'M_ini', 'logA', 'Z', 'distance'],
    resume=False,
):
    """
    Run the fitting.  If nsubs > 1, this will find existing subgrids.
    If use_sd is True, will also incorporate source density info.

    The additional choose_* options are to make queue scripts usable,
    by specifying a given SD+sub and/or subgrid for the fitting run.


    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    choose_sd_sub : list of two strings (default=None)
        If this is set, the fitting will just be for this combo of SD+sub,
        rather than all of them.  Overrides use_sd.
        format of the list: ['#','#']

    choose_subgrid : int (default=None)
        If this is set, the fitting will just be for this subgrid index.
        If nsubs=1, this is ignored.

    pdf2d_param_list : list of strings or None
        If set, do 2D PDFs of these parameters.  If None, don't make 2D PDFs.

    resume : boolean (default=False)
        choose whether to resume existing run or start over

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # keep track of time (time.clock() was removed in Python 3.8, so use
    # a monotonic performance counter)
    start_time = time.perf_counter()

    # --------------------
    # make lists of file names
    # --------------------

    file_dict = create_filenames.create_filenames(
        use_sd=use_sd,
        nsubs=nsubs,
        choose_sd_sub=choose_sd_sub,
        choose_subgrid=choose_subgrid,
    )

    # input files
    photometry_files = file_dict["photometry_files"]
    # modelsedgrid_files = file_dict["modelsedgrid_files"]
    modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"]
    # noise_files = file_dict["noise_files"]
    noise_trim_files = file_dict["noise_trim_files"]

    # output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    pdf2d_files = file_dict["pdf2d_files"]
    if pdf2d_param_list is None:
        pdf2d_files = [None for i in range(len(pdf2d_files))]
    lnp_files = file_dict["lnp_files"]

    # total number of files
    n_files = len(photometry_files)

    # other potentially useful info
    sd_sub_info = file_dict["sd_sub_info"]
    # gridsub_info = file_dict['gridsub_info']

    # if using subgrids, make the grid dictionary file:
    # File where the ranges and number of unique values for the grid
    # will be stored (this can take a while to calculate)

    if nsubs > 1:

        gridpickle_files = file_dict["gridpickle_files"]

        for i in range(len(gridpickle_files)):
            if not os.path.isfile(gridpickle_files[i]):

                # list of corresponding SED grids and noise models

                # - with SD+sub: get file list for ALL subgrids at current SD+sub
                if use_sd or (choose_sd_sub is not None):
                    temp = create_filenames.create_filenames(
                        nsubs=nsubs, choose_sd_sub=sd_sub_info[i], choose_subgrid=None
                    )
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # - no SD info: get file list for ALL subgrids
                else:
                    temp = create_filenames.create_filenames(
                        use_sd=False, nsubs=nsubs, choose_subgrid=None
                    )
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # create the grid info dictionary
                print("creating grid_info_dict for " + gridpickle_files[i])
                grid_info_dict = subgridding_tools.reduce_grid_info(
                    modelsedgrid_trim_list, noise_trim_list, nprocs=nprocs
                )
                # save it
                with open(gridpickle_files[i], "wb") as p:
                    pickle.dump(grid_info_dict, p)
                print("wrote grid_info_dict to " + gridpickle_files[i])

    # --------------------
    # do the fitting!
    # --------------------

    # set up function inputs

    if nsubs == 1:

        input_list = [
            (
                photometry_files[i],
                modelsedgrid_trim_files[i],
                noise_trim_files[i],
                stats_files[i],
                pdf_files[i],
                pdf2d_files[i],
                pdf2d_param_list,
                lnp_files[i],
                None,
                resume,
            )
            for i in range(n_files)
        ]

    if nsubs > 1:

        input_list = [
            (
                photometry_files[i],
                modelsedgrid_trim_files[i],
                noise_trim_files[i],
                stats_files[i],
                pdf_files[i],
                pdf2d_files[i],
                pdf2d_param_list,
                lnp_files[i],
                gridpickle_files[i],
                resume,
            )
            for i in range(n_files)
        ]

    # run the fitting (via parallel wrapper)

    parallel_wrapper(fit_submodel, input_list, nprocs=nprocs)

    # see how long it took!
    new_time = time.perf_counter()
    print("time to fit: ", (new_time - start_time) / 60.0, " min")