Beispiel #1
0
def merge_files(use_sd=True, nsubs=1):
    """
    Merge all of the results from the assorted fitting sub-files (divided by
    source density, subgrids, or both).

    The merging itself is delegated to merge_beast_stats.merge_stats_files
    and subgridding_tools.merge_pdf1d_stats; this function only works out
    which files belong together and what the merged files should be called.
    Nothing is returned.

    Parameters
    ----------
    use_sd : boolean (default=True)
        set to True if the fitting runs were split by source density

    nsubs : int (default=1)
        number of subgrids used for the physics model

    """

    # if there's no SD and no subgridding, running this is unnecessary
    # (there is only a single set of output files, so nothing to merge)
    if (not use_sd) and (nsubs == 1):
        print("No merging necessary")
        return

    # before doing ANYTHING else, force datamodel to re-import (otherwise,
    # any changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # get file name lists for this combination of SD splitting and subgrids
    file_dict = create_filenames.create_filenames(use_sd=use_sd, nsubs=nsubs)

    # - output files of the fitting, which are the inputs to the merging
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]

    # - SD bin / sub-bin label associated with each file
    sd_sub_info = file_dict["sd_sub_info"]
    # the unique SD+sub combinations, in order of first appearance
    unique_sd_sub = [
        x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x)
    ]

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        # only reached when use_sd is True: one stats file per SD+sub
        out_filebase = "{0}/{0}".format(datamodel.project)
        reorder_tags = [
            "sd{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
        ]
        merge_beast_stats.merge_stats_files(stats_files,
                                            out_filebase,
                                            reorder_tag_list=reorder_tags)

    # --------------------
    # use subgrids
    # --------------------

    elif nsubs > 1:

        # runs were split by source density
        if use_sd:

            # merged stats file names, one per SD+sub combination
            merged_stats_files = []

            for sd_sub in unique_sd_sub:

                # indices of the files belonging to the current sd_sub
                ind = [j for j, x in enumerate(sd_sub_info) if x == sd_sub]

                # merge the subgrid files for that SD+sub
                out_filebase = "{0}/SD{1}_sub{2}/{0}_SD{1}_sub{2}".format(
                    datamodel.project, sd_sub[0], sd_sub[1])

                # the merged 1D PDF file name is not needed afterwards
                _, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
                    [pdf_files[j] for j in ind],
                    [stats_files[j] for j in ind],
                    re_run=False,
                    output_fname_base=out_filebase,
                )

                merged_stats_files.append(merged_stats_fname)

            # merge the merged stats files
            out_filebase = "{0}/{0}".format(datamodel.project)
            reorder_tags = [
                "sd{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
            ]
            merge_beast_stats.merge_stats_files(merged_stats_files,
                                                out_filebase,
                                                reorder_tag_list=reorder_tags)

        # runs weren't split by source density
        else:

            out_filebase = "{0}/{0}".format(datamodel.project)

            subgridding_tools.merge_pdf1d_stats(pdf_files,
                                                stats_files,
                                                output_fname_base=out_filebase)
Beispiel #2
0
    if args.merge:
        modelsedgridfiles = get_modelsubgridfiles()
        with_fits = [s.replace(".hd5", ".fits") for s in modelsedgridfiles]
        pdf1dfiles = [s.replace("seds", "pdf1d") for s in with_fits]
        statsfiles = [s.replace("seds", "stats") for s in with_fits]
        output_fname_base = os.path.join(settings.project, "combined")
        if args.dens_bin is not None:
            pdf1dfiles, statsfiles = [[
                os.path.join(bin_subfolder, f) for f in l
            ] for l in [pdf1dfiles, statsfiles]]
            output_fname_base = os.path.join(bin_subfolder, output_fname_base)

        if args.ignore_missing_subresults:
            # remove any missing filenames from the lists, and hope for the best
            def only_existing_files(file_list):
                return [f for f in file_list if os.path.isfile(f)]

            pdf1dfiles = only_existing_files(pdf1dfiles)
            statsfiles = only_existing_files(statsfiles)

        print("Merging")
        print(list(zip(pdf1dfiles, statsfiles)))

        subgridding_tools.merge_pdf1d_stats(
            pdf1dfiles, statsfiles, output_fname_base=output_fname_base)

    # print help if no arguments
    if not any(vars(args).values()):
        parser.print_help()
Beispiel #3
0
def merge_files(beast_settings_info, use_sd=True, nsubs=1, partial=False):
    """
    Combine the outputs of the individual fitting runs (split by source
    density bin, by subgrid, or by both) into merged files.

    To inspect the stars that are already finished while fitting is still
    in progress, pass partial=True.  This only matters when subgrids are
    in use.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    use_sd : boolean (default=True)
        set to True if the fitting used source density bins

    nsubs : int (default=1)
        number of subgrids used for the physics model

    partial : boolean (default=False)
        If True, the output merged files will only have stars that have been
        run across all subgrids.  Stars fit in only some of the subgrids are
        left out of the "partial" output files.  Currently only implemented
        for 1D PDFs and stats (not lnP) files.

    Raises
    ------
    TypeError
        If beast_settings_info is neither a string nor a beast_settings
        instance.

    """

    # a single un-split run leaves nothing to combine
    nothing_to_merge = (not use_sd) and (nsubs == 1)
    if nothing_to_merge:
        print("No merging necessary")
        return

    # resolve the settings: a string is treated as a settings file name
    settings = beast_settings_info
    if isinstance(settings, str):
        settings = beast_settings.beast_settings(settings)
    elif not isinstance(settings, beast_settings.beast_settings):
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # file name lists for this combination of SD bins and subgrids
    file_dict = create_filenames.create_filenames(
        settings, use_sd=use_sd, nsubs=nsubs
    )

    # outputs of the fitting, which are what gets merged here
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    lnp_files = file_dict["lnp_files"]

    # bin/sub-bin label of each file, and the distinct labels in order of
    # first appearance
    sd_sub_info = file_dict["sd_sub_info"]
    unique_sd_sub = []
    for label in sd_sub_info:
        if label not in unique_sd_sub:
            unique_sd_sub.append(label)

    # --------------------
    # no subgrids: only the per-bin stats files need combining
    # --------------------
    if nsubs == 1:
        top_base = "{0}/{0}".format(settings.project)
        bin_tags = [
            "bin{0}_sub{1}".format(label[0], label[1])
            for label in unique_sd_sub
        ]
        merge_beast_stats.merge_stats_files(
            stats_files, top_base, reorder_tag_list=bin_tags
        )
        return

    # anything other than "more than one subgrid" has nothing further to do
    if not nsubs > 1:
        return

    # --------------------
    # subgrids, runs not split by source density
    # --------------------
    if not use_sd:
        top_base = "{0}/{0}".format(settings.project)
        # NOTE(review): unlike the per-bin outputs below, this base name does
        # not get a "_partial" suffix when partial=True -- confirm intended.

        # - 1D PDFs and stats
        subgridding_tools.merge_pdf1d_stats(
            pdf_files,
            stats_files,
            output_fname_base=top_base,
            partial=partial,
        )

        # - lnP files (no partial implementation)
        if not partial:
            subgridding_tools.merge_lnp(
                lnp_files,
                re_run=False,
                output_fname_base=top_base,
                threshold=-10,
            )
        return

    # --------------------
    # subgrids, runs split by source density
    # --------------------
    per_bin_stats = []

    for label in unique_sd_sub:

        # positions of the files that belong to this bin/sub-bin
        members = [k for k, other in enumerate(sd_sub_info) if other == label]

        # base name of the merged files for this bin/sub-bin
        bin_base = "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}".format(
            settings.project, label[0], label[1]
        )
        if partial:
            bin_base += "_partial"

        # - 1D PDFs and stats
        bin_pdfs = [pdf_files[k] for k in members]
        bin_stats = [stats_files[k] for k in members]
        merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
            bin_pdfs,
            bin_stats,
            re_run=False,
            output_fname_base=bin_base,
            partial=partial,
        )
        per_bin_stats.append(merged_stats_fname)

        # - lnP files (no partial implementation)
        if not partial:
            subgridding_tools.merge_lnp(
                [lnp_files[k] for k in members],
                re_run=False,
                output_fname_base=bin_base,
                threshold=-10,
            )

    # finally combine the per-bin merged stats files into one
    top_base = "{0}/{0}".format(settings.project)
    bin_tags = [
        "bin{0}_sub{1}".format(label[0], label[1]) for label in unique_sd_sub
    ]
    merge_beast_stats.merge_stats_files(
        per_bin_stats, top_base, reorder_tag_list=bin_tags
    )
Beispiel #4
0
    def test_merge_pdf1d_stats(self):
        """
        Using cached versions of the observations, sed grid, and noise model,
        split the grids and do the fitting on the subgrids and original
        grid.  Merge the results from the subgrids and compare to the results
        from fitting the full grid.

        Raises
        ------
        AssertionError
            If the merged and full-grid outputs have a different number of
            FITS extensions or table rows, or if any compared values differ
            by more than the tolerance.
        """
        ######################################
        # STEP 1: GET SOME DATA TO WORK WITH #
        ######################################

        # read in the observed data
        obsdata = Observations(self.obs_fname_cache, self.settings.filters,
                               self.settings.obs_colnames)

        #########################################################################################
        # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
        #########################################################################################
        num_subgrids = 3

        # Split SED grid
        sub_seds_trim_fnames = subgridding_tools.split_grid(
            self.seds_trim_fname_cache, num_subgrids, overwrite=True)

        # Split noise grid (a standardized function does not exist)
        sub_noise_trim_fnames = []

        noisemodel_vals = noisemodel.get_noisemodelcat(
            self.noise_trim_fname_cache)
        slices = subgridding_tools.uniform_slices(len(noisemodel_vals["bias"]),
                                                  num_subgrids)
        for i, slc in enumerate(slices):
            # one noise model file per subgrid, holding the matching slice
            # of each of the three noise model arrays
            outname = self.noise_trim_fname_cache.replace(
                ".hd5", "sub{}.hd5".format(i))
            with tables.open_file(outname, "w") as outfile:
                outfile.create_array(outfile.root, "bias",
                                     noisemodel_vals["bias"][slc])
                outfile.create_array(outfile.root, "error",
                                     noisemodel_vals["error"][slc])
                outfile.create_array(outfile.root, "completeness",
                                     noisemodel_vals["completeness"][slc])
            sub_noise_trim_fnames.append(outname)

        # Collect information about the parameter ranges, to make the pdf1d
        # bins consistent between subgrids
        grid_info_dict = subgridding_tools.reduce_grid_info(
            sub_seds_trim_fnames,
            sub_noise_trim_fnames,
            nprocs=1,
            cap_unique=100)

        ##################################################
        # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
        ##################################################
        def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
            """Return one file name per subgrid: "gridsub<i>" is inserted in
            front of the extension of base_fname."""
            return [
                base_fname.replace(extension,
                                   "gridsub{}{}".format(i, extension))
                for i in range(num_subgrids)
            ]

        # Only the generated names are kept; the NamedTemporaryFile objects
        # themselves are discarded right away.
        # NOTE(review): this presumably relies on the temporary file being
        # removed once the object is collected, so that the fitting code
        # creates the file itself -- confirm this is intended.
        stats_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
        pdf1d_fname = tempfile.NamedTemporaryFile(suffix=".fits").name
        lnp_fname = tempfile.NamedTemporaryFile(suffix=".hd5").name

        subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
        subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
        subgrid_lnp_fnames = make_gridsub_fnames(lnp_fname,
                                                 num_subgrids,
                                                 extension=".hd5")

        for i in range(num_subgrids):
            sub_noisemodel_vals = noisemodel.get_noisemodelcat(
                sub_noise_trim_fnames[i])
            fit.summary_table_memory(
                obsdata,
                sub_noisemodel_vals,
                sub_seds_trim_fnames[i],
                threshold=-40.0,
                save_every_npts=100,
                lnp_npts=500,
                stats_outname=subgrid_stats_fnames[i],
                pdf1d_outname=subgrid_pdf1d_fnames[i],
                lnp_outname=subgrid_lnp_fnames[i],
                grid_info_dict=grid_info_dict,
                do_not_normalize=True,
            )
            # The do_not_normalize option is absolutely crucial!

        # Now merge the results
        merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
            subgrid_pdf1d_fnames, subgrid_stats_fnames)

        # Do a full fit also
        normal_stats = tempfile.NamedTemporaryFile(suffix=".fits").name
        normal_pdf1d = tempfile.NamedTemporaryFile(suffix=".fits").name
        normal_lnp = tempfile.NamedTemporaryFile(suffix=".hd5").name
        fit.summary_table_memory(
            obsdata,
            noisemodel_vals,
            self.seds_trim_fname_cache,
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=500,
            stats_outname=normal_stats,
            pdf1d_outname=normal_pdf1d,
            lnp_outname=normal_lnp,
            do_not_normalize=True,
        )
        # Here, we also need to use do_not_normalize, otherwise Pmax will be
        # different by a factor

        # CHECKS
        tolerance = 1e-6

        # The context manager closes both FITS files even when one of the
        # comparisons below fails.
        with fits.open(normal_pdf1d) as fits_normal, \
                fits.open(merged_pdf1d_fname) as fits_new:

            if not len(fits_new) == len(fits_normal):
                raise AssertionError(
                    "merged pdf1d file has {} HDUs, full-grid file has {}".
                    format(len(fits_new), len(fits_normal)))

            # Compare within a tolerance rather than exactly.  HDU 0 is
            # skipped, and HDUs are matched by extension name, not position.
            for k in range(1, len(fits_new)):
                qname = fits_new[k].header["EXTNAME"]
                np.testing.assert_allclose(
                    fits_new[k].data,
                    fits_normal[qname].data,
                    rtol=tolerance,
                    atol=tolerance,
                )

        table_normal = Table.read(normal_stats)
        table_new = Table.read(merged_stats_fname)

        if not len(table_normal) == len(table_new):
            raise AssertionError(
                "merged stats table has {} rows, full-grid table has {}".
                format(len(table_new), len(table_normal)))

        # Exact comparisons would normally fail, as the merging process can
        # not be made bit-correct due to floating point math (exacerbated by
        # exponentials).  Only the identifying columns are compared exactly.
        for c in table_new.colnames:
            if c in ("Name", "RA", "DEC"):
                np.testing.assert_equal(
                    table_normal[c],
                    table_new[c],
                    err_msg="column {} is not equal".format(c),
                )
            else:
                np.testing.assert_allclose(
                    table_normal[c],
                    table_new[c],
                    rtol=tolerance,
                    equal_nan=True,
                    err_msg="column {} is not close enough".format(c),
                )
Beispiel #5
0
def test_merge_pdf1d_stats():
    """
    Split the SED and noise model grids into subgrids, fit each subgrid,
    merge the subgrid results, and compare them to the results of fitting
    the full grid.

    Raises
    ------
    AssertionError
        If the merged and full-grid outputs have a different number of FITS
        extensions or table rows, or if any compared values differ by more
        than the tolerance.
    """
    ######################################
    # STEP 1: GET SOME DATA TO WORK WITH #
    ######################################
    # NOTE(review): download_rename presumably fetches the named file and
    # returns a local path -- confirm against its definition.
    vega_fname = download_rename("vega.hd5")
    obs_fname = download_rename("b15_4band_det_27_A.fits")
    noise_trim_fname = download_rename(
        "beast_example_phat_noisemodel_trim.grid.hd5")
    seds_trim_fname = download_rename("beast_example_phat_seds_trim.grid.hd5")

    # read in the observed data
    filters = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_ACS_WFC_F475W",
        "HST_ACS_WFC_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]
    basefilters = ["F275W", "F336W", "F475W", "F814W", "F110W", "F160W"]
    obs_colnames = [f.lower() + "_rate" for f in basefilters]

    obsdata = Observations(obs_fname,
                           filters,
                           obs_colnames,
                           vega_fname=vega_fname)

    #########################################################################################
    # STEP 2: SPLIT THE GRIDS AND GENERATE THE GRID INFO DICT AS IN THE SUBGRIDDING EXAMPLE #
    #########################################################################################
    num_subgrids = 3

    # Split SED grid
    sub_seds_trim_fnames = subgridding_tools.split_grid(seds_trim_fname,
                                                        num_subgrids,
                                                        overwrite=True)

    # Split noise grid (a standardized function does not exist)
    sub_noise_trim_fnames = []

    noisemodel_vals = get_noisemodelcat(noise_trim_fname)
    slices = subgridding_tools.uniform_slices(len(noisemodel_vals["bias"]),
                                              num_subgrids)
    for i, slc in enumerate(slices):
        # one noise model file per subgrid, holding the matching slice of
        # each of the three noise model arrays
        outname = noise_trim_fname.replace(".hd5", "sub{}.hd5".format(i))
        with tables.open_file(outname, "w") as outfile:
            outfile.create_array(outfile.root, "bias",
                                 noisemodel_vals["bias"][slc])
            outfile.create_array(outfile.root, "error",
                                 noisemodel_vals["error"][slc])
            outfile.create_array(outfile.root, "completeness",
                                 noisemodel_vals["completeness"][slc])
        sub_noise_trim_fnames.append(outname)

    # Collect information about the parameter ranges, to make the pdf1d bins
    # consistent between subgrids
    grid_info_dict = subgridding_tools.reduce_grid_info(sub_seds_trim_fnames,
                                                        sub_noise_trim_fnames,
                                                        nprocs=1,
                                                        cap_unique=100)

    ##################################################
    # STEP 3: GENERATE FILENAMES AND RUN THE FITTING #
    ##################################################
    def make_gridsub_fnames(base_fname, num_subgrids, extension=".fits"):
        """Return one file name per subgrid: "gridsub<i>" is inserted in
        front of the extension of base_fname."""
        return [
            base_fname.replace(extension, "gridsub{}{}".format(i, extension))
            for i in range(num_subgrids)
        ]

    # NOTE(review): these output locations are hard-coded to /tmp, and the
    # full-fit outputs further down go to the current working directory;
    # nothing in this function removes them afterwards.
    stats_fname = "/tmp/beast_example_phat_stats.fits"
    pdf1d_fname = "/tmp/beast_example_phat_pdf1d.fits"
    lnp_fname = "/tmp/beast_example_phat_lnp.hd5"

    subgrid_pdf1d_fnames = make_gridsub_fnames(pdf1d_fname, num_subgrids)
    subgrid_stats_fnames = make_gridsub_fnames(stats_fname, num_subgrids)
    subgrid_lnp_fnames = make_gridsub_fnames(lnp_fname,
                                             num_subgrids,
                                             extension=".hd5")

    for i in range(num_subgrids):
        sub_noisemodel_vals = get_noisemodelcat(sub_noise_trim_fnames[i])
        fit.summary_table_memory(
            obsdata,
            sub_noisemodel_vals,
            sub_seds_trim_fnames[i],
            threshold=-40.0,
            save_every_npts=100,
            lnp_npts=60,
            stats_outname=subgrid_stats_fnames[i],
            pdf1d_outname=subgrid_pdf1d_fnames[i],
            lnp_outname=subgrid_lnp_fnames[i],
            grid_info_dict=grid_info_dict,
            do_not_normalize=True,
        )
        # The do_not_normalize option is absolutely crucial!

    # Now merge the results
    merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
        subgrid_pdf1d_fnames, subgrid_stats_fnames)

    # Do a full fit also
    normal_stats = "normal_stats.fits"
    normal_pdf1d = "normal_pdf1d.fits"
    normal_lnp = "normal_lnp.hd5"
    fit.summary_table_memory(
        obsdata,
        noisemodel_vals,
        seds_trim_fname,
        threshold=-40.0,
        save_every_npts=100,
        lnp_npts=60,
        stats_outname=normal_stats,
        pdf1d_outname=normal_pdf1d,
        lnp_outname=normal_lnp,
        do_not_normalize=True,
    )
    # Here, we also need to use do_not_normalize, otherwise Pmax will be
    # different by a factor

    # CHECKS
    tolerance = 1e-6
    print("comparing pdf1d")

    # The context manager closes both FITS files even when one of the
    # comparisons below fails.
    with fits.open(normal_pdf1d) as fits_normal, \
            fits.open(merged_pdf1d_fname) as fits_new:

        if not len(fits_new) == len(fits_normal):
            raise AssertionError(
                "merged pdf1d file has {} HDUs, full-grid file has {}".format(
                    len(fits_new), len(fits_normal)))

        # Compare within a tolerance rather than exactly.  HDU 0 is skipped,
        # and HDUs are matched by extension name, not position.
        for k in range(1, len(fits_new)):
            qname = fits_new[k].header["EXTNAME"]
            print(qname)
            np.testing.assert_allclose(fits_new[k].data,
                                       fits_normal[qname].data,
                                       rtol=tolerance,
                                       atol=tolerance)

    print("comparing stats")
    table_normal = Table.read(normal_stats)
    table_new = Table.read(merged_stats_fname)

    if not len(table_normal) == len(table_new):
        raise AssertionError(
            "merged stats table has {} rows, full-grid table has {}".format(
                len(table_new), len(table_normal)))

    # Exact comparisons would normally fail, as the merging process can not
    # be made bit-correct due to floating point math (exacerbated by
    # exponentials).  Only the identifying columns are compared exactly.
    for c in table_new.colnames:
        print(c)
        if c in ("Name", "RA", "DEC"):
            np.testing.assert_equal(
                table_normal[c],
                table_new[c],
                err_msg="column {} is not equal".format(c),
            )
        else:
            np.testing.assert_allclose(
                table_normal[c],
                table_new[c],
                rtol=tolerance,
                equal_nan=True,
                err_msg="column {} is not close enough".format(c),
            )
Beispiel #6
0
                trimmed_noisemodelfile)

            fit.summary_table_memory(obsdata, noisemodel_vals,
                                     modelsedgrid, resume=args.resume,
                                     threshold=-10.,
                                     save_every_npts=100, lnp_npts=60,
                                     stats_outname=statsfile,
                                     pdf1d_outname=pdf1dfile,
                                     grid_info_dict=grid_info_dict,
                                     lnp_outname=lnpfile,
                                     do_not_normalize=True)
            print('Done fitting on grid ' + trimmed_modelsedgridfile)

        parallel_wrapper(fit_submodel, modelsedgridfiles)

        new_time = time.clock()
        print('time to fit: ', (new_time - start_time) / 60., ' min')

    if args.merge:
        modelsedgridfiles = get_modelsubgridfiles()
        with_fits = [s.replace('.hd5', '.fits') for s in modelsedgridfiles]
        pdf1dfiles = [s.replace('seds', 'pdf1d') for s in with_fits]
        statsfiles = [s.replace('seds', 'stats') for s in with_fits]
        print('Merging')
        print(list(zip(pdf1dfiles, statsfiles)))
        subgridding_tools.merge_pdf1d_stats(pdf1dfiles, statsfiles)

    # print help if no arguments
    if not any(vars(args).values()):
        parser.print_help()