Esempio n. 1
0
def create_filenames(use_sd=True,
                     nsubs=1,
                     choose_sd_sub=None,
                     choose_subgrid=None):
    """
    Helper function to make all of the filenames.  SED grid and noise model
    are trimmed versions.

    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    choose_sd_sub : list of two strings (default=None)
        If this is set, the fitting will just be for this combo of SD+sub,
        rather than all of them.  Overrides use_sd.
        format of the list: ['#','#']

    choose_subgrid : int (default=None)
        If this is set, the fitting with just be for this subgrid index.
        If nsubs=1, this is ignored.

    Returns
    -------
    dictionary with the lists of filenames, plus the corresponding SD+sub and
    gridsub values for easy referencing

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # input files
    photometry_files = []
    modelsedgrid_files = []
    modelsedgrid_trim_files = []
    noise_files = []
    noise_trim_files = []

    # output files
    stats_files = []
    pdf_files = []
    pdf2d_files = []
    lnp_files = []

    # other potentially useful things
    sd_sub_info = []
    gridsub_info = []

    # ** no subgrids **

    if nsubs == 1:

        # -- SD+sub specified
        if choose_sd_sub is not None:

            photometry_files.append(
                datamodel.obsfile.replace(
                    ".fits",
                    "_bin{0}_sub{1}.fits".format(choose_sd_sub[0],
                                                 choose_sd_sub[1]),
                ))
            modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            modelsedgrid_trim_files.append(
                "{0}/{0}_bin{1}_sub{2}_seds_trim.grid.hd5".format(
                    datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            noise_files.append("{0}/{0}_noisemodel_bin{1}.grid.hd5".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            noise_trim_files.append(
                "{0}/{0}_bin{1}_sub{2}_noisemodel_trim.grid.hd5".format(
                    datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))

            stats_files.append("{0}/{0}_bin{1}_sub{2}_stats.fits".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            pdf_files.append("{0}/{0}_bin{1}_sub{2}_pdf1d.fits".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            pdf2d_files.append("{0}/{0}_bin{1}_sub{2}_pdf2d.fits".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            lnp_files.append("{0}/{0}_bin{1}_sub{2}_lnp.hd5".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))

            sd_sub_info.append([choose_sd_sub[0], choose_sd_sub[1]])

        # -- using source density info
        elif use_sd is True:

            photometry_files = sorted(
                glob.glob(datamodel.obsfile.replace(".fits",
                                                    "_bin*_sub*.fits")))

            for phot_file in photometry_files:
                # get the sd/sub number
                dpos = phot_file.rfind("_bin")
                spos = phot_file.rfind("sub")
                ppos = phot_file.rfind(".")
                curr_sd = phot_file[dpos + 4:spos - 1]
                curr_sub = phot_file[spos + 3:ppos]

                # construct other file names
                modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format(
                    datamodel.project, curr_sd, curr_sub))
                modelsedgrid_trim_files.append(
                    "{0}/{0}_bin{1}_sub{2}_seds_trim.grid.hd5".format(
                        datamodel.project, curr_sd, curr_sub))
                noise_files.append("{0}/{0}_noisemodel_bin{1}.grid.hd5".format(
                    datamodel.project, curr_sd, curr_sub))
                noise_trim_files.append(
                    "{0}/{0}_bin{1}_sub{2}_noisemodel_trim.grid.hd5".format(
                        datamodel.project, curr_sd, curr_sub))

                stats_files.append("{0}/{0}_bin{1}_sub{2}_stats.fits".format(
                    datamodel.project, curr_sd, curr_sub))
                pdf_files.append("{0}/{0}_bin{1}_sub{2}_pdf1d.fits".format(
                    datamodel.project, curr_sd, curr_sub))
                pdf2d_files.append("{0}/{0}_bin{1}_sub{2}_pdf2d.fits".format(
                    datamodel.project, curr_sd, curr_sub))
                lnp_files.append("{0}/{0}_bin{1}_sub{2}_lnp.hd5".format(
                    datamodel.project, curr_sd, curr_sub))

                sd_sub_info.append([curr_sd, curr_sub])

        # -- no source density splitting
        else:

            photometry_files.append(datamodel.obsfile)
            modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format(
                datamodel.project))
            modelsedgrid_trim_files.append("{0}/{0}_seds_trim.grid.hd5".format(
                datamodel.project))
            noise_files.append("{0}/{0}_noisemodel.grid.hd5".format(
                datamodel.project))
            noise_trim_files.append("{0}/{0}_noisemodel_trim.grid.hd5".format(
                datamodel.project))

            stats_files.append("{0}/{0}_stats.fits".format(datamodel.project))
            pdf_files.append("{0}/{0}_pdf1d.fits".format(datamodel.project))
            pdf2d_files.append("{0}/{0}_pdf2d.fits".format(datamodel.project))
            lnp_files.append("{0}/{0}_lnp.hd5".format(datamodel.project))

    # ** with subgrids **

    # subgrids require a pickle file with grid info
    gridpickle_files = []

    if nsubs > 1:

        # start with getting the model grid files (note these aren't trimmed ones)
        outdir = os.path.join(".", datamodel.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")
        temp = get_modelsubgridfiles(subgrid_names_file)
        # use that to get the number of subgrids and make a list of them
        gridsub_list = np.arange(len(temp))
        # or a subset if set
        if choose_subgrid is not None:
            gridsub_list = [choose_subgrid]

        # -- SD+sub specified
        if choose_sd_sub is not None:

            for gridsub in gridsub_list:

                photometry_files.append(
                    datamodel.obsfile.replace(
                        ".fits",
                        "_bin{0}_sub{1}.fits".format(choose_sd_sub[0],
                                                     choose_sd_sub[1]),
                    ))

                modelsedgrid_files.append("{0}/{0}_seds.gridsub{3}.hd5".format(
                    datamodel.project, choose_sd_sub[0], choose_sd_sub[1],
                    gridsub))
                modelsedgrid_trim_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_seds_trim.grid.hd5"
                    .format(datamodel.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                noise_files.append(
                    "{0}/{0}_noisemodel_bin{1}.gridsub{3}.hd5".format(
                        datamodel.project, choose_sd_sub[0], choose_sd_sub[1],
                        gridsub))
                noise_trim_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_noisemodel_trim.grid.hd5"
                    .format(datamodel.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))

                stats_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_stats.fits"
                    .format(datamodel.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                pdf_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf1d.fits"
                    .format(datamodel.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                pdf2d_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf2d.fits"
                    .format(datamodel.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                lnp_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_lnp.hd5".
                    format(datamodel.project, choose_sd_sub[0],
                           choose_sd_sub[1], gridsub))

                gridpickle_files.append(
                    "{0}/bin{1}_sub{2}/grid_info_dict.pkl".format(
                        datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))

                sd_sub_info.append([choose_sd_sub[0], choose_sd_sub[1]])
                gridsub_info.append(gridsub)

        # -- using source density info
        elif use_sd is True:

            phot_file_list = sorted(
                glob.glob(datamodel.obsfile.replace(".fits",
                                                    "_bin*_sub*.fits")))

            for phot_file in phot_file_list:
                # get the sd/sub number
                dpos = phot_file.rfind("_bin")
                spos = phot_file.rfind("sub")
                ppos = phot_file.rfind(".")
                curr_sd = phot_file[dpos + 4:spos - 1]
                curr_sub = phot_file[spos + 3:ppos]

                # construct other file names
                for gridsub in gridsub_list:
                    photometry_files.append(phot_file)
                    modelsedgrid_files.append(
                        "{0}/{0}_seds.gridsub{3}.hd5".format(
                            datamodel.project, curr_sd, curr_sub, gridsub))
                    modelsedgrid_trim_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_seds_trim.grid.hd5"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))
                    noise_files.append(
                        "{0}/{0}_noisemodel_bin{1}.gridsub{3}.hd5".format(
                            datamodel.project, curr_sd, curr_sub, gridsub))
                    noise_trim_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_noisemodel_trim.grid.hd5"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))

                    stats_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_stats.fits"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))
                    pdf_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf1d.fits"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))
                    pdf2d_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf2d.fits"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))
                    lnp_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_lnp.hd5"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))

                    gridpickle_files.append(
                        "{0}/bin{1}_sub{2}/grid_info_dict.pkl".format(
                            datamodel.project, curr_sd, curr_sub))

                    sd_sub_info.append([curr_sd, curr_sub])
                    gridsub_info.append(gridsub)

        # -- no source density splitting
        else:

            for gridsub in gridsub_list:
                photometry_files.append(datamodel.obsfile)
                modelsedgrid_files.append("{0}/{0}_seds.gridsub{1}.hd5".format(
                    datamodel.project, gridsub))
                modelsedgrid_trim_files.append(
                    "{0}/{0}_gridsub{1}_seds_trim.grid.hd5".format(
                        datamodel.project, gridsub))
                noise_files.append("{0}/{0}_noisemodel.gridsub{1}.hd5".format(
                    datamodel.project, gridsub))
                noise_trim_files.append(
                    "{0}/{0}_gridsub{1}_noisemodel_trim.grid.hd5".format(
                        datamodel.project, gridsub))

                stats_files.append("{0}/{0}_gridsub{1}_stats.fits".format(
                    datamodel.project, gridsub))
                pdf_files.append("{0}/{0}_gridsub{1}_pdf1d.fits".format(
                    datamodel.project, gridsub))
                pdf2d_files.append("{0}/{0}_gridsub{1}_pdf2d.fits".format(
                    datamodel.project, gridsub))
                lnp_files.append("{0}/{0}_gridsub{1}_lnp.hd5".format(
                    datamodel.project, gridsub))

                gridpickle_files.append("{0}/grid_info_dict.pkl".format(
                    datamodel.project))

                gridsub_info.append(gridsub)

    # double check that all file lists are the same length
    n_file_list = [
        len(x) for x in [
            photometry_files,
            modelsedgrid_files,
            modelsedgrid_trim_files,
            noise_files,
            noise_trim_files,
            stats_files,
            pdf_files,
            pdf2d_files,
            lnp_files,
        ]
    ]
    if len(np.unique(n_file_list)) > 1:
        print("file list lengths don't match!")
        return None

    return {
        "photometry_files": photometry_files,
        "modelsedgrid_files": modelsedgrid_files,
        "modelsedgrid_trim_files": modelsedgrid_trim_files,
        "noise_files": noise_files,
        "noise_trim_files": noise_trim_files,
        "stats_files": stats_files,
        "pdf_files": pdf_files,
        "pdf2d_files": pdf2d_files,
        "lnp_files": lnp_files,
        "gridpickle_files": gridpickle_files,
        "sd_sub_info": sd_sub_info,
        "gridsub_info": gridsub_info,
    }
Esempio n. 2
0
def create_obsmodel(use_sd=True,
                    nsubs=1,
                    nprocs=1,
                    subset=[None, None],
                    use_rate=True):
    """
    Create the observation models.  If nsubs > 1, this will find existing
    subgrids.  If use_sd is True, will also incorporate source density
    info.


    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    subset : list of two ints (default=[None,None])
        Only process subgrids in the range [start,stop].
        (only relevant if nsubs > 1)

    use_rate : boolean (default=True)
        Choose whether to use the rate or magnitude when creating the noise
        model.  This should always be True, but is currently an option to be
        compatible with the phat_small example (which has no rate info).
        When that gets fixed, please remove this option!

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # --------------------
    # figure out if there are source density bins
    # --------------------

    ast_file_list = sorted(
        glob.glob(datamodel.astfile.replace(".fits", "*_bin*")))

    if use_sd and (len(ast_file_list) > 0):

        sd_list = []
        for ast_file in ast_file_list:
            dpos = ast_file.rfind("_bin")
            ppos = ast_file.rfind(".")
            sd_list.append(ast_file[dpos + 4:ppos])
        print("sd list: ", sd_list)

    else:
        # if there are no ASTs with source densities, the flag should be "false"
        use_sd = False

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        modelsedgridfile = "{0}/{0}_seds.grid.hd5".format(datamodel.project)

        # if we're splitting by source density
        if use_sd:

            input_list = [(modelsedgridfile, curr_sd) for curr_sd in sd_list]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:

            input_list = [(modelsedgridfile, None, use_rate)]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:

        # get the list of physics model files
        outdir = os.path.join(".", datamodel.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")
        modelsedgridfiles = get_modelsubgridfiles(subgrid_names_file)[slice(
            subset[0], subset[1])]

        # if we're splitting by source density
        if use_sd:

            input_list = [(sedfile, curr_sd) for sedfile in modelsedgridfiles
                          for curr_sd in sd_list]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:

            input_list = [(sedfile, None) for sedfile in modelsedgridfiles]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)
Esempio n. 3
0
def create_obsmodel(
    beast_settings_info,
    use_sd=True,
    nsubs=1,
    nprocs=1,
    subset=[None, None],
):
    """
    Create the observation models.  If nsubs > 1, this will find existing
    subgrids.  If use_sd is True, will also incorporate source density
    info.


    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to settings.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    subset : list of two ints (default=[None,None])
        Only process subgrids in the range [start,stop].
        (only relevant if nsubs > 1)

    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # --------------------
    # figure out if there are source density bins
    # --------------------

    ast_file_list = sorted(
        glob.glob(settings.astfile.replace(".fits", "*_bin*")))

    if use_sd and (len(ast_file_list) > 0):

        sd_list = []
        for ast_file in ast_file_list:
            dpos = ast_file.rfind("_bin")
            ppos = ast_file.rfind(".")
            sd_list.append(ast_file[dpos + 4:ppos])
        print("sd list: ", sd_list)

    else:
        # if there are no ASTs with source densities, the flag should be "false"
        use_sd = False

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        modelsedgridfile = "{0}/{0}_seds.grid.hd5".format(settings.project)

        # if we're splitting by source density
        if use_sd:

            input_list = [(settings, modelsedgridfile, curr_sd)
                          for curr_sd in sd_list]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:

            input_list = [(settings, modelsedgridfile, None)]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:

        # get the list of physics model files
        outdir = os.path.join(".", settings.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")
        modelsedgridfiles = get_modelsubgridfiles(subgrid_names_file)[slice(
            subset[0], subset[1])]

        # if we're splitting by source density
        if use_sd:

            input_list = [(settings, sedfile, curr_sd)
                          for sedfile in modelsedgridfiles
                          for curr_sd in sd_list]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:

            input_list = [(settings, sedfile, None)
                          for sedfile in modelsedgridfiles]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)