Example #1
def split_create_physicsmodel(nsubs=1, nprocs=1):
    """
    Making the physics model grid takes a while for production runs.  This
    creates scripts to run each subgrid as a separate job.


    Parameters
    ----------
    nsubs : int (default=1)
        number of subgrids to split the physics model into

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # make sure the project directory exists
    create_project_dir(datamodel.project)

    # directory for scripts
    job_path = "./{0}/model_batch_jobs/".format(datamodel.project)
    if not os.path.isdir(job_path):
        os.mkdir(job_path)

    log_path = job_path + "logs/"
    if not os.path.isdir(log_path):
        os.mkdir(log_path)

    for i in range(nsubs):

        joblist_file = job_path + "create_physicsmodel_" + str(i) + ".job"
        with open(joblist_file, "w") as jf:

            jf.write("python -m beast.tools.run.create_physicsmodel " +
                     " --nsubs " + str(nsubs) + " --nprocs " + str(nprocs) +
                     " --subset " + str(i) + " " + str(i + 1) + " >> " +
                     log_path + "create_physicsmodel_" + str(i) + ".log\n")

        # slurm needs it to be executable
        os.chmod(joblist_file, stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH)
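
A minimal usage sketch (hypothetical values; assumes datamodel.py is already
configured for the current project, as the function requires):

# split the physics model into 4 subgrids, two processes per subgrid job
split_create_physicsmodel(nsubs=4, nprocs=2)
# -> ./<project>/model_batch_jobs/create_physicsmodel_0.job ... _3.job,
#    with logs written to ./<project>/model_batch_jobs/logs/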
Example #2
def merge_files(use_sd=True, nsubs=1):
    """
    Merge all of the results from the assorted fitting sub-files (divided by
    source density, subgrids, or both).


    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, the fitting results are split by source density (determined
        by finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    """

    # if there's no SD and no subgridding, running this is unnecessary
    if (not use_sd) and (nsubs == 1):
        print("No merging necessary")
        return

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # get file name lists (to check if they exist and/or need to be resumed)
    file_dict = create_filenames.create_filenames(use_sd=use_sd, nsubs=nsubs)

    # - input files
    # photometry_files = file_dict['photometry_files']
    # modelsedgrid_files = file_dict['modelsedgrid_files']
    # noise_files = file_dict['noise_files']

    # - output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    # lnp_files = file_dict['lnp_files']

    # - other useful info
    sd_sub_info = file_dict["sd_sub_info"]
    # gridsub_info = file_dict['gridsub_info']
    # the unique sets of SD+sub
    unique_sd_sub = [
        x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x)
    ]

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        out_filebase = "{0}/{0}".format(datamodel.project)
        reorder_tags = [
            "sd{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
        ]
        merge_beast_stats.merge_stats_files(stats_files,
                                            out_filebase,
                                            reorder_tag_list=reorder_tags)

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:

        # runs were split by source density
        if use_sd:

            # lists to save the merged file names
            merged_pdf_files = []
            merged_stats_files = []

            for i, sd_sub in enumerate(unique_sd_sub):

                # indices with the current sd_sub
                ind = [j for j, x in enumerate(sd_sub_info) if x == sd_sub]

                # merge the subgrid files for that SD+sub
                out_filebase = "{0}/SD{1}_sub{2}/{0}_SD{1}_sub{2}".format(
                    datamodel.project, sd_sub[0], sd_sub[1])

                merged_pdf1d_fname, merged_stats_fname = subgridding_tools.merge_pdf1d_stats(
                    [pdf_files[j] for j in ind],
                    [stats_files[j] for j in ind],
                    re_run=False,
                    output_fname_base=out_filebase,
                )

                merged_pdf_files.append(merged_pdf1d_fname)
                merged_stats_files.append(merged_stats_fname)

            # merge the merged stats files
            out_filebase = "{0}/{0}".format(datamodel.project)
            reorder_tags = [
                "sd{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
            ]
            merge_beast_stats.merge_stats_files(merged_stats_files,
                                                out_filebase,
                                                reorder_tag_list=reorder_tags)

        # runs weren't split by source density
        else:

            out_filebase = "{0}/{0}".format(datamodel.project)

            subgridding_tools.merge_pdf1d_stats(pdf_files,
                                                stats_files,
                                                output_fname_base=out_filebase)
Example #3
def create_obsmodel(use_sd=True,
                    nsubs=1,
                    nprocs=1,
                    subset=[None, None],
                    use_rate=True):
    """
    Create the observation models.  If nsubs > 1, this will find existing
    subgrids.  If use_sd is True, will also incorporate source density
    info.


    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    subset : list of two ints (default=[None,None])
        Only process subgrids in the range [start,stop].
        (only relevant if nsubs > 1)

    use_rate : boolean (default=True)
        Choose whether to use the rate or magnitude when creating the noise
        model.  This should always be True, but is currently an option to be
        compatible with the phat_small example (which has no rate info).
        When that gets fixed, please remove this option!

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # --------------------
    # figure out if there are source density bins
    # --------------------

    ast_file_list = sorted(
        glob.glob(datamodel.astfile.replace(".fits", "*_bin*")))

    if use_sd and (len(ast_file_list) > 0):

        sd_list = []
        for ast_file in ast_file_list:
            dpos = ast_file.rfind("_bin")
            ppos = ast_file.rfind(".")
            sd_list.append(ast_file[dpos + 4:ppos])
        print("sd list: ", sd_list)

    else:
        # if there are no ASTs with source densities, the flag should be False
        use_sd = False

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        modelsedgridfile = "{0}/{0}_seds.grid.hd5".format(datamodel.project)

        # if we're splitting by source density
        if use_sd:

            input_list = [(modelsedgridfile, curr_sd) for curr_sd in sd_list]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:

            input_list = [(modelsedgridfile, None, use_rate)]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:

        # get the list of physics model files
        outdir = os.path.join(".", datamodel.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")
        modelsedgridfiles = get_modelsubgridfiles(subgrid_names_file)[
            subset[0]:subset[1]]

        # if we're splitting by source density
        if use_sd:

            input_list = [(sedfile, curr_sd) for sedfile in modelsedgridfiles
                          for curr_sd in sd_list]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:

            input_list = [(sedfile, None) for sedfile in modelsedgridfiles]

            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)
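
A sketch of the source-density suffix parsing above, applied to a
hypothetical AST file name:

ast_file = "data/project_inputAST_bin3.fits"
dpos = ast_file.rfind("_bin")   # index of the "_bin" tag
ppos = ast_file.rfind(".")      # index of the ".fits" extension
print(ast_file[dpos + 4:ppos])  # -> "3", the SD bin label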
Example #4
def run_beast_production(basename,
                         physicsmodel=False,
                         ast=False,
                         observationmodel=False,
                         trim=False,
                         fitting=False,
                         resume=False,
                         source_density='',
                         sub_source_density=''):
    """
    Turns the original command-line version of run_beast_production.py into
    something callable from within a function


    Parameters
    ----------
    basename : string
        name of the gst file (assuming it's located in ./data/)

    For the info related to the other inputs, see the argparse info at the bottom
    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)

    # check input parameters, print what is the problem, stop run_beast
    verify_params.verify_input_format(datamodel)

    # update the filenames as needed for production
    # - photometry sub-file
    datamodel.obsfile = basename.replace(
        '.fits',
        '_with_sourceden' + '_SD_' + source_density.replace('_', '-') +
        '_sub' + sub_source_density + '.fits')
    # - stats files
    stats_filebase = "%s/%s"%(datamodel.project,datamodel.project) \
                     + '_sd' + source_density.replace('_','-') \
                     + '_sub' + sub_source_density
    sed_trimname = stats_filebase + '_sed_trim.grid.hd5'
    # - trimmed noise model
    noisemodel_trimname = stats_filebase + '_noisemodel_trim.hd5'
    # - SED grid
    #modelsedgrid_filename = "%s/%s_seds.grid.hd5"%(datamodel.project,
    #                                               datamodel.project)
    modelsedgrid_filename = "METAL_seds.grid.hd5"

    print("***run information***")
    print("  project = " + datamodel.project)
    print("  obsfile = " + datamodel.obsfile)
    print("  astfile = " + datamodel.astfile)
    print("         noisefile = " + datamodel.noisefile)
    print("   trimmed sedfile = " + sed_trimname)
    print("trimmed noisefiles = " + noisemodel_trimname)
    print("    stats filebase = " + stats_filebase)

    # make sure the project directory exists
    pdir = create_project_dir(datamodel.project)

    if physicsmodel:

        # download and load the isochrones
        (iso_fname, oiso) = make_iso_table(datamodel.project,
                                           oiso=datamodel.oiso,
                                           logtmin=datamodel.logt[0],
                                           logtmax=datamodel.logt[1],
                                           dlogt=datamodel.logt[2],
                                           z=datamodel.z)

        if hasattr(datamodel, 'add_spectral_properties_kwargs'):
            extra_kwargs = datamodel.add_spectral_properties_kwargs
        else:
            extra_kwargs = None

        if hasattr(datamodel, 'velocity'):
            redshift = (datamodel.velocity / const.c).decompose().value
        else:
            redshift = 0

        # generate the spectral library (no dust extinction)
        (spec_fname, g_spec) = make_spectral_grid(
            datamodel.project,
            oiso,
            osl=datamodel.osl,
            redshift=redshift,
            distance=datamodel.distances,
            distance_unit=datamodel.distance_unit,
            add_spectral_properties_kwargs=extra_kwargs)

        # add the stellar priors as weights
        #   also computes the grid weights for the stellar part
        (pspec_fname, g_pspec) = add_stellar_priors(datamodel.project, g_spec)

        # generate the SED grid by integrating the filter response functions
        #   effect of dust extinction applied before filter integration
        #   also computes the dust priors as weights
        (seds_fname, g_seds) = make_extinguished_sed_grid(
            datamodel.project,
            g_pspec,
            datamodel.filters,
            extLaw=datamodel.extLaw,
            av=datamodel.avs,
            rv=datamodel.rvs,
            fA=datamodel.fAs,
            rv_prior_model=datamodel.rv_prior_model,
            av_prior_model=datamodel.av_prior_model,
            fA_prior_model=datamodel.fA_prior_model,
            spec_fname=modelsedgrid_filename,
            add_spectral_properties_kwargs=extra_kwargs)

    if ast:

        N_models = datamodel.ast_models_selected_per_age
        Nfilters = datamodel.ast_bands_above_maglimit
        Nrealize = datamodel.ast_realization_per_model
        mag_cuts = datamodel.ast_maglimit
        obsdata = datamodel.get_obscat(basename, datamodel.filters)

        if len(mag_cuts) == 1:
            tmp_cuts = mag_cuts
            min_mags = np.zeros(len(datamodel.filters))
            for k, filtername in enumerate(obsdata.filters):
                sfiltername = obsdata.data.resolve_alias(filtername)
                sfiltername = sfiltername.replace('rate', 'vega')
                sfiltername = sfiltername.replace('RATE', 'VEGA')
                keep, = np.where(obsdata[sfiltername] < 99.)
                min_mags[k] = np.percentile(obsdata[keep][sfiltername], 90.)

            # max. mags from the gst observation cat.
            mag_cuts = min_mags + tmp_cuts

        outfile = './' + datamodel.project + '/' + datamodel.project + '_inputAST.txt'
        outfile_params = './' + datamodel.project + '/' + datamodel.project + '_ASTparams.fits'
        chosen_seds = pick_models(modelsedgrid_filename,
                                  datamodel.filters,
                                  mag_cuts,
                                  Nfilter=Nfilters,
                                  N_stars=N_models,
                                  Nrealize=Nrealize,
                                  outfile=outfile,
                                  outfile_params=outfile_params)

        if datamodel.ast_with_positions:
            separation = datamodel.ast_pixel_distribution
            filename = datamodel.project + '/' + datamodel.project + '_inputAST.txt'

            if datamodel.ast_reference_image is not None:
                # With reference image, use the background or source density map if available
                if datamodel.ast_density_table is not None:
                    pick_positions_from_map(
                        obsdata,
                        chosen_seds,
                        datamodel.ast_density_table,
                        datamodel.ast_N_bins,
                        datamodel.ast_realization_per_model,
                        outfile=filename,
                        refimage=datamodel.ast_reference_image,
                        refimage_hdu=0,
                        Nrealize=1,
                        set_coord_boundary=datamodel.ast_coord_boundary)
                else:
                    pick_positions(obsdata,
                                   filename,
                                   separation,
                                   refimage=datamodel.ast_reference_image)

            else:
                # Without reference image, we can only use this function
                if datamodel.ast_density_table is None:
                    pick_positions(obsdata, filename, separation)
                else:
                    print(
                        "To use ast_density_table, ast_reference_image must be specified."
                    )

    if observationmodel:
        print('Generating noise model from ASTs and absflux A matrix')

        # get the modelsedgrid on which to generate the noisemodel
        modelsedgrid = FileSEDGrid(modelsedgrid_filename)

        # generate the AST noise model
        noisemodel.make_toothpick_noise_model(
            datamodel.noisefile,
            datamodel.astfile,
            modelsedgrid,
            use_rate=True,
            absflux_a_matrix=datamodel.absflux_a_matrix)

    if trim:
        print('Trimming the model and noise grids')

        # read in the observed data
        obsdata = datamodel.get_obscat(basename, datamodel.filters)

        # get the modelsedgrid on which to generate the noisemodel
        modelsedgrid = FileSEDGrid(modelsedgrid_filename)

        # read in the noise model just created
        noisemodel_vals = noisemodel.get_noisemodelcat(datamodel.noisefile)

        # trim the model sedgrid
        trim_grid.trim_models(modelsedgrid,
                              noisemodel_vals,
                              obsdata,
                              sed_trimname,
                              noisemodel_trimname,
                              sigma_fac=3.)

    if fitting:
        start_time = time.time()

        # read in the AST noise model
        noisemodel_vals = noisemodel.get_noisemodelcat(noisemodel_trimname)

        # read in the observed data
        obsdata = datamodel.get_obscat(datamodel.obsfile, datamodel.filters)

        # output files
        statsfile = stats_filebase + '_stats.fits'
        pdf1dfile = statsfile.replace('stats.fits', 'pdf1d.fits')
        lnpfile = statsfile.replace('stats.fits', 'lnp.hd5')

        fit.summary_table_memory(obsdata,
                                 noisemodel_vals,
                                 sed_trimname,
                                 resume=resume,
                                 threshold=-10.,
                                 save_every_npts=100,
                                 lnp_npts=500,
                                 stats_outname=statsfile,
                                 pdf1d_outname=pdf1dfile,
                                 lnp_outname=lnpfile,
                                 surveyname=datamodel.surveyname)

        new_time = time.time()
        print('time to fit: ', (new_time - start_time) / 60., ' min')
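
A hypothetical call running only the trim and fitting stages for one SD+sub
piece (the file name and SD labels are placeholders):

run_beast_production(
    "mygalaxy_gst.fits",    # photometry catalog, assumed in ./data/
    trim=True,
    fitting=True,
    source_density="0_1",   # SD bin label; "_" becomes "-" in file names
    sub_source_density="0",
)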
Example #5
def make_ast_inputs(flux_bin_method=True):
    """
    Make the list of artificial stars to be run through the photometry pipeline

    Parameters
    ----------
    flux_bin_method : boolean (default=True)
        If True, use the flux bin method to select SEDs.  If False, randomly
        select SEDs from the model grid.

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # read in the photometry catalog
    obsdata = datamodel.get_obscat(datamodel.obsfile, datamodel.filters)

    # --------------------
    # select SEDs
    # --------------------

    Nrealize = datamodel.ast_realization_per_model
    Nfilters = datamodel.ast_bands_above_maglimit

    # file names for stars and corresponding SED parameters
    outfile_seds = "./{0}/{0}_inputAST_seds.txt".format(datamodel.project)
    outfile_params = "./{0}/{0}_ASTparams.fits".format(datamodel.project)

    # if the SED file doesn't exist, create SEDs
    if not os.path.isfile(outfile_seds):

        print("Selecting SEDs for ASTs")

        if flux_bin_method:

            N_fluxes = datamodel.ast_n_flux_bins
            min_N_per_flux = datamodel.ast_n_per_flux_bin
            bins_outfile = "./{0}/{0}_ASTfluxbins.txt".format(datamodel.project)
            modelsedgrid_filename = "./{0}/{0}_seds.grid.hd5".format(datamodel.project)

            chosen_seds = pick_models_toothpick_style(
                modelsedgrid_filename,
                datamodel.filters,
                Nfilters,
                N_fluxes,
                min_N_per_flux,
                outfile=outfile_seds,
                outfile_params=outfile_params,
                bins_outfile=bins_outfile,
            )

        else:

            # construct magnitude cuts

            mag_cuts = datamodel.ast_maglimit

            if len(mag_cuts) == 1:
                tmp_cuts = mag_cuts
                min_mags = np.zeros(len(datamodel.filters))
                for k, filtername in enumerate(obsdata.filters):
                    sfiltername = obsdata.data.resolve_alias(filtername)
                    sfiltername = sfiltername.replace("rate", "vega")
                    sfiltername = sfiltername.replace("RATE", "VEGA")
                    (keep,) = np.where(obsdata[sfiltername] < 99.0)
                    min_mags[k] = np.percentile(obsdata[keep][sfiltername], 90.0)

                # max. mags from the gst observation cat.
                mag_cuts = min_mags + tmp_cuts


            N_models = datamodel.ast_models_selected_per_age

            chosen_seds = pick_models(
                modelsedgrid_filename,
                datamodel.filters,
                mag_cuts,
                Nfilter=Nfilters,
                N_stars=N_models,
                Nrealize=Nrealize,
                outfile=outfile_seds,
                outfile_params=outfile_params,
            )

    # if the SED file does exist, read them in
    else:
        print("Reading existing AST SEDs")
        chosen_seds = Table.read(outfile_seds, format="ascii")

    # --------------------
    # assign positions
    # --------------------

    # if we want ASTs with positions included (rather than just the fluxes from
    # the section above)
    if datamodel.ast_with_positions:

        print("Assigning positions to artifical stars")

        outfile = "./{0}/{0}_inputAST.txt".format(datamodel.project)

        # if we're replicating SEDs across source density or background bins
        if datamodel.ast_density_table is not None:
            make_ast_xy_list.pick_positions_from_map(
                obsdata,
                chosen_seds,
                datamodel.ast_density_table,
                datamodel.ast_N_bins,
                datamodel.ast_realization_per_model,
                outfile=outfile,
                refimage=datamodel.ast_reference_image,
                refimage_hdu=1,
                wcs_origin=1,
                Nrealize=1,
                set_coord_boundary=datamodel.ast_coord_boundary,
                region_from_filters="all",
            )

        # if we're not using SD/background maps, SEDs will be distributed
        # based on catalog sources
        else:
            make_ast_xy_list.pick_positions(
                obsdata,
                outfile,
                datamodel.ast_pixel_distribution,
                refimage=datamodel.ast_reference_image,
            )
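
The magnitude-cut construction in the non-flux-bin branch turns a single
offset into per-filter limits; a self-contained sketch with hypothetical
catalog magnitudes:

import numpy as np

mags = np.array([18.0, 21.5, 23.0, 24.2, 99.0])  # one filter; 99 = no detection
(keep,) = np.where(mags < 99.0)
min_mag = np.percentile(mags[keep], 90.0)  # 90th-percentile detected magnitude
mag_cut = min_mag + 1.0                    # allow ASTs 1 mag fainter
print(min_mag, mag_cut)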
Example #6
def setup_batch_beast_fit(
    num_percore=5,
    nice=None,
    overwrite_logfile=True,
    prefix=None,
    use_sd=True,
    nsubs=1,
    nprocs=1,
):
    """
    Sets up batch files for submission to the 'at' queue on
    linux (or similar) systems

    Parameters
    ----------
    num_percore : int (default = 5)
        number of fitting runs per core

    nice : int (default = None)
        set this to an integer (-20 to 20) to prepend a "nice" level
        to the fitting command

    overwrite_logfile : boolean (default = True)
        if True, will overwrite the log file; if False, will append to
        existing log file

    prefix : string (default=None)
        Set this to a string (such as 'source activate astroconda') to prepend
        to each batch file (use '\n's to make multiple lines)

    use_sd : boolean (default=True)
        If True, split runs based on source density (determined by finding
        matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use when doing the fitting
        (currently only implemented for subgrids)


    Returns
    -------
    run_info_dict : dict
        Dictionary indicating which catalog files have complete modeling, and
        which job files need to be run

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # setup the subdirectory for the batch and log files
    job_path = datamodel.project + "/fit_batch_jobs/"
    if not os.path.isdir(job_path):
        os.mkdir(job_path)

    log_path = job_path + "logs/"
    if not os.path.isdir(log_path):
        os.mkdir(log_path)

    # get file name lists (to check if they exist and/or need to be resumed)
    file_dict = create_filenames.create_filenames(use_sd=use_sd, nsubs=nsubs)

    # - input files
    photometry_files = file_dict["photometry_files"]
    # modelsedgrid_files = file_dict['modelsedgrid_files']
    # noise_files = file_dict['noise_files']

    # - output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    lnp_files = file_dict["lnp_files"]

    # - total number of files
    n_files = len(photometry_files)

    # - other useful info
    sd_sub_info = file_dict["sd_sub_info"]
    gridsub_info = file_dict["gridsub_info"]

    # names of output log files
    log_files = []

    for i in range(n_files):

        sd_piece = ""
        if use_sd:
            sd_piece = "_SD" + sd_sub_info[i][0] + "_sub" + sd_sub_info[i][1]

        gridsub_piece = ""
        if nsubs > 1:
            gridsub_piece = "_gridsub" + str(gridsub_info[i])

        log_files.append("beast_fit" + sd_piece + gridsub_piece + ".log")

    # start making the job files!

    pf_open = False
    cur_f = 0
    cur_total_size = 0.0
    j = -1

    # keep track of which files are done running
    run_info_dict = {
        "phot_file": photometry_files,
        "done": np.full(n_files, False),
        "files_to_run": [],
    }

    for i, phot_file in enumerate(photometry_files):

        print("")

        # check if this is a full run
        reg_run = False
        run_done = False
        if not os.path.isfile(stats_files[i]):
            reg_run = True
            print("no stats file")
        if not os.path.isfile(pdf_files[i]):
            reg_run = True
            print("no pdf1d file")
        if not os.path.isfile(lnp_files[i]):
            reg_run = True
            print("no lnp file")

        # first check if the pdf1d mass spacing is correct
        if not reg_run:
            hdulist = fits.open(pdf_files[i])
            delta1 = hdulist["M_ini"].data[-1, 1] - hdulist["M_ini"].data[-1,
                                                                          0]
            if delta1 > 1.0:  # old linear spacing
                print("pdf1d lin mass spacing - full refitting needed")
                old_mass_spacing = True
            else:
                old_mass_spacing = False
                print("pdf1d log mass spacing - ok")

            if old_mass_spacing:
                run_done = False
                reg_run = True

        # now check if the number of results is the same as
        #    the number of observations
        if not reg_run:
            # get the observed catalog
            obs = Table.read(photometry_files[i])

            # get the fit results catalog
            t = Table.read(stats_files[i])
            # get the number of stars that have been fit
            indxs, = np.where(t["Pmax"] != 0.0)

            # get the number of entries in the lnp file
            f = tables.open_file(lnp_files[i], "r")
            nlnp = f.root._v_nchildren - 2
            f.close()

            print("# obs, stats, lnp = ", len(obs), len(indxs), nlnp)
            if (len(indxs) == len(obs)) & (nlnp == len(obs)):

                # final check, is the pdf1d file correctly populated
                tot_prob = np.sum(hdulist["M_ini"].data, axis=1)
                tindxs, = np.where(tot_prob > 0.0)
                print("# good pdf1d = ", len(tindxs) - 1)
                if len(tindxs) == (len(obs) + 1):
                    run_done = True

        if run_done:
            print(stats_files[i] + " done")
            run_info_dict["done"][i] = True
        else:
            j += 1
            if j % num_percore == 0:
                cur_f += 1

                # close previous files
                if j != 0:
                    pf.close()
                    print(
                        "total sed_trim size [Gb] = ",
                        cur_total_size / (1024.0 * 1024.0 * 1024.0),
                    )
                    cur_total_size = 0.0

                # open the slurm and param files
                pf_open = True
                joblist_file = job_path + "beast_batch_fit_" + str(
                    cur_f) + ".joblist"
                pf = open(joblist_file, "w")
                run_info_dict["files_to_run"].append(joblist_file)

                # write out anything at the beginning of the file
                if prefix is not None:
                    pf.write(prefix + "\n")

            # flag for resuming
            resume_str = ""
            if reg_run:
                print(stats_files[i] + " does not exist " +
                      "- adding job as a regular fit job (not resume job)")
            else:
                print(stats_files[i] +
                      " not done - adding to continue fitting list (" +
                      str(len(indxs)) + "/" + str(len(t["Pmax"])) + ")")
                resume_str = "-r"

            # prepend a `nice` value
            nice_str = ""
            if nice is not None:
                nice_str = "nice -n" + str(int(nice)) + " "

            # choose whether to append or overwrite log file
            pipe_str = " > "
            if not overwrite_logfile:
                pipe_str = " >> "

            # set SD+sub option
            sd_str = ""
            if use_sd:
                sd_str = ' --choose_sd_sub "{0}" "{1}" '.format(
                    sd_sub_info[i][0], sd_sub_info[i][1])

            # set gridsub option
            gs_str = ""
            if nsubs > 1:
                gs_str = " --choose_subgrid {0} ".format(gridsub_info[i])

            job_command = (nice_str +
                           "python -m beast.run_beast.run_fitting " +
                           resume_str + sd_str + gs_str + " --nsubs " +
                           str(nsubs) + " --nprocs " + str(nprocs) + pipe_str +
                           log_path + log_files[i])

            pf.write(job_command + "\n")

    if pf_open:
        pf.close()

    # return the info about completed modeling
    return run_info_dict
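
A hypothetical driver call, batching ten fits per core with a "nice" level
and an environment-activation line prepended to each joblist:

run_info = setup_batch_beast_fit(
    num_percore=10,
    nice=19,
    prefix="source activate astroconda",
)
print(run_info["files_to_run"])  # joblist files that still need to be run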
Example #7
def run_fitting(
    use_sd=True,
    nsubs=1,
    nprocs=1,
    choose_sd_sub=None,
    choose_subgrid=None,
    pdf2d_param_list=['Av', 'Rv', 'f_A', 'M_ini', 'logA', 'Z', 'distance'],
    resume=False,
):
    """
    Run the fitting.  If nsubs > 1, this will find existing subgrids.
    If use_sd is True, will also incorporate source density info.

    The additional choose_* options are to make queue scripts usable,
    by specifying a given SD+sub and/or subgrid for the fitting run.


    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, run the fitting on the source density split files (determined
        by finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    choose_sd_sub : list of two strings (default=None)
        If this is set, the fitting will just be for this combo of SD+sub,
        rather than all of them.  Overrides use_sd.
        format of the list: ['#','#']

    choose_subgrid : int (default=None)
        If this is set, the fitting will just be for this subgrid index.
        If nsubs=1, this is ignored.

    pdf2d_param_list : list of strings or None
        If set, do 2D PDFs of these parameters.  If None, don't make 2D PDFs.

    resume : boolean (default=False)
        choose whether to resume existing run or start over

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # keep track of time
    start_time = time.time()

    # --------------------
    # make lists of file names
    # --------------------

    file_dict = create_filenames.create_filenames(
        use_sd=use_sd,
        nsubs=nsubs,
        choose_sd_sub=choose_sd_sub,
        choose_subgrid=choose_subgrid,
    )

    # input files
    photometry_files = file_dict["photometry_files"]
    # modelsedgrid_files = file_dict["modelsedgrid_files"]
    modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"]
    # noise_files = file_dict["noise_files"]
    noise_trim_files = file_dict["noise_trim_files"]

    # output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    pdf2d_files = file_dict["pdf2d_files"]
    if pdf2d_param_list is None:
        pdf2d_files = [None for i in range(len(pdf2d_files))]
    lnp_files = file_dict["lnp_files"]

    # total number of files
    n_files = len(photometry_files)

    # other potentially useful info
    sd_sub_info = file_dict["sd_sub_info"]
    # gridsub_info = file_dict['gridsub_info']

    # if using subgrids, make the grid dictionary file:
    # File where the ranges and number of unique values for the grid
    # will be stored (this can take a while to calculate)

    if nsubs > 1:

        gridpickle_files = file_dict["gridpickle_files"]

        for i in range(len(gridpickle_files)):
            if not os.path.isfile(gridpickle_files[i]):

                # list of corresponding SED grids and noise models

                # - with SD+sub: get file list for ALL subgrids at current SD+sub
                if use_sd or (choose_sd_sub is not None):
                    temp = create_filenames.create_filenames(
                        nsubs=nsubs, choose_sd_sub=sd_sub_info[i], choose_subgrid=None
                    )
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # - no SD info: get file list for ALL subgrids
                else:
                    temp = create_filenames.create_filenames(
                        use_sd=False, nsubs=nsubs, choose_subgrid=None
                    )
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # create the grid info dictionary
                print("creating grid_info_dict for " + gridpickle_files[i])
                grid_info_dict = subgridding_tools.reduce_grid_info(
                    modelsedgrid_trim_list, noise_trim_list, nprocs=nprocs
                )
                # save it
                with open(gridpickle_files[i], "wb") as p:
                    pickle.dump(grid_info_dict, p)
                print("wrote grid_info_dict to " + gridpickle_files[i])

    # --------------------
    # do the fitting!
    # --------------------

    # set up function inputs

    if nsubs == 1:

        input_list = [
            (
                photometry_files[i],
                modelsedgrid_trim_files[i],
                noise_trim_files[i],
                stats_files[i],
                pdf_files[i],
                pdf2d_files[i],
                pdf2d_param_list,
                lnp_files[i],
                None,
                resume,
            )
            for i in range(n_files)
        ]

    if nsubs > 1:

        input_list = [
            (
                photometry_files[i],
                modelsedgrid_trim_files[i],
                noise_trim_files[i],
                stats_files[i],
                pdf_files[i],
                pdf2d_files[i],
                pdf2d_param_list,
                lnp_files[i],
                gridpickle_files[i],
                resume,
            )
            for i in range(n_files)
        ]

    # run the fitting (via parallel wrapper)

    parallel_wrapper(fit_submodel, input_list, nprocs=nprocs)

    # see how long it took!
    new_time = time.time()
    print("time to fit: ", (new_time - start_time) / 60.0, " min")
Example #8
def test_verifyparams_error():
    """Test: verify_params for case of warning raising exception."""
    with pytest.raises(UserWarning) as exc:
        verify_params.verify_input_format(datamodel_mock_nofA())
    assert exc.value.args[0] == "fAs is not defined."
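
For pytest.raises to catch a UserWarning, the warning has to surface as an
exception (either raised directly, as verify_input_format presumably does, or
via an "error" warnings filter); a self-contained sketch of the latter:

import warnings
import pytest

def mock_verify():
    warnings.warn("fAs is not defined.")

with warnings.catch_warnings():
    warnings.simplefilter("error")  # escalate warnings to exceptions
    with pytest.raises(UserWarning) as exc:
        mock_verify()
    assert exc.value.args[0] == "fAs is not defined."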
Example #9
def create_filenames(use_sd=True,
                     nsubs=1,
                     choose_sd_sub=None,
                     choose_subgrid=None):
    """
    Helper function to make all of the filenames.  SED grid and noise model
    are trimmed versions.

    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, generate filenames for the source density split files
        (determined by finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    choose_sd_sub : list of two strings (default=None)
        If this is set, the fitting will just be for this combo of SD+sub,
        rather than all of them.  Overrides use_sd.
        format of the list: ['#','#']

    choose_subgrid : int (default=None)
        If this is set, the fitting will just be for this subgrid index.
        If nsubs=1, this is ignored.

    Returns
    -------
    dictionary with the lists of filenames, plus the corresponding SD+sub and
    gridsub values for easy referencing

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # input files
    photometry_files = []
    modelsedgrid_files = []
    modelsedgrid_trim_files = []
    noise_files = []
    noise_trim_files = []

    # output files
    stats_files = []
    pdf_files = []
    pdf2d_files = []
    lnp_files = []

    # other potentially useful things
    sd_sub_info = []
    gridsub_info = []

    # ** no subgrids **

    if nsubs == 1:

        # -- SD+sub specified
        if choose_sd_sub is not None:

            photometry_files.append(
                datamodel.obsfile.replace(
                    ".fits",
                    "_bin{0}_sub{1}.fits".format(choose_sd_sub[0],
                                                 choose_sd_sub[1]),
                ))
            modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            modelsedgrid_trim_files.append(
                "{0}/{0}_bin{1}_sub{2}_seds_trim.grid.hd5".format(
                    datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            noise_files.append("{0}/{0}_noisemodel_bin{1}.grid.hd5".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            noise_trim_files.append(
                "{0}/{0}_bin{1}_sub{2}_noisemodel_trim.grid.hd5".format(
                    datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))

            stats_files.append("{0}/{0}_bin{1}_sub{2}_stats.fits".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            pdf_files.append("{0}/{0}_bin{1}_sub{2}_pdf1d.fits".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            pdf2d_files.append("{0}/{0}_bin{1}_sub{2}_pdf2d.fits".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))
            lnp_files.append("{0}/{0}_bin{1}_sub{2}_lnp.hd5".format(
                datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))

            sd_sub_info.append([choose_sd_sub[0], choose_sd_sub[1]])

        # -- using source density info
        elif use_sd:

            photometry_files = sorted(
                glob.glob(datamodel.obsfile.replace(".fits",
                                                    "_bin*_sub*.fits")))

            for phot_file in photometry_files:
                # get the sd/sub number
                dpos = phot_file.rfind("_bin")
                spos = phot_file.rfind("sub")
                ppos = phot_file.rfind(".")
                curr_sd = phot_file[dpos + 4:spos - 1]
                curr_sub = phot_file[spos + 3:ppos]

                # construct other file names
                modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format(
                    datamodel.project, curr_sd, curr_sub))
                modelsedgrid_trim_files.append(
                    "{0}/{0}_bin{1}_sub{2}_seds_trim.grid.hd5".format(
                        datamodel.project, curr_sd, curr_sub))
                noise_files.append("{0}/{0}_noisemodel_bin{1}.grid.hd5".format(
                    datamodel.project, curr_sd, curr_sub))
                noise_trim_files.append(
                    "{0}/{0}_bin{1}_sub{2}_noisemodel_trim.grid.hd5".format(
                        datamodel.project, curr_sd, curr_sub))

                stats_files.append("{0}/{0}_bin{1}_sub{2}_stats.fits".format(
                    datamodel.project, curr_sd, curr_sub))
                pdf_files.append("{0}/{0}_bin{1}_sub{2}_pdf1d.fits".format(
                    datamodel.project, curr_sd, curr_sub))
                pdf2d_files.append("{0}/{0}_bin{1}_sub{2}_pdf2d.fits".format(
                    datamodel.project, curr_sd, curr_sub))
                lnp_files.append("{0}/{0}_bin{1}_sub{2}_lnp.hd5".format(
                    datamodel.project, curr_sd, curr_sub))

                sd_sub_info.append([curr_sd, curr_sub])

        # -- no source density splitting
        else:

            photometry_files.append(datamodel.obsfile)
            modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format(
                datamodel.project))
            modelsedgrid_trim_files.append("{0}/{0}_seds_trim.grid.hd5".format(
                datamodel.project))
            noise_files.append("{0}/{0}_noisemodel.grid.hd5".format(
                datamodel.project))
            noise_trim_files.append("{0}/{0}_noisemodel_trim.grid.hd5".format(
                datamodel.project))

            stats_files.append("{0}/{0}_stats.fits".format(datamodel.project))
            pdf_files.append("{0}/{0}_pdf1d.fits".format(datamodel.project))
            pdf2d_files.append("{0}/{0}_pdf2d.fits".format(datamodel.project))
            lnp_files.append("{0}/{0}_lnp.hd5".format(datamodel.project))

    # ** with subgrids **

    # subgrids require a pickle file with grid info
    gridpickle_files = []

    if nsubs > 1:

        # start with getting the model grid files (note these aren't trimmed ones)
        outdir = os.path.join(".", datamodel.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")
        temp = get_modelsubgridfiles(subgrid_names_file)
        # use that to get the number of subgrids and make a list of them
        gridsub_list = np.arange(len(temp))
        # or a subset if set
        if choose_subgrid is not None:
            gridsub_list = [choose_subgrid]

        # -- SD+sub specified
        if choose_sd_sub is not None:

            for gridsub in gridsub_list:

                photometry_files.append(
                    datamodel.obsfile.replace(
                        ".fits",
                        "_bin{0}_sub{1}.fits".format(choose_sd_sub[0],
                                                     choose_sd_sub[1]),
                    ))

                modelsedgrid_files.append("{0}/{0}_seds.gridsub{3}.hd5".format(
                    datamodel.project, choose_sd_sub[0], choose_sd_sub[1],
                    gridsub))
                modelsedgrid_trim_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_seds_trim.grid.hd5"
                    .format(datamodel.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                noise_files.append(
                    "{0}/{0}_noisemodel_bin{1}.gridsub{3}.hd5".format(
                        datamodel.project, choose_sd_sub[0], choose_sd_sub[1],
                        gridsub))
                noise_trim_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_noisemodel_trim.grid.hd5"
                    .format(datamodel.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))

                stats_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_stats.fits"
                    .format(datamodel.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                pdf_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf1d.fits"
                    .format(datamodel.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                pdf2d_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf2d.fits"
                    .format(datamodel.project, choose_sd_sub[0],
                            choose_sd_sub[1], gridsub))
                lnp_files.append(
                    "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_lnp.hd5".
                    format(datamodel.project, choose_sd_sub[0],
                           choose_sd_sub[1], gridsub))

                gridpickle_files.append(
                    "{0}/bin{1}_sub{2}/grid_info_dict.pkl".format(
                        datamodel.project, choose_sd_sub[0], choose_sd_sub[1]))

                sd_sub_info.append([choose_sd_sub[0], choose_sd_sub[1]])
                gridsub_info.append(gridsub)

        # -- using source density info
        elif use_sd:

            phot_file_list = sorted(
                glob.glob(datamodel.obsfile.replace(".fits",
                                                    "_bin*_sub*.fits")))

            for phot_file in phot_file_list:
                # get the sd/sub number
                dpos = phot_file.rfind("_bin")
                spos = phot_file.rfind("sub")
                ppos = phot_file.rfind(".")
                curr_sd = phot_file[dpos + 4:spos - 1]
                curr_sub = phot_file[spos + 3:ppos]

                # construct other file names
                for gridsub in gridsub_list:
                    photometry_files.append(phot_file)
                    modelsedgrid_files.append(
                        "{0}/{0}_seds.gridsub{3}.hd5".format(
                            datamodel.project, curr_sd, curr_sub, gridsub))
                    modelsedgrid_trim_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_seds_trim.grid.hd5"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))
                    noise_files.append(
                        "{0}/{0}_noisemodel_bin{1}.gridsub{3}.hd5".format(
                            datamodel.project, curr_sd, curr_sub, gridsub))
                    noise_trim_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_noisemodel_trim.grid.hd5"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))

                    stats_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_stats.fits"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))
                    pdf_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf1d.fits"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))
                    pdf2d_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf2d.fits"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))
                    lnp_files.append(
                        "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_lnp.hd5"
                        .format(datamodel.project, curr_sd, curr_sub, gridsub))

                    gridpickle_files.append(
                        "{0}/bin{1}_sub{2}/grid_info_dict.pkl".format(
                            datamodel.project, curr_sd, curr_sub))

                    sd_sub_info.append([curr_sd, curr_sub])
                    gridsub_info.append(gridsub)

        # -- no source density splitting
        else:

            for gridsub in gridsub_list:
                photometry_files.append(datamodel.obsfile)
                modelsedgrid_files.append("{0}/{0}_seds.gridsub{1}.hd5".format(
                    datamodel.project, gridsub))
                modelsedgrid_trim_files.append(
                    "{0}/{0}_gridsub{1}_seds_trim.grid.hd5".format(
                        datamodel.project, gridsub))
                noise_files.append("{0}/{0}_noisemodel.gridsub{1}.hd5".format(
                    datamodel.project, gridsub))
                noise_trim_files.append(
                    "{0}/{0}_gridsub{1}_noisemodel_trim.grid.hd5".format(
                        datamodel.project, gridsub))

                stats_files.append("{0}/{0}_gridsub{1}_stats.fits".format(
                    datamodel.project, gridsub))
                pdf_files.append("{0}/{0}_gridsub{1}_pdf1d.fits".format(
                    datamodel.project, gridsub))
                pdf2d_files.append("{0}/{0}_gridsub{1}_pdf2d.fits".format(
                    datamodel.project, gridsub))
                lnp_files.append("{0}/{0}_gridsub{1}_lnp.hd5".format(
                    datamodel.project, gridsub))

                gridpickle_files.append("{0}/grid_info_dict.pkl".format(
                    datamodel.project))

                gridsub_info.append(gridsub)

    # double check that all file lists are the same length
    n_file_list = [
        len(x) for x in [
            photometry_files,
            modelsedgrid_files,
            modelsedgrid_trim_files,
            noise_files,
            noise_trim_files,
            stats_files,
            pdf_files,
            pdf2d_files,
            lnp_files,
        ]
    ]
    if len(np.unique(n_file_list)) > 1:
        print("file list lengths don't match!")
        return None

    return {
        "photometry_files": photometry_files,
        "modelsedgrid_files": modelsedgrid_files,
        "modelsedgrid_trim_files": modelsedgrid_trim_files,
        "noise_files": noise_files,
        "noise_trim_files": noise_trim_files,
        "stats_files": stats_files,
        "pdf_files": pdf_files,
        "pdf2d_files": pdf2d_files,
        "lnp_files": lnp_files,
        "gridpickle_files": gridpickle_files,
        "sd_sub_info": sd_sub_info,
        "gridsub_info": gridsub_info,
    }
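
A hypothetical call showing how the naming pieces combine for a 4-subgrid run
restricted to one SD+sub combination:

file_dict = create_filenames(nsubs=4, choose_sd_sub=["2", "0"])
print(file_dict["stats_files"][0])
# -> <project>/bin2_sub0/<project>_bin2_sub0_gridsub0_stats.fits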
Example #10
def make_trim_scripts(num_subtrim=1, nice=None, prefix=None):
    """
    `setup_batch_beast_trim.py` creates batch trim files from lists of file
    names.  This function generates all of the file names it needs.

    NOTE: This assumes you're using source density or background dependent noise
    models.

    Parameters
    ----------
    num_subtrim : int (default = 1)
        number of trim batch jobs

    nice : int (default = None)
        set this to an integer (-20 to 20) to prepend a "nice" level
        to the trimming command

    prefix : string (default=None)
        Set this to a string (such as 'source activate astroconda') to prepend
        to each batch file (use '\n's to make multiple lines)

    Returns
    -------
    job_files : list of strings
        Names of the newly created job files
    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # make lists of file names
    file_dict = create_filenames.create_filenames(
        use_sd=True, nsubs=datamodel.n_subgrid,
    )
    # extract some useful ones
    photometry_files = file_dict["photometry_files"]
    modelsedgrid_files = file_dict["modelsedgrid_files"]
    noise_files = file_dict["noise_files"]
    modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"]
    noise_trim_files = file_dict["noise_trim_files"]
    # the unique sets of things
    unique_sedgrid = [
        x for i, x in enumerate(modelsedgrid_files) if i == modelsedgrid_files.index(x)
    ]

    # save the list of job files
    job_file_list = []

    # iterate through each model grid
    for i in range(datamodel.n_subgrid):

        # indices for this model grid
        grid_ind = [
            ind
            for ind, mod in enumerate(modelsedgrid_files)
            if mod == unique_sedgrid[i]
        ]

        # create corresponding files for each of those
        input_noise = [noise_files[ind] for ind in grid_ind]
        input_phot = [photometry_files[ind] for ind in grid_ind]
        # to get the trim prefix, find the common string between trimmed noise
        # files and trimmed SED files
        input_trim_prefix = []
        for ind in grid_ind:
            str1 = modelsedgrid_trim_files[ind]
            str2 = noise_trim_files[ind]
            # find longest match
            match = SequenceMatcher(None, str1, str2).find_longest_match(
                0, len(str1), 0, len(str2)
            )
            # grab that substring (and remove trailing "_")
            input_trim_prefix.append(str1[match.a : match.a + match.size][:-1])

        # check if the trimmed grids exist before moving on
        check_trim = [os.path.isfile(noise_trim_files[ind]) for ind in grid_ind]

        # if any aren't trimmed for this model grid, set up trimming
        if np.sum(check_trim) < len(input_noise):

            job_path = "./{0}/trim_batch_jobs/".format(datamodel.project)
            if datamodel.n_subgrid > 1:
                file_prefix = "BEAST_gridsub" + str(i)
            else:
                file_prefix = "BEAST"

            # generate trimming at-queue commands
            setup_batch_beast_trim.generic_batch_trim(
                unique_sedgrid[i],
                input_noise,
                input_phot,
                input_trim_prefix,
                job_path=job_path,
                file_prefix=file_prefix,
                num_subtrim=num_subtrim,
                nice=nice,
                prefix=prefix,
            )

            job_file_list.append(job_path + file_prefix + "_batch_trim.joblist")

    return job_file_list
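
The trim-prefix extraction relies on the longest common substring of two file
names; a self-contained sketch with hypothetical names:

from difflib import SequenceMatcher

str1 = "proj/proj_bin1_sub0_seds_trim.grid.hd5"
str2 = "proj/proj_bin1_sub0_noisemodel_trim.grid.hd5"
match = SequenceMatcher(None, str1, str2).find_longest_match(
    0, len(str1), 0, len(str2)
)
# shared prefix, with the trailing "_" stripped
print(str1[match.a:match.a + match.size][:-1])  # -> "proj/proj_bin1_sub0"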
Example #11
def make_ast_inputs(flux_bin_method=True):
    """
    Make the list of artificial stars to be run through the photometry pipeline

    Parameters
    ----------
    flux_bin_method : boolean (default=True)
        If True, use the flux bin method to select SEDs.  If False, randomly
        select SEDs from the model grid.

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # construct magnitude cuts

    mag_cuts = datamodel.ast_maglimit
    obsdata = datamodel.get_obscat(datamodel.obsfile, datamodel.filters)

    if len(mag_cuts) == 1:
        tmp_cuts = mag_cuts
        min_mags = np.zeros(len(datamodel.filters))
        for k, filtername in enumerate(obsdata.filters):
            sfiltername = obsdata.data.resolve_alias(filtername)
            sfiltername = sfiltername.replace("rate", "vega")
            sfiltername = sfiltername.replace("RATE", "VEGA")
            keep, = np.where(obsdata[sfiltername] < 99.0)
            min_mags[k] = np.percentile(obsdata[keep][sfiltername], 90.0)

        # the final cuts are offsets from the 90th-percentile magnitudes
        # of the (gst) observation catalog
        mag_cuts = min_mags + tmp_cuts
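
        # worked example with hypothetical numbers: if the 90th-percentile
        # magnitude in a band is 24.0 and ast_maglimit = [1.0], the cut
        # for that band becomes 24.0 + 1.0 = 25.0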

    # --------------------
    # select SEDs
    # --------------------

    Nrealize = datamodel.ast_realization_per_model
    Nfilters = datamodel.ast_bands_above_maglimit

    # name of the SED grid created by create_physicsmodel
    modelsedgrid_filename = "./{0}/{0}_seds.grid.hd5".format(datamodel.project)

    # file names for stars and corresponding SED parameters
    outfile = "./" + datamodel.project + "/" + datamodel.project + "_inputAST.txt"
    outfile_params = (
        "./" + datamodel.project + "/" + datamodel.project + "_ASTparams.fits"
    )

    if flux_bin_method:

        N_fluxes = datamodel.ast_n_flux_bins
        min_N_per_flux = datamodel.ast_n_per_flux_bin
        bins_outfile = (
            "./" + datamodel.project + "/" + datamodel.project + "_ASTfluxbins.txt"
        )

        chosen_seds = pick_models_toothpick_style(
            modelsedgrid_filename,
            datamodel.filters,
            mag_cuts,
            Nfilters,
            N_fluxes,
            min_N_per_flux,
            outfile=outfile,
            outfile_params=outfile_params,
            bins_outfile=bins_outfile,
        )

    else:

        N_models = datamodel.ast_models_selected_per_age

        chosen_seds = pick_models(
            modelsedgrid_filename,
            datamodel.filters,
            mag_cuts,
            Nfilter=Nfilters,
            N_stars=N_models,
            Nrealize=Nrealize,
            outfile=outfile,
            outfile_params=outfile_params,
        )

    # --------------------
    # assign positions
    # --------------------

    if datamodel.ast_with_positions:
        separation = datamodel.ast_pixel_distribution
        filename = datamodel.project + "/" + datamodel.project + "_inputAST.txt"

        if datamodel.ast_reference_image is not None:
            # With reference image, use one of these options
            if datamodel.ast_source_density_table is not None:
                pick_positions_from_map(
                    obsdata,
                    chosen_seds,
                    datamodel.ast_source_density_table,
                    datamodel.ast_N_bins,
                    datamodel.ast_realization_per_model,
                    outfile=filename,
                    refimage=datamodel.ast_reference_image,
                    refimage_hdu=0,
                    Nrealize=1,
                    set_coord_boundary=datamodel.ast_coord_boundary,
                )

            elif datamodel.ast_background_table is not None:
                pick_positions_from_map(
                    obsdata,
                    chosen_seds,
                    datamodel.ast_background_table,
                    datamodel.ast_N_bins,
                    datamodel.ast_realization_per_model,
                    outfile=filename,
                    refimage=datamodel.ast_reference_image,
                    refimage_hdu=0,
                    Nrealize=1,
                    set_coord_boundary=datamodel.ast_coord_boundary,
                )
            else:
                pick_positions(
                    obsdata,
                    filename,
                    separation,
                    refimage=datamodel.ast_reference_image,
                )

        else:
            # Without reference image, we can only use this function
            if (
                datamodel.ast_source_density_table is None
                and datamodel.ast_background_table is None
            ):
                pick_positions(obsdata, filename, separation)
            else:
                print(
                    "To use ast_source_density_table or ast_background_table, ast_reference_image must be specified."
                )
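
A minimal sketch of calling this function; the import path is assumed from the layout of the other run tools and may differ:

from beast.tools.run.make_ast_inputs import make_ast_inputs

# select ASTs with the flux-bin method; writes
# <project>/<project>_inputAST.txt and <project>/<project>_ASTparams.fits
make_ast_inputs(flux_bin_method=True)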
Example No. 12
def test_verifyparams_allowwarnRV():
    """Test: verify_params for case of warning with no exception."""
    with pytest.warns(UserWarning, match="Note: rvs grid is single-valued."):
        verify_params.verify_input_format(datamodel_mock_allowwarnRV())
Example No. 13
def test_verifyparams_noallowRV():
    """Test: verify_params when warn raising except w/ allow_warnings=False"""
    with pytest.raises(UserWarning) as exc:
        verify_params.verify_input_format(datamodel_mock_noallowRV())
    assert exc.value.args[0] == "Note: rvs grid is single-valued."
Example No. 14
def test_verifyparams_errorRV():
    """Test: verify_params for case of warning raising exception."""
    with pytest.raises(UserWarning) as exc:
        verify_params.verify_input_format(datamodel_mock_RV())
    assert exc.value.args[0] == "Note: rvs grid is single-valued."
Example No. 15
def test_verifyparams_allowwarn():
    """Test: verify_params for case of warning with no exception."""
    with pytest.warns(UserWarning, match="fAs is not defined."):
        verify_params.verify_input_format(datamodel_mock_allowwarn())
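
For context, a purely hypothetical sketch of one of these mock factories; the attribute names (rvs, allow_warnings) are assumptions inferred from the warning text, not the real test fixtures:

def datamodel_mock_allowwarnRV():
    # start from a baseline valid mock (defined elsewhere in the test module)
    dm = datamodel_mock()
    # a single-valued rvs grid triggers "rvs grid is single-valued."
    dm.rvs = [3.1, 3.1, 0.0]
    # with warnings allowed, verify_input_format warns instead of raising
    dm.allow_warnings = True
    return dm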
Example No. 16
    parser.add_argument("-t",
                        "--trim",
                        help="Trim the physics and observation model grids",
                        action="store_true")
    parser.add_argument("-f",
                        "--fit",
                        help="Fit the observed data",
                        action="store_true")
    parser.add_argument("-r",
                        "--resume",
                        help="Resume a fitting run",
                        action="store_true")
    args = parser.parse_args()

    # check input parameters, print what is the problem, stop run_beast
    verify_params.verify_input_format(datamodel)

    if args.physicsmodel:

        # make sure the project directory exists
        pdir = create_project_dir(datamodel.project)

        # download and load the isochrones
        (iso_fname, oiso) = make_iso_table(datamodel.project,
                                           oiso=datamodel.oiso,
                                           logtmin=datamodel.logt[0],
                                           logtmax=datamodel.logt[1],
                                           dlogt=datamodel.logt[2],
                                           z=datamodel.z)

        # calculate the distance in pc
Example No. 17
def create_physicsmodel(nsubs=1, nprocs=1, subset=[None, None]):
    """
    Create the physics model grid.  If nsubs > 1, this will make sub-grids.


    Parameters
    ----------
    nsubs : int (default=1)
        number of subgrids to split the physics model into

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    subset : list of two ints (default=[None,None])
        Only process subgrids in the range [start,stop].
        (only relevant if nsubs > 1)

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # filename for the SED grid
    modelsedgrid_filename = "%s/%s_seds.grid.hd5" % (
        datamodel.project,
        datamodel.project,
    )

    # grab the current subgrid slice
    subset_slice = slice(subset[0], subset[1])

    # make sure the project directory exists
    create_project_dir(datamodel.project)

    # download and load the isochrones
    (iso_fname, oiso) = make_iso_table(
        datamodel.project,
        oiso=datamodel.oiso,
        logtmin=datamodel.logt[0],
        logtmax=datamodel.logt[1],
        dlogt=datamodel.logt[2],
        z=datamodel.z,
    )

    # remove the isochrone points with logL=-9.999
    oiso = ezIsoch(oiso.selectWhere("*", "logL > -9"))

    if hasattr(datamodel, "add_spectral_properties_kwargs"):
        extra_kwargs = datamodel.add_spectral_properties_kwargs
    else:
        extra_kwargs = None

    if hasattr(datamodel, "velocity"):
        redshift = (datamodel.velocity / const.c).decompose().value
    else:
        redshift = 0
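    # e.g. (hypothetical value) a velocity of -300 km/s gives
    # redshift = -300 / 299792.458 ≈ -1.0e-3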

    # generate the spectral library (no dust extinction)
    (spec_fname, g_spec) = make_spectral_grid(
        datamodel.project,
        oiso,
        osl=datamodel.osl,
        redshift=redshift,
        distance=datamodel.distances,
        distance_unit=datamodel.distance_unit,
        extLaw=datamodel.extLaw,
        add_spectral_properties_kwargs=extra_kwargs,
    )

    # add the stellar priors as weights
    #   also computes the grid weights for the stellar part
    (pspec_fname, g_pspec) = add_stellar_priors(
        datamodel.project,
        g_spec,
        age_prior_model=datamodel.age_prior_model,
        mass_prior_model=datamodel.mass_prior_model,
        met_prior_model=datamodel.met_prior_model,
    )

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:
        # generate the SED grid by integrating the filter response functions
        #   effect of dust extinction applied before filter integration
        #   also computes the dust priors as weights
        make_extinguished_sed_grid(
            datamodel.project,
            g_pspec,
            datamodel.filters,
            extLaw=datamodel.extLaw,
            av=datamodel.avs,
            rv=datamodel.rvs,
            fA=datamodel.fAs,
            rv_prior_model=datamodel.rv_prior_model,
            av_prior_model=datamodel.av_prior_model,
            fA_prior_model=datamodel.fA_prior_model,
            seds_fname=modelsedgrid_filename,
            add_spectral_properties_kwargs=extra_kwargs,
        )

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:
        # Work with the whole grid up to this point (otherwise the priors
        # would need a rework: they misbehave when given only a subset of
        # the parameter space, e.g. when only a single age is present)

        # Make subgrids, by splitting the spectral grid into equal sized pieces
        custom_sub_pspec = subgridding_tools.split_grid(pspec_fname, nsubs)

        file_prefix = "{0}/{0}_".format(datamodel.project)

        # function to process the subgrids individually
        def gen_subgrid(i, sub_name):
            sub_g_pspec = FileSEDGrid(sub_name)
            sub_seds_fname = "{}seds.gridsub{}.hd5".format(file_prefix, i)

            # generate the SED grid by integrating the filter response functions
            #   effect of dust extinction applied before filter integration
            #   also computes the dust priors as weights
            (sub_seds_fname, sub_g_seds) = make_extinguished_sed_grid(
                datamodel.project,
                sub_g_pspec,
                datamodel.filters,
                extLaw=datamodel.extLaw,
                av=datamodel.avs,
                rv=datamodel.rvs,
                fA=datamodel.fAs,
                rv_prior_model=datamodel.rv_prior_model,
                av_prior_model=datamodel.av_prior_model,
                fA_prior_model=datamodel.fA_prior_model,
                add_spectral_properties_kwargs=extra_kwargs,
                seds_fname=sub_seds_fname,
            )

            return sub_seds_fname

        # run the above function
        par_tuples = [
            (i, sub_name) for i, sub_name in enumerate(custom_sub_pspec)
        ][subset_slice]

        parallel_wrapper(gen_subgrid, par_tuples, nprocs=nprocs)

        # Save a list of subgrid names that we expect to see
        required_names = [
            "{}seds.gridsub{}.hd5".format(file_prefix, i) for i in range(nsubs)
        ]

        outdir = os.path.join(".", datamodel.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")

        with open(subgrid_names_file, "w") as fname_file:
            for fname in required_names:
                fname_file.write(fname + "\n")
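
A minimal usage sketch; subset follows the [start, stop) convention of subset_slice above, and the import path is an assumption:

from beast.tools.run.create_physicsmodel import create_physicsmodel

# build the full grid in one piece
create_physicsmodel(nsubs=1, nprocs=1)

# or: split into 5 subgrids and build only subgrids 0 and 1
create_physicsmodel(nsubs=5, nprocs=2, subset=[0, 2])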
Example No. 18
def test_verifyparams_nowarning():
    """Test: verify_params for case of no warnings or exceptions."""
    with pytest.warns(None) as record:
        verify_params.verify_input_format(datamodel_mock())
    assert len(record) == 0