Example #1

# Standard-library and third-party imports used by the functions below
import copy
import glob
import importlib
import os
import stat
import types

import asdf
import numpy as np

# BEAST imports (module paths assumed from the beast package layout; adjust
# to match the installed version).  `create_datamodel` is a helper defined
# elsewhere in the original script that writes out the datamodel.py file.
from beast.plotting import plot_mag_hist
from beast.fitting import trim_grid
from beast.observationmodel.observations import Observations
from beast.physicsmodel.grid import SEDGrid
from beast.tools import (
    beast_settings,
    compare_spec_type,
    create_background_density_map,
    cut_catalogs,
    setup_batch_beast_fit,
    setup_batch_beast_trim,
    split_ast_input_file,
    split_catalog_using_map,
    star_type_probability,
)
from beast.tools.run import (
    create_filenames,
    create_obsmodel,
    create_physicsmodel,
    make_ast_inputs,
    merge_files,
    run_fitting,
)
import beast.observationmodel.noisemodel.generic_noisemodel as noisemodel

def beast_production_wrapper():
    """
    This does all of the steps for a full production run, and can be used as
    a wrapper to automatically do most steps for multiple fields.
    * make datamodel.py file
    * make source density map
    * make background density map
    * split catalog by source density
    * make physics model (SED grid)
    * make input list for ASTs
    * make noise model
    * generate batch script to trim models
    * generate batch script to fit models
    * merge stats files back together
    * spatially reorder the results

    Places for user to manually do things:
    * editing code before use
        - datamodel_template.py: setting up the file with desired parameters
        - here: list the catalog filter names with the corresponding BEAST names
        - here: choose settings (pixel size, filter, mag range) for the source density map
        - here: choose settings (pixel size, reference image) for the background map
        - here: choose settings (filter, number per file) for dividing catalog by source density
        - here: choose settings (# files, nice level) for the trimming/fitting batch scripts
    * process the ASTs, as described in BEAST documentation
    * run the trimming scripts
    * run the fitting scripts

    BEWARE: When running the trimming/fitting scripts, ensure that the correct
    datamodel.py file is in use.  Since it gets updated every time this code is
    run, you may unexpectedly be using one from another field.
    """

    # the list of fields
    field_names = ["15275_IC1613"]

    # distance moduli and velocities
    # http://adsabs.harvard.edu/abs/2013AJ....146...86T
    dist_mod = [24.36]
    velocity = [-236]

    # the path+file for a reference image
    im_path = ["../beast_dwarfs/images/15275_IC1613_F555W_drz.fits.gz"]
    ref_filter = ["F555W"]

    # choose a filter to use for removing artifacts
    # (remove catalog sources with filter_FLAG > 99)
    flag_filter = ["F555W"]

    # number of fields
    n_field = len(field_names)

    # Need to know what the correspondence is between filter names in the
    # catalog and the BEAST filter names.
    #
    # These will be used to automatically determine the filters present in
    # each GST file and fill in the datamodel.py file.  The order doesn't
    # matter, as long as the order in one list matches the order in the other
    # list.
    #
    gst_filter_names = [
        "F275W", "F336W", "F390M", "F555W", "F814W", "F110W", "F160W"
    ]
    beast_filter_names = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_WFC3_F390M",
        "HST_WFC3_F555W",
        "HST_WFC3_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]
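    # A quick sanity check (hypothetical, not part of the original pipeline):
    # the two lists above define a catalog->BEAST name mapping, so they must
    # stay the same length and in matching order, e.g.
    #     assert len(gst_filter_names) == len(beast_filter_names)
    #     filter_dict = dict(zip(gst_filter_names, beast_filter_names))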

    for b in range(n_field):
        # for b in [0]:

        print("********")
        print("field " + field_names[b])
        print("********")

        # -----------------
        # data file names
        # -----------------

        # paths for the data/AST files
        gst_file = "./data/" + field_names[b] + ".gst.fits"
        ast_file = "./data/" + field_names[b] + ".gst.fake.fits"
        # path for the reference image (if using for the background map)
        im_file = im_path[b]

        # region file with catalog stars
        # make_region_file(gst_file, ref_filter[b])

        # -----------------
        # 0. make datamodel file
        # -----------------

        # need to do this first, because otherwise any old version that exists
        # will be imported, and changes made here won't get imported again

        print("")
        print("creating datamodel file")
        print("")

        create_datamodel(
            gst_file,
            ast_file,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # load in datamodel to get number of subgrids
        import datamodel

        importlib.reload(datamodel)

        # -----------------
        # 1a. make magnitude histograms
        # -----------------

        print("")
        print("making magnitude histograms")
        print("")

        # if not os.path.isfile('./data/'+field_names[b]+'.gst_maghist.pdf'):
        peak_mags = plot_mag_hist.plot_mag_hist(gst_file,
                                                stars_per_bin=70,
                                                max_bins=75)
        # test = plot_mag_hist.plot_mag_hist(ast_file, stars_per_bin=200, max_bins=30)

        # -----------------
        # 1b. make a source density map
        # -----------------

        print("")
        print("making source density map")
        print("")

        # not currently doing background density bins
        # use_bg_info = True
        use_bg_info = False
        if use_bg_info:
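            # main_make_map() normally receives parsed command-line arguments,
            # so a SimpleNamespace is used here to mimic an argparse.Namespace
            # and call it directly from Python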
            background_args = types.SimpleNamespace(
                subcommand="background",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                reference=im_file,
                mask_radius=10,
                ann_width=20,
                cat_filter=[ref_filter[b], "90"],
            )
            create_background_density_map.main_make_map(background_args)

        # but we are doing source density bins!
        if not os.path.isfile(
                gst_file.replace(".fits", "_source_den_image.fits")):
            # - pixel size of 5 arcsec
            # - use ref_filter[b] between Vega mags of 15 and peak_mags[ref_filter[b]]-0.5
            sourceden_args = types.SimpleNamespace(
                subcommand="sourceden",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                mag_name=ref_filter[b] + "_VEGA",
                mag_cut=[15, peak_mags[ref_filter[b]] - 0.5],
                flag_name=flag_filter[b] + "_FLAG",
            )
            create_background_density_map.main_make_map(sourceden_args)

        # new file name with the source density column
        gst_file_sd = gst_file.replace(".fits", "_with_sourceden.fits")

        # -----------------
        # 2. make physics model
        # -----------------

        print("")
        print("making physics model")
        print("")

        # see which subgrid files already exist
        gs_str = ""
        if datamodel.n_subgrid > 1:
            gs_str = "sub*"

        sed_files = glob.glob("./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(
            field_names[b], gs_str))

        # only make the physics model if the files don't already exist
        if len(sed_files) < datamodel.n_subgrid:
            # directly create physics model grids
            #create_physicsmodel.create_physicsmodel(nprocs=1, nsubs=datamodel.n_subgrid)
            # create grids with script
            create_physicsmodel.split_create_physicsmodel(
                nprocs=1, nsubs=datamodel.n_subgrid)
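            # (this is the wrapper's checkpoint pattern: print instructions
            # for the manual step, then skip to the next field; re-run this
            # script once the step is done)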
            print('\n**** go run physics model code for ' + field_names[b] +
                  '! ****')
            continue

        # list of SED files
        model_grid_files = sorted(
            glob.glob("./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(
                field_names[b], gs_str)))

        # -----------------
        # 3. make ASTs
        # -----------------

        # only create an AST input list if the ASTs don't already exist
        ast_input_file = ("./" + field_names[b] + "_beast/" + field_names[b] +
                          "_beast_inputAST.txt")

        if not os.path.isfile(ast_file):
            if not os.path.isfile(ast_input_file):
                print("")
                print("creating artificial stars")
                print("")
                make_ast_inputs.make_ast_inputs(flux_bin_method=True)

            split_ast_input_file.split_asts(field_names[b] + "_beast",
                                            ast_input_file, 2000)
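            # (the AST input list is split into chunks of 2000 stars so the
            # artificial star tests can be run in manageable batches)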

            print("\n**** go run ASTs for " + field_names[b] + "! ****\n")
            continue

        # -----------------
        # 4/5. edit photometry/AST catalogs
        # -----------------

        # remove sources that are
        # - in regions without full imaging coverage,
        # - flagged in flag_filter

        print("")
        print("editing photometry/AST catalogs")
        print("")

        # - photometry
        gst_file_cut = gst_file.replace(".fits", "_with_sourceden_cut.fits")
        cut_catalogs.cut_catalogs(
            gst_file_sd,
            gst_file_cut,
            partial_overlap=True,
            flagged=True,
            flag_filter=flag_filter[b],
            region_file=True,
        )

        # - ASTs
        ast_file_cut = ast_file.replace(".fits", "_cut.fits")
        cut_catalogs.cut_catalogs(
            ast_file,
            ast_file_cut,
            partial_overlap=True,
            flagged=True,
            flag_filter=flag_filter[b],
            region_file=True,
        )
        # test = plot_mag_hist.plot_mag_hist(ast_file_cut, stars_per_bin=200, max_bins=30)

        # edit the datamodel.py file to have the correct photometry file name
        # (AST file name is already automatically the cut version)
        create_datamodel(
            gst_file_cut,
            ast_file_cut,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # -----------------
        # 6. split observations by source density
        # -----------------

        print("")
        print("splitting observations by source density")
        print("")

        # - photometry

        if len(glob.glob(gst_file_cut.replace('.fits', '*sub*fits'))) == 0:

            # a smaller value for n_per_file will mean more individual files/runs,
            # but each run will take a shorter amount of time

            split_catalog_using_map.split_main(
                gst_file_cut,
                ast_file_cut,
                gst_file.replace('.fits', '_sourceden_map.hd5'),
                bin_width=1,
                n_per_file=6250,
            )

        # -- at this point, we can run the code to create lists of filenames
        file_dict = create_filenames.create_filenames(
            use_sd=True, nsubs=datamodel.n_subgrid)

        # figure out how many files there are
        sd_sub_info = file_dict["sd_sub_info"]
        # - number of SD bins
        temp = set([i[0] for i in sd_sub_info])
        print("** total SD bins: " + str(len(temp)))
        # - the unique sets of SD+sub
        unique_sd_sub = [
            x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x)
        ]
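        # (the comprehension above is an order-preserving de-duplication,
        # keeping only the first occurrence of each [SD bin, sub-file] pair)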
        print("** total SD subfiles: " + str(len(unique_sd_sub)))

        # -----------------
        # 7. make noise models
        # -----------------

        print("")
        print("making noise models")
        print("")

        # create the noise model (this code will check if it exists)
        create_obsmodel.create_obsmodel(use_sd=True,
                                        nsubs=datamodel.n_subgrid,
                                        nprocs=1)

        # -----------------
        # 8. make script to trim models
        # -----------------

        print("")
        print("setting up script to trim models")
        print("")

        # save any at-queue commands
        at_list = []

        # iterate through each model grid
        for i in range(datamodel.n_subgrid):

            # gst list
            temp = file_dict["photometry_files"]
            gst_input_list = [
                x for i, x in enumerate(temp) if i == temp.index(x)
            ]
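            # (order-preserving de-duplication, as for unique_sd_sub above)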

            # create corresponding files for each of those
            ast_input_list = []
            noise_files = []
            trim_prefix = []

            for j in range(len(gst_input_list)):
                # get the sd/sub number
                curr_sd = unique_sd_sub[j][0]
                curr_sub = unique_sd_sub[j][1]
                subfolder = "bin{0}_sub{1}".format(curr_sd, curr_sub)

                # create file names
                ast_input_list.append(
                    ast_file_cut.replace(".fits", "_bin" + curr_sd + ".fits"))
                if datamodel.n_subgrid > 1:
                    noise_files.append(
                        "./{0}_beast/{0}_beast_noisemodel_bin{1}.gridsub{2}.hd5"
                        .format(field_names[b], curr_sd, i))

                    trim_prefix.append(
                        "./{0}_beast/{1}/{0}_beast_{1}_gridsub{2}".format(
                            field_names[b], subfolder, i))

                if datamodel.n_subgrid == 1:
                    noise_files.append(file_dict['noise_files'][j])
                    trim_prefix.append("./{0}_beast/{0}_beast_{1}".format(
                        field_names[b], subfolder))

            # check if the trimmed grids exist before moving on
            if datamodel.n_subgrid > 1:
                trim_files = sorted(
                    glob.glob(
                        "./{0}_beast/bin*_sub*/{0}_beast_*_gridsub{1}_sed_trim.grid.hd5"
                        .format(field_names[b], i)))
            if datamodel.n_subgrid == 1:
                trim_files = sorted(
                    glob.glob("./{0}_beast/{0}_beast_*_sub*_sed_trim.grid.hd5".
                              format(field_names[b])))

            if len(trim_files) < len(gst_input_list):

                job_path = "./" + field_names[b] + "_beast/trim_batch_jobs/"
                if datamodel.n_subgrid > 1:
                    file_prefix = "BEAST_gridsub" + str(i)
                if datamodel.n_subgrid == 1:
                    file_prefix = "BEAST"

                # generate trimming at-queue commands
                setup_batch_beast_trim.generic_batch_trim(
                    model_grid_files[i],
                    noise_files,
                    gst_input_list,
                    ast_input_list,
                    trim_prefix,
                    job_path=job_path,
                    file_prefix=file_prefix,
                    num_subtrim=1,
                    nice=19,
                    prefix="source activate b13",
                )

                at_list.append("at -f " + job_path + file_prefix +
                               "_batch_trim.joblist now")

        if len(at_list) > 0:
            print("\n**** go run trimming code for " + field_names[b] +
                  "! ****")
            print("Here are the command(s) to run:")
            for cmd in at_list:
                print(cmd)
            return
        else:
            print("all files are trimmed for " + field_names[b])

        # -----------------
        # 9. make script to fit models
        # -----------------

        print("")
        print("setting up script to fit models")
        print("")

        fit_run_info = setup_batch_beast_fit.setup_batch_beast_fit(
            num_percore=1,
            nice=19,
            overwrite_logfile=False,
            prefix="source activate b13",
            use_sd=True,
            nsubs=datamodel.n_subgrid,
            nprocs=1,
        )

        # check if the fits exist before moving on
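        # ("done" is a list of booleans, so its length minus its sum counts
        # the fits that still need to run)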
        tot_remaining = len(fit_run_info["done"]) - np.sum(
            fit_run_info["done"])
        if tot_remaining > 0:
            print("\n**** go run fitting code for " + field_names[b] +
                  "! ****")
            print("Here are the " + str(len(fit_run_info["files_to_run"])) +
                  " commands to run:")
            for job_file in fit_run_info["files_to_run"]:
                print("at -f ./" + job_file + " now")
            continue
        else:
            print("all fits are complete for " + field_names[b])

        # -----------------
        # 10. merge stats files from each fit
        # -----------------

        print("")
        print("merging stats files")
        print("")

        merge_files.merge_files(use_sd=True, nsubs=datamodel.n_subgrid)


def generate_files_for_tests(run_beast=True, run_tools=True):
    """
    Use the metal_small example to generate a full set of files for the BEAST
    regression tests.

    Parameters
    ----------
    run_beast : boolean (default=True)
        if True, run the BEAST

    run_tools : boolean (default=True)
        if True, run the code to generate things for tools
    """

    # read in BEAST settings
    settings_orig = beast_settings.beast_settings("beast_settings.txt")
    # also make a version with subgrids
    settings_subgrids = copy.deepcopy(settings_orig)
    settings_subgrids.n_subgrid = 2
    settings_subgrids.project = f"{settings_orig.project}_subgrids"
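    # (the subgrid version runs the same pipeline with the model grid split
    # into 2 pieces, to exercise the subgrid code paths)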

    # ==========================================
    # run the beast for each set of settings
    # ==========================================

    if run_beast:

        for settings in [settings_orig, settings_subgrids]:

            # -----------------
            # physics model
            # -----------------
            create_physicsmodel.create_physicsmodel(
                settings,
                nsubs=settings.n_subgrid,
                nprocs=1,
            )

            # -----------------
            # ASTs
            # -----------------

            # currently only works for no subgrids
            if settings.n_subgrid == 1:
                make_ast_inputs.make_ast_inputs(settings,
                                                pick_method="flux_bin_method")

            # -----------------
            # obs model
            # -----------------
            create_obsmodel.create_obsmodel(
                settings,
                use_sd=False,
                nsubs=settings.n_subgrid,
                nprocs=1,
                use_rate=True,
            )

            # -----------------
            # trimming
            # -----------------

            # make file names
            file_dict = create_filenames.create_filenames(
                settings, use_sd=False, nsubs=settings.n_subgrid)

            # read in the observed data
            obsdata = Observations(settings.obsfile, settings.filters,
                                   settings.obs_colnames)

            for i in range(settings.n_subgrid):

                # get the modesedgrid on which to generate the noisemodel
                modelsedgridfile = file_dict["modelsedgrid_files"][i]
                modelsedgrid = SEDGrid(modelsedgridfile)

                # read in the noise model just created
                noisemodel_vals = noisemodel.get_noisemodelcat(
                    file_dict["noise_files"][i])

                # trim the model sedgrid
                sed_trimname = file_dict["modelsedgrid_trim_files"][i]
                noisemodel_trimname = file_dict["noise_trim_files"][i]

                trim_grid.trim_models(
                    modelsedgrid,
                    noisemodel_vals,
                    obsdata,
                    sed_trimname,
                    noisemodel_trimname,
                    sigma_fac=3.0,
                )

            # -----------------
            # fitting
            # -----------------

            run_fitting.run_fitting(
                settings,
                use_sd=False,
                nsubs=settings.n_subgrid,
                nprocs=1,
                pdf2d_param_list=["Av", "M_ini", "logT"],
                pdf_max_nbins=200,
            )

            # -----------------
            # merging
            # -----------------

            # this is automatically skipped when there are no subgrids
            merge_files.merge_files(settings,
                                    use_sd=False,
                                    nsubs=settings.n_subgrid)

            print("\n\n")

    # ==========================================
    # reference files for assorted tools
    # ==========================================

    if run_tools:

        # -----------------
        # compare_spec_type
        # -----------------

        # the input settings
        input_params = {
            "spec_ra": [72.67213351],
            "spec_dec": [-67.71720515],
            "spec_type": ["A"],
            "spec_subtype": [0],
            "lumin_class": ["IV"],
            "match_radius": 0.2,
        }
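        # (a single mock star to match against the photometry catalog; the
        # match_radius is presumably in arcsec)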

        # run it
        output = compare_spec_type.compare_spec_type(
            settings_orig.obsfile,
            "{0}/{0}_stats.fits".format(settings_orig.project),
            **input_params,
        )

        # save the inputs and outputs
        asdf.AsdfFile({
            "input": input,
            "output": output
        }).write_to("{0}/{0}_compare_spec_type.asdf".format(
            settings_orig.project))

        # -----------------
        # star_type_probability
        # -----------------

        # input settings
        input_params = {
            "output_filebase": None,
            "ext_O_star_params": {
                "min_M_ini": 10,
                "min_Av": 0.5,
                "max_Av": 5
            },
        }

        # run it
        output = star_type_probability.star_type_probability(
            "{0}/{0}_pdf1d.fits".format(settings_orig.project),
            "{0}/{0}_pdf2d.fits".format(settings_orig.project),
            **input_params,
        )

        # save the inputs and outputs
        asdf.AsdfFile({
            "input": input,
            "output": output
        }).write_to("{0}/{0}_star_type_probability.asdf".format(
            settings_orig.project))

    # ==========================================
    # asdf file permissions
    # ==========================================

    # for unknown reasons, asdf currently writes files with permissions set
    # to -rw-------.  This changes it to -rw-r--r-- (like the rest of the
    # BEAST files) so Karl can easily copy them over to the cached file
    # website.

    # list of asdf files
    asdf_files = glob.glob("*/*.asdf")
    # go through each one to change permissions
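    # (the flag combination below is equivalent to `chmod 644`)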
    for fname in asdf_files:
        os.chmod(fname,
                 stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
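

# A minimal driver sketch (an assumption, not in the original listing): since
# the wrapper pauses itself with `continue`/`return` whenever a manual step
# (physics model, ASTs, trimming, fitting) is pending, a production run
# typically means re-running it after each of those steps completes.
if __name__ == "__main__":
    # for a production run:
    beast_production_wrapper()
    # or, to regenerate the regression-test files:
    # generate_files_for_tests()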