Example #1
def pick_positions_from_map(
    catalog,
    chosen_seds,
    input_map,
    N_bins,
    Npermodel,
    outfile=None,
    refimage=None,
    refimage_hdu=1,
    wcs_origin=1,
    Nrealize=1,
    set_coord_boundary=None,
    region_from_filters=None,
):
    """
    Spreads a set of fake stars across regions of similar values,
    given a map file generated by 'create background density map' or
    'create stellar density map' in the tools directory.

    The tiles of the given map are divided across a given
    number of bins. Each bin will then have its own set of tiles,
    which constitute a region on the image.

    Then, for each bin, the given set of fake stars is duplicated,
    and the stars are assigned random positions within this region.

    This way, it can be ensured that enough ASTs are performed for each
    regime of the map, making it possible to have a separate noise model
    for each of these regions.

    Parameters
    ----------

    catalog: GenFluxCatalog object
        The output object from `datamodel.get_obscat(datamodel.obsfile,
        datamodel.filters)`

    chosen_seds: astropy Table
        Table containing fake stars to be duplicated and assigned positions

    input_map: str
        Path to an hd5 file written by a DensityMap

    N_bins: int
        The number of bins for the range of background density values.
        The bins will be picked on a linear grid, ranging from the
        minimum to the maximum value of the map. Then, each tile will be
        put in a bin, so that a set of tiles of the map is obtained for
        each range of source density/background values.

    Npermodel: int
        Number of repeats of each model in each map bin; used only for
        the progress messages.

    outfile: str (default=None)
        If set, the output table is also written to this path as an
        ascii file.

    refimage: str
        Path to fits image that is used for the positions. If none is
        given, the ra and dec will be put in the x and y output columns
        instead.

    refimage_hdu: int (default=1)
        index of the HDU from which to get the header, which will be used
        to extract WCS information

    wcs_origin : 0 or 1 (default=1)
        As described in the WCS documentation: "the coordinate in the upper
        left corner of the image. In FITS and Fortran standards, this is 1. 
        In Numpy and C standards this is 0."

    Nrealize: integer
        The number of times each model should be repeated for each
        background regime. This is to sample the variance due to
        variations within each region, for each individual model.

    set_coord_boundary : None, or list of 2 numpy arrays
        If provided, these RA/Dec coordinates will be used to limit the
        region over which ASTs are generated.  Input should be list of two
        arrays, the first RA and the second Dec, ordered sequentially
        around the region (either CW or CCW).  Requires a refimage to
        convert the RA/Dec to x/y.

    region_from_filters : None, list of filter name(s), or 'all'
        If provided, ASTs will only be placed in regions with this particular
        combination of filter(s).  Or, if 'all' is chosen, ASTs will only be
        placed where there is overlap with all filters.  In practice, this
        means creating a convex hull around the catalog RA/DEC of sources with
        valid values in these filters.  Note that if the region in question is
        a donut, this will put ASTs in the hole.  This will also only work
        properly if the region is a convex polygon.  A solution to these needs
        to be figured out at some point.

    Returns
    -------
    astropy Table: List of fake stars, with magnitudes and positions
    - optionally -
    ascii file of this table, written to outfile

    """

    # if refimage exists, extract WCS info
    if refimage is None:
        wcs = None
    else:
        with fits.open(refimage) as hdu:
            imagehdu = hdu[refimage_hdu]
            wcs = WCS(imagehdu.header)

    # if appropriate information is given, extract the x/y positions so that
    # there are no ASTs generated outside of the catalog footprint
    colnames = catalog.data.columns

    if "X" or "x" in colnames:
        if "X" in colnames:
            x_positions = catalog.data["X"][:]
            y_positions = catalog.data["Y"][:]
        if "x" in colnames:
            x_positions = catalog.data["x"][:]
            y_positions = catalog.data["y"][:]

    else:
        if ("RA" in colnames) or ("ra" in colnames):
            if "RA" in colnames:
                ra_positions = catalog.data["RA"][:]
                dec_positions = catalog.data["DEC"][:]
            if "ra" in colnames:
                ra_positions = catalog.data["ra"][:]
                dec_positions = catalog.data["dec"][:]

            # if there's a refimage, convert RA/Dec to x/y
            if refimage:
                x_positions, y_positions = wcs.all_world2pix(
                    ra_positions, dec_positions, wcs_origin
                )
            else:
                x_positions, y_positions = ra_positions, dec_positions
        else:
            raise RuntimeError(
                "Your catalog does not supply X/Y or RA/DEC information to ensure ASTs are within catalog boundary"
            )

    # create path containing the positions
    coords = np.array(
        [x_positions, y_positions]
    ).T  # there's a weird astropy datatype issue that requires numpy coercion
    hull = ConvexHull(coords)
    bounds_x, bounds_y = coords[hull.vertices, 0], coords[hull.vertices, 1]
    catalog_boundary = Path(np.array([bounds_x, bounds_y]).T)

    # if coord_boundary set, define an additional boundary for ASTs
    if set_coord_boundary is not None:
        if refimage:
            bounds_x, bounds_y = wcs.all_world2pix(
                set_coord_boundary[0], set_coord_boundary[1], wcs_origin
            )
            coord_boundary = Path(np.array([bounds_x, bounds_y]).T)
        else:
            raise RuntimeError(
                "If using set_coord_boundary, you must also provide a refimage"
            )

    # if region_from_filters is set, define an additional boundary for ASTs
    if region_from_filters is not None:
        # need catalog file from datamodel
        importlib.reload(datamodel)

        if isinstance(region_from_filters, list):
            # good stars with user-defined partial overlap
            _, good_stars = cut_catalogs.cut_catalogs(datamodel.obsfile, 'N/A',
                                      flagged=True, flag_filter=region_from_filters,
                                      no_write=True)
        elif region_from_filters == 'all':
            # good stars only with fully overlapping region
            _, good_stars = cut_catalogs.cut_catalogs(datamodel.obsfile, 'N/A', partial_overlap=True,
                                      no_write=True)
        else:
            raise RuntimeError('Invalid argument for region_from_filters')
        
        coords = np.array(
            [x_positions[good_stars == 1], y_positions[good_stars == 1]]
        ).T  # there's a weird astropy datatype issue that requires numpy coercion
        hull = ConvexHull(coords)
        bounds_x, bounds_y = coords[hull.vertices, 0], coords[hull.vertices, 1]
        filt_reg_boundary = Path(np.array([bounds_x, bounds_y]).T)
   
    # Load the background map
    print(Npermodel, " repeats of each model in each map bin")

    bdm = density_map.BinnedDensityMap.create(input_map, N_bins)
    tile_vals = bdm.tile_vals()
    max_val = np.amax(tile_vals)
    min_val = np.amin(tile_vals)
    tiles_foreach_bin = bdm.tiles_foreach_bin()

    # Remove any of the tiles that aren't contained within user-imposed
    # constraints (if any)
    if (set_coord_boundary is not None) or (region_from_filters is not None):

        tile_ra_min, tile_dec_min = bdm.min_ras_decs()
        tile_ra_delta, tile_dec_delta = bdm.delta_ras_decs()

        for i,tile_set in enumerate(tiles_foreach_bin):

            # keep track of which indices to discard
            keep_tile = np.ones(len(tile_set), dtype=bool)

            for j,tile in enumerate(tile_set):

                # corners of the tile
                ra_min = tile_ra_min[tile]
                ra_max = tile_ra_min[tile] + tile_ra_delta[tile]
                dec_min = tile_dec_min[tile]
                dec_max = tile_dec_min[tile] + tile_dec_delta[tile]

                # make a box object for the tile
                if wcs is None:
                    tile_box = box(ra_min, dec_min, ra_max, dec_max)
                else:
                    bounds_x, bounds_y = wcs.all_world2pix(
                        np.array([ra_min, ra_max]),
                        np.array([dec_min, dec_max]), wcs_origin)

                    tile_box = box(np.min(bounds_x), np.min(bounds_y),
                                       np.max(bounds_x), np.max(bounds_y))

                # discard tile if there's no overlap with user-imposed regions

                # - set_coord_boundary
                if set_coord_boundary is not None:
                    if Polygon(coord_boundary.vertices).intersection(tile_box).area == 0:
                        keep_tile[j] = False

                # - region_from_filters
                if region_from_filters is not None:
                    if Polygon(filt_reg_boundary.vertices).intersection(tile_box).area == 0:
                        keep_tile[j] = False

            # remove anything that needs to be discarded
            tiles_foreach_bin[i] = tile_set[keep_tile]

    # Remove empty bins
    tile_sets = [tile_set for tile_set in tiles_foreach_bin if len(tile_set)]
    print(len(tile_sets), " non-empty map bins found between ", min_val, "and", max_val)

    

    # Repeat the seds Nrealize times (sample each on at Nrealize
    # different positions, in each region)
    repeated_seds = np.repeat(chosen_seds, Nrealize)
    Nseds_per_region = len(repeated_seds)
    # For each set of tiles, repeat the seds and spread them evenly over
    # the tiles
    repeated_seds = np.repeat(repeated_seds, len(tile_sets))

    out_table = Table(repeated_seds, names=chosen_seds.colnames)
    xs = np.zeros(len(out_table))
    ys = np.zeros(len(out_table))
    bin_indices = np.zeros(len(out_table))

    tile_ra_min, tile_dec_min = bdm.min_ras_decs()
    tile_ra_delta, tile_dec_delta = bdm.delta_ras_decs()

    for bin_index, tile_set in enumerate(tqdm(tile_sets,
        desc="{:.2f} models per map bin".format(Nseds_per_region / Npermodel)
    )):
        start = bin_index * Nseds_per_region
        stop = start + Nseds_per_region
        bin_indices[start:stop] = bin_index
        for i in range(Nseds_per_region):
            x = -1
            y = -1
            # Convert each ra,dec to x,y. If there are negative values, try again
            while x < 0 or y < 0:
                # Pick a random tile
                tile = np.random.choice(tile_set)
                # Within this tile, pick a random ra and dec
                ra = tile_ra_min[tile] + np.random.random_sample() * tile_ra_delta[tile]
                dec = (
                    tile_dec_min[tile]
                    + np.random.random_sample() * tile_dec_delta[tile]
                )

                if wcs is None:
                    x, y = ra, dec
                    # check that this ra/dec is within the catalog footprint
                    # (contains_points takes an N,2 array of positions)
                    within_bounds = catalog_boundary.contains_points([[x, y]])[0]
                    if within_bounds:
                        break
                    # otherwise mark as invalid and pick a new position
                    x = -1
                else:
                    [x], [y] = wcs.all_world2pix(
                        np.array([ra]), np.array([dec]), wcs_origin
                    )
                    # check that this x/y is within the catalog footprint
                    # (contains_points takes an N,2 array of positions)
                    within_bounds = catalog_boundary.contains_points([[x, y]])[0]
                    if not within_bounds:
                        x = -1
                    # check that this x/y is within any input boundary
                    # (only relevant if there's a wcs from a refimage)
                    if set_coord_boundary is not None:
                        within_bounds = coord_boundary.contains_points([[x, y]])[0]
                        if not within_bounds:
                            x = -1
                    if region_from_filters is not None:
                        within_bounds = filt_reg_boundary.contains_points([[x, y]])[0]
                        if not within_bounds:
                            x = -1
                            
            # each SED copy is repeated len(tile_sets) times consecutively in
            # out_table, so its row for this map bin is i * len(tile_sets) + bin_index
            j = bin_index + i * len(tile_sets)
            xs[j] = x
            ys[j] = y

    # I'm just mimicking the format that is produced by the examples
    cs = []
    cs.append(Column(np.zeros(len(out_table), dtype=int), name="zeros"))
    cs.append(Column(np.ones(len(out_table), dtype=int), name="ones"))

    if wcs is None:
        cs.append(Column(xs, name="RA"))
        cs.append(Column(ys, name="DEC"))
    else:
        cs.append(Column(xs, name="X"))
        cs.append(Column(ys, name="Y"))

    for i, c in enumerate(cs):
        out_table.add_column(c, index=i)  # insert these columns from the left

    # Write out the table in ascii
    if outfile:
        formats = {k: "%.5f" for k in out_table.colnames[2:]}
        ascii.write(out_table, outfile, overwrite=True, formats=formats)

    return out_table
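
A minimal usage sketch for the function above. The catalog construction follows the docstring's `datamodel.get_obscat(datamodel.obsfile, datamodel.filters)`; the file names, the chosen_seds table, and the parameter values are illustrative assumptions rather than a verified call site.

# usage sketch (hypothetical file names and values)
import importlib
import datamodel
from astropy.table import Table

importlib.reload(datamodel)

# catalog of real observations (GenFluxCatalog), as described in the docstring
obs_catalog = datamodel.get_obscat(datamodel.obsfile, datamodel.filters)

# fake-star SEDs picked elsewhere (hypothetical file)
chosen_seds = Table.read("chosen_seds.fits")

ast_table = pick_positions_from_map(
    obs_catalog,
    chosen_seds,
    "field_sourceden_map.hd5",   # map written by the density map tools
    N_bins=5,                    # illustrative value
    Npermodel=20,                # illustrative value
    outfile="field_inputAST.txt",
    refimage="field_F555W_drz.fits",
    Nrealize=1,
)
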
Example #2
def beast_production_wrapper():
    """
    This does all of the steps for a full production run, and can be used as
    a wrapper to automatically do most steps for multiple fields.
    * make datamodel.py file
    * make source density map
    * make background density map
    * split catalog by source density
    * make physics model (SED grid)
    * make input list for ASTs
    * make noise model
    * generate batch script to trim models
    * generate batch script to fit models
    * merge stats files back together
    * spatially reorder the results

    Places for user to manually do things:
    * editing code before use
        - datamodel_template.py: setting up the file with desired parameters
        - here: list the catalog filter names with the corresponding BEAST names
        - here: choose settings (pixel size, filter, mag range) for the source density map
        - here: choose settings (pixel size, reference image) for the background map
        - here: choose settings (filter, number per file) for dividing catalog by source density
        - here: choose settings (# files, nice level) for the trimming/fitting batch scripts
    * process the ASTs, as described in BEAST documentation
    * run the trimming scripts
    * run the fitting scripts

    BEWARE: When running the trimming/fitting scripts, ensure that the correct
    datamodel.py file is in use.  Since it gets updated every time this code is
    run, you may unexpectedly be using one from another field.
    """

    # the list of fields
    field_names = ["15275_IC1613"]

    # distance moduli and velocities
    # http://adsabs.harvard.edu/abs/2013AJ....146...86T
    dist_mod = [24.36]
    velocity = [-236]

    # the path+file for a reference image
    im_path = ["../beast_dwarfs/images/15275_IC1613_F555W_drz.fits.gz"]
    ref_filter = ["F555W"]

    # choose a filter to use for removing artifacts
    # (remove catalog sources with filter_FLAG > 99)
    flag_filter = ["F555W"]

    # number of fields
    n_field = len(field_names)

    # Need to know what the correspondence is between filter names in the
    # catalog and the BEAST filter names.
    #
    # These will be used to automatically determine the filters present in
    # each GST file and fill in the datamodel.py file.  The order doesn't
    # matter, as long as the order in one list matches the order in the other
    # list.
    #
    gst_filter_names = [
        "F275W", "F336W", "F390M", "F555W", "F814W", "F110W", "F160W"
    ]
    beast_filter_names = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_WFC3_F390M",
        "HST_WFC3_F555W",
        "HST_WFC3_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]

    for b in range(n_field):
        # for b in [0]:

        print("********")
        print("field " + field_names[b])
        print("********")

        # -----------------
        # data file names
        # -----------------

        # paths for the data/AST files
        gst_file = "./data/" + field_names[b] + ".gst.fits"
        ast_file = "./data/" + field_names[b] + ".gst.fake.fits"
        # path for the reference image (if using for the background map)
        im_file = im_path[b]

        # region file with catalog stars
        # make_region_file(gst_file, ref_filter[b])

        # -----------------
        # 0. make datamodel file
        # -----------------

        # need to do this first, because otherwise any old version that exists
        # will be imported, and changes made here won't get imported again

        print("")
        print("creating datamodel file")
        print("")

        create_datamodel(
            gst_file,
            ast_file,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # load in datamodel to get number of subgrids
        import datamodel

        importlib.reload(datamodel)

        # -----------------
        # 1a. make magnitude histograms
        # -----------------

        print("")
        print("making magnitude histograms")
        print("")

        # if not os.path.isfile('./data/'+field_names[b]+'.gst_maghist.pdf'):
        peak_mags = plot_mag_hist.plot_mag_hist(gst_file,
                                                stars_per_bin=70,
                                                max_bins=75)
        # test = plot_mag_hist.plot_mag_hist(ast_file, stars_per_bin=200, max_bins=30)

        # -----------------
        # 1b. make a source density map
        # -----------------

        print("")
        print("making source density map")
        print("")

        # not currently doing background density bins
        # use_bg_info = True
        use_bg_info = False
        if use_bg_info:
            background_args = types.SimpleNamespace(
                subcommand="background",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                reference=im_file,
                mask_radius=10,
                ann_width=20,
                cat_filter=[ref_filter, "90"],
            )
            create_background_density_map.main_make_map(background_args)

        # but we are doing source density bins!
        if not os.path.isfile(
                gst_file.replace(".fits", "_source_den_image.fits")):
            # - pixel size of 5 arcsec
            # - use ref_filter[b] between vega mags of 15 and peak_mags[ref_filter[b]] - 0.5
            sourceden_args = types.SimpleNamespace(
                subcommand="sourceden",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                mag_name=ref_filter[b] + "_VEGA",
                mag_cut=[15, peak_mags[ref_filter[b]] - 0.5],
                flag_name=flag_filter[b] + '_FLAG',
            )
            create_background_density_map.main_make_map(sourceden_args)

        # new file name with the source density column
        gst_file_sd = gst_file.replace(".fits", "_with_sourceden.fits")

        # -----------------
        # 2. make physics model
        # -----------------

        print("")
        print("making physics model")
        print("")

        # see which subgrid files already exist
        gs_str = ""
        if datamodel.n_subgrid > 1:
            gs_str = "sub*"

        sed_files = glob.glob("./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(
            field_names[b], gs_str))

        # only make the physics model if they don't already exist
        if len(sed_files) < datamodel.n_subgrid:
            # directly create physics model grids
            #create_physicsmodel.create_physicsmodel(nprocs=1, nsubs=datamodel.n_subgrid)
            # create grids with script
            create_physicsmodel.split_create_physicsmodel(
                nprocs=1, nsubs=datamodel.n_subgrid)
            print('\n**** go run physics model code for ' + field_names[b] +
                  '! ****')
            continue

        # list of SED files
        model_grid_files = sorted(
            glob.glob("./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(
                field_names[b], gs_str)))

        # -----------------
        # 3. make ASTs
        # -----------------

        # only create an AST input list if the ASTs don't already exist
        ast_input_file = ("./" + field_names[b] + "_beast/" + field_names[b] +
                          "_beast_inputAST.txt")

        if not os.path.isfile(ast_file):
            if not os.path.isfile(ast_input_file):
                print("")
                print("creating artificial stars")
                print("")
                make_ast_inputs.make_ast_inputs(flux_bin_method=True)

            split_ast_input_file.split_asts(field_names[b] + "_beast",
                                            ast_input_file, 2000)

            print("\n**** go run ASTs for " + field_names[b] + "! ****\n")
            continue

        # -----------------
        # 4/5. edit photometry/AST catalogs
        # -----------------

        # remove sources that are
        # - in regions without full imaging coverage,
        # - flagged in flag_filter

        print("")
        print("editing photometry/AST catalogs")
        print("")

        # - photometry
        gst_file_cut = gst_file.replace(".fits", "_with_sourceden_cut.fits")
        cut_catalogs.cut_catalogs(
            gst_file_sd,
            gst_file_cut,
            partial_overlap=True,
            flagged=True,
            flag_filter=flag_filter[b],
            region_file=True,
        )

        # - ASTs
        ast_file_cut = ast_file.replace(".fits", "_cut.fits")
        cut_catalogs.cut_catalogs(
            ast_file,
            ast_file_cut,
            partial_overlap=True,
            flagged=True,
            flag_filter=flag_filter[b],
            region_file=True,
        )
        # test = plot_mag_hist.plot_mag_hist(ast_file_cut, stars_per_bin=200, max_bins=30)

        # edit the datamodel.py file to have the correct photometry file name
        # (AST file name is already automatically the cut version)
        create_datamodel(
            gst_file_cut,
            ast_file_cut,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # -----------------
        # 6. split observations by source density
        # -----------------

        print("")
        print("splitting observations by source density")
        print("")

        # - photometry

        if len(glob.glob(gst_file_cut.replace('.fits', '*sub*fits'))) == 0:

            # a smaller value for n_per_file will mean more individual files/runs,
            # but each run will take a shorter amount of time

            split_catalog_using_map.split_main(
                gst_file_cut,
                ast_file_cut,
                gst_file.replace('.fits', '_sourceden_map.hd5'),
                bin_width=1,
                n_per_file=6250,
            )

        # -- at this point, we can run the code to create lists of filenames
        file_dict = create_filenames.create_filenames(
            use_sd=True, nsubs=datamodel.n_subgrid)

        # figure out how many files there are
        sd_sub_info = file_dict["sd_sub_info"]
        # - number of SD bins
        temp = set([i[0] for i in sd_sub_info])
        print("** total SD bins: " + str(len(temp)))
        # - the unique sets of SD+sub
        unique_sd_sub = [
            x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x)
        ]
        print("** total SD subfiles: " + str(len(unique_sd_sub)))

        # -----------------
        # 7. make noise models
        # -----------------

        print("")
        print("making noise models")
        print("")

        # create the noise model (this code will check if it exists)
        create_obsmodel.create_obsmodel(use_sd=True,
                                        nsubs=datamodel.n_subgrid,
                                        nprocs=1)

        # -----------------
        # 8. make script to trim models
        # -----------------

        print("")
        print("setting up script to trim models")
        print("")

        # save any at-queue commands
        at_list = []

        # iterate through each model grid
        for i in range(datamodel.n_subgrid):

            # gst list
            temp = file_dict["photometry_files"]
            gst_input_list = [
                x for i, x in enumerate(temp) if i == temp.index(x)
            ]

            # create corresponding files for each of those
            ast_input_list = []
            noise_files = []
            trim_prefix = []

            for j in range(len(gst_input_list)):
                # get the sd/sub number
                curr_sd = unique_sd_sub[j][0]
                curr_sub = unique_sd_sub[j][1]
                subfolder = "bin{0}_sub{1}".format(curr_sd, curr_sub)

                # create file names
                ast_input_list.append(
                    ast_file_cut.replace(".fits", "_bin" + curr_sd + ".fits"))
                if datamodel.n_subgrid > 1:
                    noise_files.append(
                        "./{0}_beast/{0}_beast_noisemodel_bin{1}.gridsub{2}.hd5"
                        .format(field_names[b], curr_sd, i))

                    trim_prefix.append(
                        "./{0}_beast/{1}/{0}_beast_{1}_gridsub{2}".format(
                            field_names[b], subfolder, i))

                if datamodel.n_subgrid == 1:
                    noise_files.append(file_dict['noise_files'][j])
                    trim_prefix.append("./{0}_beast/{0}_beast_{1}".format(
                        field_names[b], subfolder))

            # check if the trimmed grids exist before moving on
            if datamodel.n_subgrid > 1:
                trim_files = sorted(
                    glob.glob(
                        "./{0}_beast/bin*_sub*/{0}_beast_*_gridsub{1}_sed_trim.grid.hd5"
                        .format(field_names[b], i)))
            if datamodel.n_subgrid == 1:
                trim_files = sorted(
                    glob.glob("./{0}_beast/{0}_beast_*_sub*_sed_trim.grid.hd5".
                              format(field_names[b])))

            if len(trim_files) < len(gst_input_list):

                job_path = "./" + field_names[b] + "_beast/trim_batch_jobs/"
                if datamodel.n_subgrid > 1:
                    file_prefix = "BEAST_gridsub" + str(i)
                if datamodel.n_subgrid == 1:
                    file_prefix = "BEAST"

                # generate trimming at-queue commands
                setup_batch_beast_trim.generic_batch_trim(
                    model_grid_files[i],
                    noise_files,
                    gst_input_list,
                    ast_input_list,
                    trim_prefix,
                    job_path=job_path,
                    file_prefix=file_prefix,
                    num_subtrim=1,
                    nice=19,
                    prefix="source activate b13",
                )

                at_list.append("at -f " + job_path + file_prefix +
                               "_batch_trim.joblist now")

        if len(at_list) > 0:
            print("\n**** go run trimming code for " + field_names[b] +
                  "! ****")
            print("Here are the command(s) to run:")
            for cmd in at_list:
                print(cmd)
            return
        else:
            print("all files are trimmed for " + field_names[b])

        # -----------------
        # 9. make script to fit models
        # -----------------

        print("")
        print("setting up script to fit models")
        print("")

        fit_run_info = setup_batch_beast_fit.setup_batch_beast_fit(
            num_percore=1,
            nice=19,
            overwrite_logfile=False,
            prefix="source activate b13",
            use_sd=True,
            nsubs=datamodel.n_subgrid,
            nprocs=1,
        )

        # check if the fits exist before moving on
        tot_remaining = len(fit_run_info["done"]) - np.sum(
            fit_run_info["done"])
        if tot_remaining > 0:
            print("\n**** go run fitting code for " + field_names[b] +
                  "! ****")
            print("Here are the " + str(len(fit_run_info["files_to_run"])) +
                  " commands to run:")
            for job_file in fit_run_info["files_to_run"]:
                print("at -f ./" + job_file + " now")
            continue
        else:
            print("all fits are complete for " + field_names[b])

        # -----------------
        # 10. merge stats files from each fit
        # -----------------

        print("")
        print("merging stats files")
        print("")

        merge_files.merge_files(use_sd=True, nsubs=datamodel.n_subgrid)
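
The wrapper above is meant to be re-run from the top after each manual checkpoint (running the ASTs, the trimming scripts, and the fitting scripts): when a step's outputs are missing it prints the commands to run and then either moves on to the next field or returns. A minimal driver is simply:

# re-run after completing each manual step until all fields report
# that fitting and merging are complete
if __name__ == "__main__":
    beast_production_wrapper()
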
Example #3
def pick_positions_from_map(
    catalog,
    chosen_seds,
    input_map,
    bin_mode,
    N_bins,
    bin_width,
    custom_bins,
    Npermodel,
    outfile=None,
    refimage=None,
    refimage_hdu=1,
    wcs_origin=1,
    Nrealize=1,
    set_coord_boundary=None,
    region_from_filters=None,
    erode_boundary=None,
):
    """
    Spreads a set of fake stars across regions of similar values,
    given a map file generated by 'create background density map' or
    'create stellar density map' in the tools directory.

    The tiles of the given map are divided across a given
    number of bins. Each bin will then have its own set of tiles,
    which constitute a region on the image.

    Then, for each bin, the given set of fake stars is duplicated,
    and the stars are assigned random positions within this region.

    This way, it can be ensured that enough ASTs are performed for each
    regime of the map, making it possible to have a separate noise model
    for each of these regions.

    Parameters
    ----------

    catalog: Observations object
        Provides the observations

    chosen_seds: astropy Table
        Table containing fake stars to be duplicated and assigned positions

    input_map: str
        Path to an hd5 file written by a DensityMap

    bin_mode: str
        The convention for generating bins of source density. The options
        are "linear" (for linear binning) and "log" (for log binning). If "log",
        the number of bins (N_bins) must be set. If "linear", either N_bins
        or the bin width (bin_width), or neither (resulting in
        default integer binning by sources/arcsec^2) can be set.
        Default: "linear"

    N_bins: int
        The number of bins for the range of background density values.
        The bins will be picked on a linear grid or log grid (according to bin_mode)
        ranging from the minimum to the maximum value of the map. Then, each tile will be
        put in a bin, so that a set of tiles of the map is obtained for
        each range of source density/background values.

    bin_width: int
        The bin width for the range of background density values, in units
        of number of sources per square arcsecond.
        The bins will be picked on a linear grid, ranging from the
        minimum to the maximum value of the map. Then, each tile will be
        put in a bin, so that a set of tiles of the map is obtained for
        each range of source density/background values.

    custom_bins: list (default=None)
        Custom values of bin edges for source or background density values.
        Each tile will be put into a bin, so that a set of tiles of the
        map is obtained for each range of source density/background values.

    Npermodel: int
        Number of repeats of each model in each map bin; used only for
        the progress messages.

    outfile: str (default=None)
        If set, the output table is also written to this path as an
        ascii file.

    refimage: str
        Path to fits image that is used for the positions. If none is
        given, the ra and dec will be put in the x and y output columns
        instead.

    refimage_hdu: int (default=1)
        index of the HDU from which to get the header, which will be used
        to extract WCS information

    wcs_origin : 0 or 1 (default=1)
        As described in the WCS documentation: "the coordinate in the upper
        left corner of the image. In FITS and Fortran standards, this is 1.
        In Numpy and C standards this is 0."

    Nrealize: integer
        The number of times each model should be repeated for each
        background regime. This is to sample the variance due to
        variations within each region, for each individual model.

    set_coord_boundary : None, or list of 2 numpy arrays
        If provided, these RA/Dec coordinates will be used to limit the
        region over which ASTs are generated.  Input should be list of two
        arrays, the first RA and the second Dec, ordered sequentially
        around the region (either CW or CCW).  If the input catalog only has x/y
        (no RA/Dec), a refimage is required.

    region_from_filters : None, list of filter name(s), or 'all'
        If provided, ASTs will only be placed in regions with this particular
        combination of filter(s).  Or, if 'all' is chosen, ASTs will only be
        placed where there is overlap with all filters.  In practice, this
        means creating a convex hull around the catalog RA/DEC of sources with
        valid values in these filters.  Note that if the region in question is
        a donut, this will put ASTs in the hole.  This will also only work
        properly if the region is a convex polygon.  A solution to these needs
        to be figured out at some point.

    erode_boundary : None, or float (default=None)
        If provided, this number of arcseconds will be eroded from the region
        over which ASTs are generated.  The purpose is to avoid placing ASTs
        near the image edge.  Erosion is applied to both the catalog boundary
        and the values from set_coord_boundary.  If the input catalog only has
        x/y (no RA/Dec), a refimage is required.

    Returns
    -------
    astropy Table: List of fake stars, with magnitudes and positions
    - optionally -
    ascii file of this table, written to outfile

    """

    # if refimage exists, extract WCS info
    if refimage is None:
        ref_wcs = None
    else:
        with fits.open(refimage) as hdu:
            imagehdu = hdu[refimage_hdu]
            ref_wcs = wcs.WCS(imagehdu.header)

    # if appropriate information is given, extract the x/y positions so that
    # there are no ASTs generated outside of the catalog footprint
    colnames = catalog.data.columns
    xy_pos = False
    radec_pos = False

    # if x/y in catalog, save them
    if ("X" in colnames) or ("x" in colnames):
        xy_pos = True
        if "X" in colnames:
            x_positions = catalog.data["X"][:]
            y_positions = catalog.data["Y"][:]
        if "x" in colnames:
            x_positions = catalog.data["x"][:]
            y_positions = catalog.data["y"][:]

    # if RA/Dec in catalog, save them
    if ("RA" in colnames) or ("ra" in colnames):
        radec_pos = True
        if "RA" in colnames:
            ra_positions = catalog.data["RA"][:]
            dec_positions = catalog.data["DEC"][:]
        if "ra" in colnames:
            ra_positions = catalog.data["ra"][:]
            dec_positions = catalog.data["dec"][:]

    # if only one of those exists and there's a refimage, convert to the other
    if xy_pos and not radec_pos and refimage:
        radec_pos = True
        ra_positions, dec_positions = ref_wcs.all_pix2world(
            x_positions, y_positions, wcs_origin)
    if radec_pos and not xy_pos and refimage:
        xy_pos = True
        x_positions, y_positions = ref_wcs.all_world2pix(
            ra_positions, dec_positions, wcs_origin)

    # if no x/y or ra/dec in the catalog, raise error
    if not xy_pos and not radec_pos:
        raise RuntimeError(
            "Your catalog does not supply X/Y or RA/DEC information to ensure ASTs are within catalog boundary"
        )

    # if erode_boundary is set, try to make a pixel version to go with xy positions
    erode_deg = None
    erode_pix = None
    if erode_boundary:
        erode_deg = erode_boundary / 3600
        if xy_pos and refimage:
            deg_per_pix = wcs.utils.proj_plane_pixel_scales(ref_wcs)[0]
            erode_pix = erode_deg / deg_per_pix

    # create path containing the positions (eroded if chosen)
    catalog_boundary_xy = None
    catalog_boundary_radec = None
    if xy_pos:
        catalog_boundary_xy = erode_path(
            cut_catalogs.convexhull_path(x_positions, y_positions), erode_pix)
    if radec_pos:
        catalog_boundary_radec = erode_path(
            cut_catalogs.convexhull_path(ra_positions, dec_positions),
            erode_deg)

    # if coord_boundary set, define an additional boundary for ASTs (eroded if chosen)
    if set_coord_boundary is not None:
        # initialize variables
        coord_boundary_xy = None
        coord_boundary_radec = None
        # evaluate one or both
        if xy_pos and refimage:
            bounds_x, bounds_y = ref_wcs.all_world2pix(set_coord_boundary[0],
                                                       set_coord_boundary[1],
                                                       wcs_origin)
            coord_boundary_xy = erode_path(
                Path(np.array([bounds_x, bounds_y]).T), erode_pix)
        if radec_pos:
            coord_boundary_radec = erode_path(
                Path(
                    np.array([set_coord_boundary[0],
                              set_coord_boundary[1]]).T),
                erode_deg,
            )

    # if region_from_filters is set, define an additional boundary for ASTs
    if region_from_filters is not None:

        # 1. find the sub-list of sources
        if isinstance(region_from_filters, list):
            # good stars with user-defined partial overlap
            _, good_stars = cut_catalogs.cut_catalogs(
                catalog.inputFile,
                "N/A",
                flagged=True,
                flag_filter=region_from_filters,
                no_write=True,
            )
        elif region_from_filters == "all":
            # good stars only with fully overlapping region
            _, good_stars = cut_catalogs.cut_catalogs(catalog.inputFile,
                                                      "N/A",
                                                      partial_overlap=True,
                                                      no_write=True)
        else:
            raise RuntimeError("Invalid argument for region_from_filters")

        # 2. define the Path object for the convex hull
        # initialize variables
        filt_reg_boundary_xy = None
        filt_reg_boundary_radec = None
        # evaluate one or both
        if xy_pos:
            filt_reg_boundary_xy = cut_catalogs.convexhull_path(
                x_positions[good_stars == 1], y_positions[good_stars == 1])
        if radec_pos:
            filt_reg_boundary_radec = cut_catalogs.convexhull_path(
                ra_positions[good_stars == 1], dec_positions[good_stars == 1])

    # Load the background map
    print(Npermodel, " repeats of each model in each map bin")

    bdm = density_map.BinnedDensityMap.create(
        input_map,
        bin_mode=bin_mode,
        N_bins=N_bins,
        bin_width=bin_width,
        custom_bins=custom_bins,
    )

    tile_vals = bdm.tile_vals()
    max_val = np.amax(tile_vals)
    min_val = np.amin(tile_vals)
    tiles_foreach_bin = bdm.tiles_foreach_bin()

    # Remove any of the tiles that aren't contained within user-imposed
    # constraints (if any)
    if (set_coord_boundary is not None) or (region_from_filters is not None):

        tile_ra_min, tile_dec_min = bdm.min_ras_decs()
        tile_ra_delta, tile_dec_delta = bdm.delta_ras_decs()

        for i, tile_set in enumerate(tiles_foreach_bin):

            # keep track of which indices to discard
            keep_tile = np.ones(len(tile_set), dtype=bool)

            for j, tile in enumerate(tile_set):

                # corners of the tile
                ra_min = tile_ra_min[tile]
                ra_max = tile_ra_min[tile] + tile_ra_delta[tile]
                dec_min = tile_dec_min[tile]
                dec_max = tile_dec_min[tile] + tile_dec_delta[tile]

                # make a box object for the tile
                tile_box_radec = box(ra_min, dec_min, ra_max, dec_max)
                tile_box_xy = None
                if refimage:
                    bounds_x, bounds_y = ref_wcs.all_world2pix(
                        np.array([ra_min, ra_max]),
                        np.array([dec_min, dec_max]),
                        wcs_origin,
                    )

                    tile_box_xy = box(
                        np.min(bounds_x),
                        np.min(bounds_y),
                        np.max(bounds_x),
                        np.max(bounds_y),
                    )

                # discard tile if there's no overlap with user-imposed regions

                # - erode_boundary
                # if you only want to erode the boundary and not impose other
                # coordinate boundary constraints, still discard SD tiles that don't overlap
                if (set_coord_boundary is None) and (erode_boundary
                                                     is not None):
                    if catalog_boundary_radec and tile_box_radec:
                        if (Polygon(
                                catalog_boundary_radec.vertices).intersection(
                                    tile_box_radec).area == 0):
                            keep_tile[j] = False
                    elif catalog_boundary_xy and tile_box_xy:
                        if (Polygon(catalog_boundary_xy.vertices).intersection(
                                tile_box_xy).area == 0):
                            keep_tile[j] = False
                # - set_coord_boundary
                if set_coord_boundary is not None:
                    # coord boundary is input in RA/Dec, and tiles are RA/Dec,
                    # so there's no need to check the x/y version of either
                    if (Polygon(coord_boundary_radec.vertices).intersection(
                            tile_box_radec).area == 0):
                        keep_tile[j] = False

                # - region_from_filters
                if region_from_filters is not None:
                    if filt_reg_boundary_radec and tile_box_radec:
                        if (Polygon(
                                filt_reg_boundary_radec.vertices).intersection(
                                    tile_box_radec).area == 0):
                            keep_tile[j] = False
                    elif filt_reg_boundary_xy and tile_box_xy:
                        if (Polygon(
                                filt_reg_boundary_xy.vertices).intersection(
                                    tile_box_xy).area == 0):
                            keep_tile[j] = False
                    else:
                        warnings.warn(
                            "Unable to use regions_from_filters to remove SD/bg tiles"
                        )

            # remove anything that needs to be discarded
            tiles_foreach_bin[i] = tile_set[keep_tile]

    # Remove empty bins
    tile_sets = [tile_set for tile_set in tiles_foreach_bin if len(tile_set)]
    print(
        "{0} non-empty map bins (out of {1}) found between {2} and {3}".format(
            len(tile_sets), N_bins, min_val, max_val))

    # Repeat the seds Nrealize times (sample each on at Nrealize
    # different positions, in each region)
    repeated_seds = np.repeat(chosen_seds, Nrealize)
    Nseds_per_region = len(repeated_seds)
    # For each set of tiles, repeat the seds and spread them evenly over
    # the tiles
    repeated_seds = np.repeat(repeated_seds, len(tile_sets))

    out_table = Table(repeated_seds, names=chosen_seds.colnames)
    ast_x_list = np.zeros(len(out_table))
    ast_y_list = np.zeros(len(out_table))
    bin_indices = np.zeros(len(out_table))

    tile_ra_min, tile_dec_min = bdm.min_ras_decs()
    tile_ra_delta, tile_dec_delta = bdm.delta_ras_decs()

    for bin_index, tile_set in enumerate(
            tqdm(
                tile_sets,
                desc="{:.2f} models per map bin".format(Nseds_per_region /
                                                        Npermodel),
            )):
        start = bin_index * Nseds_per_region
        stop = start + Nseds_per_region
        bin_indices[start:stop] = bin_index
        for i in range(Nseds_per_region):

            # keep track of whether we're still looking for valid coordinates
            x = None
            y = None

            while (x is None) or (y is None):
                # Pick a random tile in this tile set
                tile = np.random.choice(tile_set)
                # Within this tile, pick a random ra and dec
                ra = tile_ra_min[tile] + np.random.random_sample(
                ) * tile_ra_delta[tile]
                dec = (tile_dec_min[tile] +
                       np.random.random_sample() * tile_dec_delta[tile])

                # if we can't convert this to x/y, do everything in RA/Dec
                if ref_wcs is None:
                    x, y = ra, dec

                    # check that this x/y is within the catalog footprint
                    if catalog_boundary_radec:
                        # N,2 array of AST X and Y positions
                        inbounds = catalog_boundary_radec.contains_points(
                            [[x, y]])[0]

                        if not inbounds:
                            x = None

                    # check that this x/y is with any input boundary
                    if set_coord_boundary is not None:
                        if coord_boundary_radec:
                            inbounds = coord_boundary_radec.contains_points(
                                [[x, y]])[0]
                            if not inbounds:
                                x = None
                    if region_from_filters is not None:
                        if filt_reg_boundary_radec:
                            # fmt: off
                            inbounds = filt_reg_boundary_radec.contains_points(
                                [[x, y]])[0]
                            # fmt: on
                            if not inbounds:
                                x = None

                # if we can convert to x/y, do everything in x/y
                else:
                    [x], [y] = ref_wcs.all_world2pix(np.array([ra]),
                                                     np.array([dec]),
                                                     wcs_origin)

                    # check that this x/y is within the catalog footprint
                    # N,2 array of AST X and Y positions
                    inbounds = catalog_boundary_xy.contains_points([[x, y]])[0]
                    if not inbounds:
                        x = None

                    # check that this x/y is with any input boundary
                    if set_coord_boundary is not None:
                        if coord_boundary_xy:
                            inbounds = coord_boundary_xy.contains_points(
                                [[x, y]])[0]
                            if not inbounds:
                                x = None
                    if region_from_filters is not None:
                        if filt_reg_boundary_xy:
                            inbounds = filt_reg_boundary_xy.contains_points(
                                [[x, y]])[0]
                            if not inbounds:
                                x = None

            j = bin_index + i * len(tile_sets)
            ast_x_list[j] = x
            ast_y_list[j] = y

    # I'm just mimicking the format that is produced by the examples
    cs = []
    cs.append(Column(np.zeros(len(out_table), dtype=int), name="zeros"))
    cs.append(Column(np.ones(len(out_table), dtype=int), name="ones"))

    # positions were found using RA/Dec
    if ref_wcs is None:
        cs.append(Column(ast_x_list, name="RA"))
        cs.append(Column(ast_y_list, name="DEC"))
    # positions were found using x/y
    else:
        cs.append(Column(ast_x_list, name="X"))
        cs.append(Column(ast_y_list, name="Y"))

    for i, c in enumerate(cs):
        out_table.add_column(c, index=i)  # insert these columns from the left

    # Write out the table in ascii
    if outfile:
        formats = {k: "%.5f" for k in out_table.colnames[2:]}
        ascii.write(out_table, outfile, overwrite=True, formats=formats)

    return out_table
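
A standalone sketch of the boundary geometry all three examples rely on: a convex hull around the catalog positions becomes a matplotlib Path, candidate AST positions are tested with contains_points, and map tiles are kept only if their bounding box intersects the hull polygon. The positions and tile below are made-up values for illustration, not data from any field.

import numpy as np
from matplotlib.path import Path
from scipy.spatial import ConvexHull
from shapely.geometry import Polygon, box

# fake catalog positions (in practice these come from the photometry catalog)
rng = np.random.default_rng(0)
x_positions = rng.uniform(0, 4000, 5000)
y_positions = rng.uniform(0, 4000, 5000)

# convex-hull footprint as a Path (same construction as in the examples)
coords = np.array([x_positions, y_positions]).T
hull = ConvexHull(coords)
catalog_boundary = Path(coords[hull.vertices])

# point-in-footprint test for a candidate AST position
print(catalog_boundary.contains_points([[2000.0, 2000.0]])[0])

# tile-overlap test: keep a tile only if its box intersects the footprint
tile_box = box(3900.0, 3900.0, 4100.0, 4100.0)
keep_tile = Polygon(catalog_boundary.vertices).intersection(tile_box).area > 0
print(keep_tile)
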