def split_main(
    beast_settings_info,
    catfile,
    astfile,
    mapfile,
    n_per_file=6250,
    min_n_subfile=None,
    sort_col="F475W_RATE",
):
    """
    Split the photometry catalog and the AST catalog into sub-catalogs that
    share a consistent binning of the source density (or background) map.

    Parameters
    ----------
    beast_settings_info : string or instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    catfile : string
        name of the photometry catalog file

    astfile : string
        name of the ast catalog file

    mapfile : string
        background or source density map file

    n_per_file : int or None (default=6250)
        If set, divide the split catalog into sub-catalogs with length
        n_per_file.  Good for photometry, not useful for ASTs.

    min_n_subfile : int or None (default=None)
        If set, each bin in the photometry catalog will be split into at least
        this many subfiles.  Useful if a bin has fewer than n_per_file stars
        but you still want flux-sorted subfiles (which means more trimming and
        faster fitting).

    sort_col : string (default="F475W_RATE")
        If n_per_file or min_n_subfile is set, the catalog will be sorted by
        this column before splitting into sub-catalogs.
    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # Create a binned density map, so both the observed and the ast
    # catalog can be split using a consistent grouping (= binning) of
    # the tiles
    if not settings.sd_Nbins and not settings.sd_binwidth and not settings.sd_custom:
        raise RuntimeError(
            "You need to specify the source density binning parameters in beast_settings_info"
        )

    bdm = BinnedDensityMap.create(
        mapfile,
        bin_mode=settings.sd_binmode,
        N_bins=settings.sd_Nbins,
        bin_width=settings.sd_binwidth,
        custom_bins=settings.sd_custom,
    )

    print("Splitting catalog")
    split_catalog_using_map(
        catfile,
        bdm,
        n_per_file=n_per_file,
        min_n_subfile=min_n_subfile,
        sort_col=sort_col,
    )

    print("")
    print("Splitting ASTs")
    split_catalog_using_map(
        astfile, bdm, ra_colname="RA_J2000", dec_colname="DEC_J2000", n_per_file=None
    )
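
# A minimal usage sketch (not part of the pipeline; the settings, catalog, AST,
# and map file names below are hypothetical placeholders):
#
#     split_main(
#         "beast_settings.txt",
#         "data/field.gst_with_sourceden_cut.fits",
#         "data/field.gst.fake_cut.fits",
#         "data/field.gst_sourceden_map.hd5",
#         n_per_file=6250,
#         sort_col="F475W_RATE",
#     )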
def make_ast_inputs(beast_settings_info, pick_method="flux_bin_method"):
    """
    Make the list of artificial stars to be run through the photometry pipeline

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    pick_method : string (default = "flux_bin_method")
        By default, use the flux bin method to select SEDs.  If set to
        "random_seds", randomly select SEDs from the model grid.  If set to
        "suppl_seds", supplement the existing input ASTs by randomly selecting
        additional SEDs from the list of non-selected models.
    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # read in the photometry catalog
    obsdata = Observations(
        settings.obsfile, settings.filters, obs_colnames=settings.obs_colnames
    )

    # --------------------
    # select SEDs
    # --------------------

    modelsedgrid_filename = "./{0}/{0}_seds.grid.hd5".format(settings.project)

    Nrealize = settings.ast_realization_per_model

    # file names for stars and corresponding SED parameters
    if pick_method == "suppl_seds":
        outfile_seds = "./{0}/{0}_inputAST_seds_suppl.txt".format(settings.project)
        outfile_params = "./{0}/{0}_ASTparams_suppl.fits".format(settings.project)
    else:
        outfile_seds = "./{0}/{0}_inputAST_seds.txt".format(settings.project)
        outfile_params = "./{0}/{0}_ASTparams.fits".format(settings.project)

    # if the SED file doesn't exist, create SEDs
    if not os.path.isfile(outfile_seds):

        print("Selecting SEDs for ASTs")

        if pick_method == "flux_bin_method":

            N_fluxes = settings.ast_n_flux_bins
            min_N_per_flux = settings.ast_n_per_flux_bin
            bins_outfile = "./{0}/{0}_ASTfluxbins.txt".format(settings.project)

            chosen_seds = pick_models_toothpick_style(
                modelsedgrid_filename,
                settings.filters,
                N_fluxes,
                min_N_per_flux,
                outfile=outfile_seds,
                outfile_params=outfile_params,
                bins_outfile=bins_outfile,
            )

        if pick_method == "random_seds":

            # construct magnitude cuts
            mag_cuts = settings.ast_maglimit
            Nfilters = settings.ast_bands_above_maglimit

            if len(mag_cuts) == 1:
                tmp_cuts = mag_cuts
                min_mags = np.zeros(len(settings.filters))
                for k, filtername in enumerate(obsdata.filters):
                    sfiltername = obsdata.filter_aliases[filtername]
                    sfiltername = sfiltername.replace("rate", "vega")
                    sfiltername = sfiltername.replace("RATE", "VEGA")
                    (keep,) = np.where(obsdata[sfiltername] < 99.0)
                    min_mags[k] = np.percentile(obsdata[keep][sfiltername], 90.0)

                # max. mags from the gst observation cat.
                mag_cuts = min_mags + tmp_cuts

            N_models = settings.ast_models_selected_per_age

            chosen_seds = pick_models(
                modelsedgrid_filename,
                settings.filters,
                mag_cuts,
                Nfilter=Nfilters,
                N_stars=N_models,
                Nrealize=Nrealize,
                outfile=outfile_seds,
                outfile_params=outfile_params,
            )

        if pick_method == "suppl_seds":

            print("Supplementing ASTs")

            nAST = settings.ast_N_supplement
            existingASTfile = settings.ast_existing_file
            mag_cuts = settings.ast_suppl_maglimit
            color_cuts = settings.ast_suppl_colorlimit

            chosen_seds = supplement_ast(
                modelsedgrid_filename,
                settings.filters,
                nAST=nAST,
                existingASTfile=existingASTfile,
                outASTfile=outfile_seds,
                outASTfile_params=outfile_params,
                mag_cuts=mag_cuts,
                color_cuts=color_cuts,
            )

    # if the SED file does exist, read them in
    else:

        print("Reading existing AST SEDs")
        chosen_seds = Table.read(outfile_seds, format="ascii")

    # --------------------
    # assign positions
    # --------------------

    # if we want ASTs with positions included (rather than just the fluxes from
    # the section above)
    if settings.ast_with_positions:

        print("Assigning positions to artificial stars")

        outfile = "./{0}/{0}_inputAST.txt".format(settings.project)
        if pick_method == "suppl_seds":
            outfile = "./{0}/{0}_inputAST_suppl.txt".format(settings.project)

        # if we're replicating SEDs across source density or background bins
        if settings.ast_density_table is not None:

            if hasattr(settings, "ast_reference_image_hdu_extension"):
                hdu_ext = settings.ast_reference_image_hdu_extension
            else:
                hdu_ext = 1

            make_ast_xy_list.pick_positions_from_map(
                obsdata,
                chosen_seds,
                settings.ast_density_table,
                settings.sd_binmode,
                settings.sd_Nbins,
                settings.sd_binwidth,
                settings.sd_custom,
                settings.ast_realization_per_model,
                outfile=outfile,
                refimage=settings.ast_reference_image,
                refimage_hdu=hdu_ext,
                wcs_origin=1,
                Nrealize=1,
                set_coord_boundary=settings.ast_coord_boundary,
                region_from_filters="all",
                erode_boundary=settings.ast_erode_selection_region,
            )

        # if we're not using SD/background maps, SEDs will be distributed
        # based on catalog sources
        else:

            make_ast_xy_list.pick_positions(
                obsdata,
                outfile_seds,
                outfile,
                settings.ast_pixel_distribution,
                refimage=settings.ast_reference_image,
            )


    "--trim",
    help="Trim the physics and observation model grids",
    action="store_true",
)
parser.add_argument("-f", "--fit", help="Fit the observed data", action="store_true")
parser.add_argument("-r", "--resume", help="Resume a fitting run", action="store_true")
args = parser.parse_args()

# read in BEAST settings
settings = beast_settings.beast_settings("beast_settings.txt")

if args.physicsmodel:
    create_physicsmodel.create_physicsmodel(
        settings,
        nsubs=settings.n_subgrid,
        nprocs=1,
    )

if args.ast:
    make_ast_inputs.make_ast_inputs(settings, pick_method="random_seds")

if args.observationmodel:
    print("Generating noise model from ASTs and absflux A matrix")
def beast_ast_inputs(field_name=None, ref_image=None, filter_ids=None, galaxy=None, supp=0): """ This does all of the steps for generating AST inputs and can be used a wrapper to automatically do most steps for multiple fields. * make field's beast_settings file * make source density map * make background density map * split catalog by source density * make physics model (SED grid) * make input list for ASTs * prune input ASTs ---- Inputs: field_name (str): name of field ref_image (str): path to reference image filter_ids (list): list of indexes corresponding to the filters in the observation, referenced to the master list below. galaxy (str): name of target galaxy (e.g., 'SMC', 'LMC') ---- Places for user to manually do things: * editing code before use - here: list the catalog filter names with the corresponding BEAST names - here: choose settings (pixel size, filter, mag range) for the source density map - here: choose settings (pixel size, reference image) for the background map """ # the list of fields field_names = [field_name] # the path+file for a reference image im_path = [ref_image] ref_filter = ["F475W"] # choose a filter to use for removing artifacts # (remove catalog sources with filter_FLAG > 99) flag_filter = ["F475W"] # number of fields n_field = len(field_names) # Need to know what the correspondence is between filter names in the # catalog and the BEAST filter names. # # These will be used to automatically determine the filters present in # each GST file and fill in the beast settings file. The order doesn't # matter, as long as the order in one list matches the order in the other # list. # gst_filter_names = [ "F225W", "F275W", "F336W", "F475W", "F814W", "F110W", "F160W", "F657N", ] beast_filter_names = [ "HST_WFC3_F225W", "HST_WFC3_F275W", "HST_WFC3_F336W", "HST_WFC3_F475W", "HST_WFC3_F814W", "HST_WFC3_F110W", "HST_WFC3_F160W", "HST_WFC3_F657N", ] for b in range(n_field): print("********") print("field " + field_names[b]) print("********") # ----------------- # data file names # ----------------- # paths for the data/AST files gst_file = "./data/{0}/{0}.vgst.fits".format(field_names[b]) ast_input_file = "./{0}/{0}_inputAST.txt".format(field_names[b]) # if no galaxy is manually indicated, try to fetch from gst_file name if galaxy == None: print("no galaxy specified") print("fetching galaxy from field name") galaxy_attempt = field_names[b].split("_")[1].split("-")[0] print("is this the correct galaxy? : %s" % galaxy_attempt) # raw_input returns the empty string for "enter" yes = {'yes', 'y', 'ye', ''} no = {'no', 'n'} response = 0 while response == 0: choice = input().lower() if choice in yes: galaxy = galaxy_attempt response = 1 elif choice in no: print("please rerun with --galaxy specified") break else: sys.stdout.write("Please respond with 'yes' or 'no'") # path for the reference image (if using for the background map) im_file = im_path[b] # fetch filter ids gst_data = Table.read(gst_file) filter_cols = [c for c in gst_data.colnames if "VEGA" in c] # extract every filter mentioned in the table filters = [f.split("_")[0] for f in filter_cols] # match with the gst filter list filter_ids = [gst_filter_names.index(i) for i in filters] filter_ids.sort() gst_filter_names = [gst_filter_names[i] for i in filter_ids] beast_filter_names = [beast_filter_names[i] for i in filter_ids] print(beast_filter_names) # region file with catalog stars # make_region_file(gst_file, ref_filter[b]) # ----------------- # 0. 
make beast settings file # ----------------- print("") print("creating beast settings file") print("") beast_settings_filename = create_beast_settings(gst_file, ast_input_file, gst_filter_names, beast_filter_names, galaxy, ref_image=im_file, supp=supp) # load in beast settings to get number of subgrids settings = beast_settings.beast_settings( beast_settings_filename #"beast_settings_" + galaxy + "_asts_" + field_names[b] + ".txt" ) # ----------------- # 1a. make magnitude histograms # ----------------- print("") print("making magnitude histograms") print("") # if not os.path.isfile('./data/'+field_names[b]+'.gst_maghist.pdf'): peak_mags = plot_mag_hist.plot_mag_hist(gst_file, stars_per_bin=70, max_bins=75) # ----------------- # 1b. make a source density map # ----------------- print("") print("making source density map") print("") # not currently doing background density bins # use_bg_info = True use_bg_info = False if use_bg_info: background_args = types.SimpleNamespace( subcommand="background", catfile=gst_file, erode_boundary=settings.ast_erode_selection_region, pixsize=5, npix=None, reference=im_file, mask_radius=10, ann_width=20, cat_filter=[ref_filter, "90"], ) create_background_density_map.main_make_map(background_args) # but we are doing source density bins! if not os.path.isfile( gst_file.replace(".fits", "_source_den_image.fits")): print("No sd image file found") # - pixel size of 10 arcsec # - use ref_filter[b] between vega mags of 17 and peak_mags[ref_filter[b]]-0.5 sourceden_args = types.SimpleNamespace( subcommand="sourceden", catfile=gst_file, erode_boundary=settings.ast_erode_selection_region, pixsize=5, npix=None, mag_name=ref_filter[0] + "_VEGA", mag_cut=[17, peak_mags[ref_filter[0]] - 0.5], flag_name=flag_filter[0] + "_FLAG", ) create_background_density_map.main_make_map(sourceden_args) # new file name with the source density column gst_file_sd = gst_file.replace(".fits", "_with_sourceden.fits") # ----------------- # 2. make physics model # ----------------- print("") print("making physics model") print("") # see which subgrid files already exist gs_str = "" if settings.n_subgrid > 1: gs_str = "sub*" # try to fetch the list of SED files (physics models) model_grid_files = sorted( glob.glob("./{0}/{0}_seds.grid*.hd5".format(field_names[b], ))) # only make the physics model they don't already exist if len(model_grid_files) < settings.n_subgrid: # directly create physics model grids create_physicsmodel.create_physicsmodel(settings, nprocs=1, nsubs=settings.n_subgrid) # fetch the list of SED files again (physics models) model_grid_files = sorted( glob.glob("./{0}/{0}_seds.grid*.hd5".format(field_names[b], ))) # ------------------- # 3. make AST inputs # ------------------- print("") print("making AST inputs") print("") # only create an AST input list if the ASTs don't already exist if not os.path.isfile(ast_input_file): make_ast_inputs.make_ast_inputs(settings, pick_method="flux_bin_method") # compare magnitude histograms of ASTs with catalog plot_ast_histogram.plot_ast_histogram( ast_file=ast_input_file, sed_grid_file=model_grid_files[0]) if supp != 0: print("") print("making supplemental AST inputs") print("") ast_input_supp_file = "./{0}/{0}_inputAST_suppl.txt".format( field_names[b]) if not os.path.isfile(ast_input_supp_file): make_ast_inputs.make_ast_inputs(settings, pick_method="suppl_seds") print("now go check the diagnostic plots!")
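
# A minimal usage sketch (hypothetical field name and reference image path; the
# galaxy argument can be omitted, in which case it is guessed from the field
# name and confirmed interactively):
#
#     beast_ast_inputs(
#         field_name="12345_SMC-1234no-5678",
#         ref_image="./data/12345_SMC-1234no-5678/12345_SMC-1234no-5678_F475W_drz.fits",
#         galaxy="SMC",
#         supp=0,
#     )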
def beast_production_wrapper():
    """
    This does all of the steps for a full production run, and can be used as
    a wrapper to automatically do most steps for multiple fields.
    * make field's beast_settings file
    * make source density map
    * make background density map
    * split catalog by source density
    * make physics model (SED grid)
    * make input list for ASTs
    * make noise model
    * generate batch script to trim models
    * generate batch script to fit models
    * merge stats files back together
    * spatially reorder the results

    Places for user to manually do things:
    * editing code before use
        - beast_settings_template.py: setting up the file with desired parameters
        - here: list the catalog filter names with the corresponding BEAST names
        - here: choose settings (pixel size, filter, mag range) for the source density map
        - here: choose settings (pixel size, reference image) for the background map
        - here: choose settings (filter, number per file) for dividing catalog by source density
        - here: choose settings (# files, nice level) for the trimming/fitting batch scripts
    * process the ASTs, as described in BEAST documentation
    * run the trimming scripts
    * run the fitting scripts
    """

    # the list of fields
    field_names = ["15275_IC1613"]

    # distance moduli and velocities
    # http://adsabs.harvard.edu/abs/2013AJ....146...86T
    dist_mod = [24.36]
    velocity = [-236]

    # the path+file for a reference image
    im_path = ["../beast_dwarfs/images/15275_IC1613_F555W_drz.fits.gz"]
    ref_filter = ["F555W"]

    # choose a filter to use for removing artifacts
    # (remove catalog sources with filter_FLAG > 99)
    flag_filter = ["F555W"]

    # number of fields
    n_field = len(field_names)

    # Need to know what the correspondence is between filter names in the
    # catalog and the BEAST filter names.
    #
    # These will be used to automatically determine the filters present in
    # each GST file and fill in the beast settings file.  The order doesn't
    # matter, as long as the order in one list matches the order in the other
    # list.
    #
    gst_filter_names = ["F275W", "F336W", "F390M", "F555W", "F814W", "F110W", "F160W"]
    beast_filter_names = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_WFC3_F390M",
        "HST_WFC3_F555W",
        "HST_WFC3_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]

    for b in range(n_field):
        # for b in [0]:

        print("********")
        print("field " + field_names[b])
        print("********")

        # -----------------
        # data file names
        # -----------------

        # paths for the data/AST files
        gst_file = "./data/" + field_names[b] + ".gst.fits"
        ast_file = "./data/" + field_names[b] + ".gst.fake.fits"
        # path for the reference image (if using for the background map)
        im_file = im_path[b]

        # region file with catalog stars
        # make_region_file(gst_file, ref_filter[b])

        # -----------------
        # 0. make beast settings file
        # -----------------

        print("")
        print("creating beast settings file")
        print("")

        create_beast_settings(
            gst_file,
            ast_file,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # load in beast settings to get number of subgrids
        settings = beast_settings.beast_settings(
            "beast_settings_" + field_names[b] + ".txt"
        )

        # -----------------
        # 1a. make magnitude histograms
        # -----------------

        print("")
        print("making magnitude histograms")
        print("")

        # if not os.path.isfile('./data/'+field_names[b]+'.gst_maghist.pdf'):
        peak_mags = plot_mag_hist.plot_mag_hist(gst_file, stars_per_bin=70, max_bins=75)
        # test = plot_mag_hist.plot_mag_hist(ast_file, stars_per_bin=200, max_bins=30)

        # -----------------
        # 1b. make a source density map
        # -----------------

        print("")
        print("making source density map")
        print("")

        # not currently doing background density bins
        # use_bg_info = True
        use_bg_info = False
        if use_bg_info:
            background_args = types.SimpleNamespace(
                subcommand="background",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                reference=im_file,
                mask_radius=10,
                ann_width=20,
                cat_filter=[ref_filter, "90"],
            )
            create_background_density_map.main_make_map(background_args)

        # but we are doing source density bins!
        if not os.path.isfile(gst_file.replace(".fits", "_source_den_image.fits")):
            # - pixel size of 5 arcsec
            # - use ref_filter[b] between vega mags of 15 and peak_mags[ref_filter[b]]-0.5
            sourceden_args = types.SimpleNamespace(
                subcommand="sourceden",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                mag_name=ref_filter[b] + "_VEGA",
                mag_cut=[15, peak_mags[ref_filter[b]] - 0.5],
                flag_name=flag_filter[b] + "_FLAG",
            )
            create_background_density_map.main_make_map(sourceden_args)

        # new file name with the source density column
        gst_file_sd = gst_file.replace(".fits", "_with_sourceden.fits")

        # -----------------
        # 2. make physics model
        # -----------------

        print("")
        print("making physics model")
        print("")

        # see which subgrid files already exist
        gs_str = ""
        if settings.n_subgrid > 1:
            gs_str = "sub*"

        sed_files = glob.glob(
            "./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(field_names[b], gs_str)
        )

        # only make the physics model if it doesn't already exist
        if len(sed_files) < settings.n_subgrid:
            # directly create physics model grids
            # create_physicsmodel.create_physicsmodel(nprocs=1, nsubs=settings.n_subgrid)
            # create grids with script
            create_physicsmodel.split_create_physicsmodel(
                settings, nprocs=1, nsubs=settings.n_subgrid
            )
            print("\n**** go run physics model code for " + field_names[b] + "! ****")
            continue

        # -----------------
        # 3. make ASTs
        # -----------------

        # only create an AST input list if the ASTs don't already exist
        ast_input_file = (
            "./" + field_names[b] + "_beast/" + field_names[b] + "_beast_inputAST.txt"
        )
        if not os.path.isfile(ast_file):
            if not os.path.isfile(ast_input_file):
                print("")
                print("creating artificial stars")
                print("")
                make_ast_inputs.make_ast_inputs(settings, pick_method="flux_bin_method")

            split_ast_input_file.split_asts(
                field_names[b] + "_beast", ast_input_file, 2000
            )

            print("\n**** go run ASTs for " + field_names[b] + "! ****\n")
            continue

        # -----------------
        # 4/5. edit photometry/AST catalogs
        # -----------------

        # remove sources that are
        # - in regions without full imaging coverage,
        # - flagged in flag_filter

        print("")
        print("editing photometry/AST catalogs")
        print("")

        # - photometry
        gst_file_cut = gst_file.replace(".fits", "_with_sourceden_cut.fits")
        ast_file_cut = ast_file.replace(".fits", "_cut.fits")

        cut_catalogs.cut_catalogs(
            gst_file_sd,
            gst_file_cut,
            ast_file,
            ast_file_cut,
            partial_overlap=True,
            flagged=True,
            flag_filter=flag_filter[b],
            region_file=True,
        )

        # edit the beast settings file to have the correct photometry file name
        # (AST file name is already automatically the cut version)
        create_beast_settings(
            gst_file_cut,
            ast_file_cut,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # load in the new settings
        settings = beast_settings.beast_settings(
            "beast_settings_" + field_names[b] + ".txt"
        )

        # -----------------
        # 6. split observations by source density
        # -----------------

        print("")
        print("splitting observations by source density")
        print("")

        # - photometry
        if len(glob.glob(gst_file_cut.replace(".fits", "*sub*fits"))) == 0:
            # a smaller value for n_per_file will mean more individual files/runs,
            # but each run will take a shorter amount of time
            split_catalog_using_map.split_main(
                settings,
                gst_file_cut,
                ast_file_cut,
                gst_file.replace(".fits", "_sourceden_map.hd5"),
                n_per_file=6250,
                sort_col=ref_filter[b] + "_RATE",
            )

        # -- at this point, we can run the code to create lists of filenames
        file_dict = create_filenames.create_filenames(
            settings, use_sd=True, nsubs=settings.n_subgrid
        )

        # figure out how many files there are
        sd_sub_info = file_dict["sd_sub_info"]
        # - number of SD bins
        temp = set([i[0] for i in sd_sub_info])
        print("** total SD bins: " + str(len(temp)))
        # - the unique sets of SD+sub
        unique_sd_sub = [
            x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x)
        ]
        print("** total SD subfiles: " + str(len(unique_sd_sub)))

        # -----------------
        # 7. make noise models
        # -----------------

        print("")
        print("making noise models")
        print("")

        # create the noise model (this code will check if it exists)
        create_obsmodel.create_obsmodel(
            settings, use_sd=True, nsubs=settings.n_subgrid, nprocs=1
        )

        # -----------------
        # 8. make script to trim models
        # -----------------

        print("")
        print("setting up script to trim models")
        print("")

        job_file_list = make_trim_scripts.make_trim_scripts(
            settings, num_subtrim=1, prefix="source activate b13"
        )

        if len(job_file_list) > 0:
            print("\n**** go run trimming code for " + field_names[b] + "! ****")
            print("Here are the command(s) to run:")
            for job in job_file_list:
                print("at -f " + job + " now")
            return
        else:
            print("all files are trimmed for " + field_names[b])

        # -----------------
        # 9. make script to fit models
        # -----------------

        print("")
        print("setting up script to fit models")
        print("")

        fit_run_info = setup_batch_beast_fit.setup_batch_beast_fit(
            settings,
            num_percore=1,
            nice=19,
            overwrite_logfile=False,
            prefix="source activate b13",
            use_sd=True,
            nsubs=settings.n_subgrid,
            nprocs=1,
        )

        # check if the fits exist before moving on
        tot_remaining = len(fit_run_info["done"]) - np.sum(fit_run_info["done"])
        if tot_remaining > 0:
            print("\n**** go run fitting code for " + field_names[b] + "! ****")
            print(
                "Here are the "
                + str(len(fit_run_info["files_to_run"]))
                + " commands to run:"
            )
            for job_file in fit_run_info["files_to_run"]:
                print("at -f ./" + job_file + " now")
            continue
        else:
            print("all fits are complete for " + field_names[b])

        # -----------------
        # 10. merge stats files from each fit
        # -----------------

        print("")
        print("merging stats files")
        print("")

        merge_files.merge_files(settings, use_sd=True, nsubs=settings.n_subgrid)
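
# A usage sketch for the wrapper above: it takes no arguments (fields, images,
# and filters are hard-coded in the lists at the top) and is meant to be rerun
# after each manual step it requests:
#
#     beast_production_wrapper()
#     # ... run the physics model / AST / trimming / fitting jobs it prints
#     # (e.g. "at -f <job file> now"), then call it again to continue
#     beast_production_wrapper()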
def plot_toothpick_details(asts_filename, settings_filename, savefig=False): """ Plot the details of the toothpick noisemodel creation for each filter. These plots show the individual AST results as points as (flux_in - flux_out)/flux_in. In addition, the binned values of these points are plotted giving the bias term in the observation model. Error bars around the binned bias values give the binned sigma term of the observation model. Finally, as a separate column of plots the binned completeness in each filter is plotted. Parameters ---------- asts_filename : str filename with the AST results settings_filename : str filename with the SED grid (used just for the filter information) savefig : str (default=False) to save the figure, set this to the file extension (e.g., 'png', 'pdf') """ settings = beast_settings.beast_settings(settings_filename) # read in AST results model = toothpick.MultiFilterASTs(asts_filename, settings.filters) # set the column mappings as the external file is BAND_VEGA or BAND_IN model.set_data_mappings(upcase=True, in_pair=("in", "in"), out_pair=("out", "rate")) # compute binned biases, uncertainties, and completeness as a function of band flux ast_nonrecovered_ratio = 2.0 model.fit_bins( nbins=50, ast_nonrecovered_ratio=ast_nonrecovered_ratio, ) nfilters = len(settings.filters) figsize_y = nfilters * 3 fig, ax = plt.subplots(nrows=nfilters, ncols=2, figsize=(14, figsize_y), sharex=True) set_params() for i, cfilter in enumerate(settings.filters): mag_in = model.data[model.filter_aliases[cfilter + "_in"]] flux_out = model.data[model.filter_aliases[cfilter + "_out"]] flux_in = (10**(-0.4 * mag_in)) * model.vega_flux[i] flux_out *= model.vega_flux[i] gvals = flux_out != 0.0 ax[i, 0].plot( flux_in[gvals], flux_out[gvals] / flux_in[gvals], "ko", alpha=0.1, markersize=2, ) # not all bins are filled with good data ngbins = model._nasts[i] ax[i, 0].plot( model._fluxes[0:ngbins, i], 1. + model._biases[0:ngbins, i] / model._fluxes[0:ngbins, i], "b-", ) ax[i, 0].errorbar( model._fluxes[0:ngbins, i], 1. + model._biases[0:ngbins, i] / model._fluxes[0:ngbins, i], yerr=model._sigmas[0:ngbins, i] / model._fluxes[0:ngbins, i], fmt="bo", markersize=2, alpha=0.5, ) if ast_nonrecovered_ratio is not None: ax[i, 0].axhline(ast_nonrecovered_ratio, linestyle="--", alpha=0.25, color="k") ax[i, 0].set_ylim(-10, 2.5) ax[i, 0].set_ylabel(r"$F_o/F_i$") ax[i, 1].plot( model._fluxes[0:ngbins, i], model._compls[0:ngbins, i], "b-", ) ax[i, 1].yaxis.tick_right() ax[i, 1].yaxis.set_label_position("right") ax[i, 1].set_ylim(0, 1) ax[i, 1].set_xscale("log") sfilt = cfilter.split("_")[-1] ax[i, 1].set_ylabel(f"C({sfilt})") ax[nfilters - 1, 0].set_xlabel(r"$F_i$") ax[nfilters - 1, 1].set_xlabel(r"$F_i$") # add in the zero line # do after all the data has been plotted to get the full x range pxrange = ax[0, 0].get_xlim() for i, cfilter in enumerate(settings.filters): ax[i, 0].plot(pxrange, [1.0, 1.0], "k--", alpha=0.5) # figname basename = asts_filename.replace(".fits", "_plot") fig.tight_layout() # save or show fig if savefig: fig.savefig("{}.{}".format(basename, savefig)) else: plt.show()
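
# A minimal usage sketch (the AST results file name is a hypothetical
# placeholder; the file needs the <band>_IN and <band>_RATE/_VEGA columns from
# the AST photometry, and the settings file is only used for the filter list):
#
#     plot_toothpick_details(
#         "data/field.gst.fake.fits",
#         "beast_settings.txt",
#         savefig="png",
#     )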
def split_create_physicsmodel(beast_settings_info, nsubs=1, nprocs=1): """ Making the physics model grid takes a while for production runs. This creates scripts to run each subgrid as a separate job. Parameters ---------- beast_settings_info : string or beast.tools.beast_settings.beast_settings instance if string: file name with beast settings if class: beast.tools.beast_settings.beast_settings instance nsubs : int (default=1) number of subgrids to split the physics model into nprocs : int (default=1) Number of parallel processes to use (currently only implemented for subgrids) """ # process beast settings info if isinstance(beast_settings_info, str): settings = beast_settings.beast_settings(beast_settings_info) elif isinstance(beast_settings_info, beast_settings.beast_settings): settings = beast_settings_info else: raise TypeError( "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance" ) # make sure the project directory exists create_project_dir(settings.project) # directory for scripts job_path = "./{0}/model_batch_jobs/".format(settings.project) if not os.path.isdir(job_path): os.mkdir(job_path) log_path = job_path + "logs/" if not os.path.isdir(log_path): os.mkdir(log_path) for i in range(nsubs): joblist_file = job_path + "create_physicsmodel_" + str(i) + ".job" with open(joblist_file, "w") as jf: jf.write( "python -m beast.tools.run.create_physicsmodel " + " {0} ".format(settings.settings_file) + " --nsubs " + str(nsubs) + " --nprocs " + str(nprocs) + " --subset " + str(i) + " " + str(i + 1) + " >> " + log_path + "create_physicsmodel_" + str(i) + ".log\n" ) # slurm needs it to be executable os.chmod(joblist_file, stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH)
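
# A minimal usage sketch (hypothetical settings file name):
#
#     split_create_physicsmodel("beast_settings.txt", nsubs=5, nprocs=1)
#
# This writes ./<project>/model_batch_jobs/create_physicsmodel_<i>.job, one per
# subgrid, each of which runs beast.tools.run.create_physicsmodel for that
# subgrid and appends its output to the logs/ subdirectory.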
def merge_files(beast_settings_info, use_sd=True, nsubs=1, partial=False): """ Merge all of the results from the assorted fitting sub-files (divided by source density, subgrids, or both). If fitting is in progress but you want to check results of completed stars, set partial=True. This is only relevant when using subgrids. Parameters ---------- beast_settings_info : string or beast.tools.beast_settings.beast_settings instance if string: file name with beast settings if class: beast.tools.beast_settings.beast_settings instance use_sd : boolean (default=True) set to True if the fitting used source density bins nsubs : int (default=1) number of subgrids used for the physics model partial : boolean (default=False) If True, the output merged files will only have stars that have been run across all subgrids. If stars have only been fit in some subgrids and not others, they will be discarded in the "partial" output files. Currently only implemented for 1D PDFs and stats (not lnP) files. """ # if there's no SD and no subgridding, running this is unnecessary if (not use_sd) and (nsubs == 1): print("No merging necessary") return # process beast settings info if isinstance(beast_settings_info, str): settings = beast_settings.beast_settings(beast_settings_info) elif isinstance(beast_settings_info, beast_settings.beast_settings): settings = beast_settings_info else: raise TypeError( "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance" ) # get file name lists (to check if they exist and/or need to be resumed) file_dict = create_filenames.create_filenames(settings, use_sd=use_sd, nsubs=nsubs) # - input files # photometry_files = file_dict['photometry_files'] # modelsedgrid_files = file_dict['modelsedgrid_files'] # noise_files = file_dict['noise_files'] # - output files stats_files = file_dict["stats_files"] pdf_files = file_dict["pdf_files"] lnp_files = file_dict["lnp_files"] # - other useful info sd_sub_info = file_dict["sd_sub_info"] # gridsub_info = file_dict['gridsub_info'] # the unique sets of gridsub unique_sd_sub = [ x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x) ] # -------------------- # no subgrids # -------------------- if nsubs == 1: out_filebase = "{0}/{0}".format(settings.project) reorder_tags = [ "bin{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub ] merge_beast_stats.merge_stats_files(stats_files, out_filebase, reorder_tag_list=reorder_tags) # -------------------- # use subgrids # -------------------- if nsubs > 1: # runs were split by source density if use_sd: # lists to save the merged file names merged_pdf_files = [] merged_stats_files = [] merged_lnp_files = [] for sd_sub in unique_sd_sub: # indices with the current sd_sub ind = [j for j, x in enumerate(sd_sub_info) if x == sd_sub] # merge the subgrid files for that SD+sub out_filebase = "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}".format( settings.project, sd_sub[0], sd_sub[1]) if partial: out_filebase += "_partial" # - 1D PDFs and stats ( merged_pdf1d_fname, merged_stats_fname, ) = subgridding_tools.merge_pdf1d_stats( [pdf_files[j] for j in ind], [stats_files[j] for j in ind], re_run=False, output_fname_base=out_filebase, partial=partial, ) merged_pdf_files.append(merged_pdf1d_fname) merged_stats_files.append(merged_stats_fname) # - lnP files if not partial: merged_lnp_fname = subgridding_tools.merge_lnp( [lnp_files[j] for j in ind], re_run=False, output_fname_base=out_filebase, threshold=-10, ) merged_lnp_files.append(merged_lnp_fname) # merge the merged stats files 
out_filebase = "{0}/{0}".format(settings.project) reorder_tags = [ "bin{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub ] merge_beast_stats.merge_stats_files(merged_stats_files, out_filebase, reorder_tag_list=reorder_tags) # runs weren't split by source density else: out_filebase = "{0}/{0}".format(settings.project) # - 1D PDFs and stats subgridding_tools.merge_pdf1d_stats( pdf_files, stats_files, output_fname_base=out_filebase, partial=partial, ) # - lnP files if not partial: subgridding_tools.merge_lnp( lnp_files, re_run=False, output_fname_base=out_filebase, threshold=-10, )
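
# A minimal usage sketch (hypothetical settings file name) for a run that used
# source density bins and 5 subgrids:
#
#     merge_files("beast_settings.txt", use_sd=True, nsubs=5)
#
#     # while fitting is still in progress, merge only fully-finished stars
#     merge_files("beast_settings.txt", use_sd=True, nsubs=5, partial=True)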
def generate_files_for_tests(run_beast=True, run_tools=True): """ Use the metal_small example to generate a full set of files for the BEAST regression tests. Parameters ---------- run_beast : boolean (default=True) if True, run the BEAST run_tools : boolean (default=True) if True, run the code to generate things for tools """ # read in BEAST settings settings_orig = beast_settings.beast_settings("beast_settings.txt") # also make a version with subgrids settings_subgrids = copy.deepcopy(settings_orig) settings_subgrids.n_subgrid = 2 settings_subgrids.project = f"{settings_orig.project}_subgrids" # ========================================== # run the beast for each set of settings # ========================================== if run_beast: for settings in [settings_orig, settings_subgrids]: # ----------------- # physics model # ----------------- create_physicsmodel.create_physicsmodel( settings, nsubs=settings.n_subgrid, nprocs=1, ) # ----------------- # ASTs # ----------------- # currently only works for no subgrids if settings.n_subgrid == 1: make_ast_inputs.make_ast_inputs(settings, pick_method="flux_bin_method") # ----------------- # obs model # ----------------- create_obsmodel.create_obsmodel( settings, use_sd=False, nsubs=settings.n_subgrid, nprocs=1, use_rate=True, ) # ----------------- # trimming # ----------------- # make file names file_dict = create_filenames.create_filenames( settings, use_sd=False, nsubs=settings.n_subgrid) # read in the observed data obsdata = Observations(settings.obsfile, settings.filters, settings.obs_colnames) for i in range(settings.n_subgrid): # get the modesedgrid on which to generate the noisemodel modelsedgridfile = file_dict["modelsedgrid_files"][i] modelsedgrid = SEDGrid(modelsedgridfile) # read in the noise model just created noisemodel_vals = noisemodel.get_noisemodelcat( file_dict["noise_files"][i]) # trim the model sedgrid sed_trimname = file_dict["modelsedgrid_trim_files"][i] noisemodel_trimname = file_dict["noise_trim_files"][i] trim_grid.trim_models( modelsedgrid, noisemodel_vals, obsdata, sed_trimname, noisemodel_trimname, sigma_fac=3.0, ) # ----------------- # fitting # ----------------- run_fitting.run_fitting( settings, use_sd=False, nsubs=settings.n_subgrid, nprocs=1, pdf2d_param_list=["Av", "M_ini", "logT"], pdf_max_nbins=200, ) # ----------------- # merging # ----------------- # it'll automatically skip for no subgrids merge_files.merge_files(settings, use_sd=False, nsubs=settings.n_subgrid) print("\n\n") # ========================================== # reference files for assorted tools # ========================================== if run_tools: # ----------------- # compare_spec_type # ----------------- # the input settings input = { "spec_ra": [72.67213351], "spec_dec": [-67.71720515], "spec_type": ["A"], "spec_subtype": [0], "lumin_class": ["IV"], "match_radius": 0.2, } # run it output = compare_spec_type.compare_spec_type( settings_orig.obsfile, "{0}/{0}_stats.fits".format(settings_orig.project), **input, ) # save the inputs and outputs asdf.AsdfFile({ "input": input, "output": output }).write_to("{0}/{0}_compare_spec_type.asdf".format( settings_orig.project)) # ----------------- # star_type_probability # ----------------- # input settings input = { "output_filebase": None, "ext_O_star_params": { "min_M_ini": 10, "min_Av": 0.5, "max_Av": 5 }, } # run it output = star_type_probability.star_type_probability( "{0}/{0}_pdf1d.fits".format(settings_orig.project), "{0}/{0}_pdf2d.fits".format(settings_orig.project), **input, ) # save the 
inputs and outputs asdf.AsdfFile({ "input": input, "output": output }).write_to("{0}/{0}_star_type_probability.asdf".format( settings_orig.project)) # ========================================== # asdf file permissions # ========================================== # for unknown reasons, asdf currently writes files with permissions set # to -rw-------. This changes it to -rw-r--r-- (like the rest of the # BEAST files) so Karl can easily copy them over to the cached file # website. # list of asdf files asdf_files = glob.glob("*/*.asdf") # go through each one to change permissions for fname in asdf_files: os.chmod(fname, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
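
# A usage sketch: regenerate the regression-test reference files (both the
# default project and its "_subgrids" variant) from the metal_small example,
# assuming a local "beast_settings.txt" for that example:
#
#     generate_files_for_tests(run_beast=True, run_tools=True)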
def create_filenames( beast_settings_info, use_sd=True, nsubs=1, choose_sd_sub=None, choose_subgrid=None, ): """ Helper function to make all of the filenames. SED grid and noise model are trimmed versions. Parameters ---------- beast_settings_info : string or beast.tools.beast_settings.beast_settings instance if string: file name with beast settings if class: beast.tools.beast_settings.beast_settings instance use_sd : boolean (default=True) If True, create source density dependent noise models (determined by finding matches to settings.astfile with SD info) nsubs : int (default=1) number of subgrids used for the physics model choose_sd_sub : list of two strings (default=None) If this is set, the fitting will just be for this combo of SD+sub, rather than all of them. Overrides use_sd. format of the list: ['#','#'] choose_subgrid : int (default=None) If this is set, the fitting with just be for this subgrid index. If nsubs=1, this is ignored. Returns ------- dictionary with the lists of filenames, plus the corresponding SD+sub and gridsub values for easy referencing """ # process beast settings info if isinstance(beast_settings_info, str): settings = beast_settings.beast_settings(beast_settings_info) elif isinstance(beast_settings_info, beast_settings.beast_settings): settings = beast_settings_info else: raise TypeError( "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance" ) # input files photometry_files = [] modelsedgrid_files = [] modelsedgrid_trim_files = [] noise_files = [] noise_trim_files = [] # output files stats_files = [] pdf_files = [] pdf2d_files = [] lnp_files = [] # other potentially useful things sd_sub_info = [] gridsub_info = [] # ** no subgrids ** if nsubs == 1: # -- SD+sub specified if choose_sd_sub is not None: photometry_files.append( settings.obsfile.replace( ".fits", "_bin{0}_sub{1}.fits".format(choose_sd_sub[0], choose_sd_sub[1]), )) modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format( settings.project)) modelsedgrid_trim_files.append( "{0}/{0}_bin{1}_sub{2}_seds_trim.grid.hd5".format( settings.project, choose_sd_sub[0], choose_sd_sub[1])) noise_files.append("{0}/{0}_noisemodel_bin{1}.grid.hd5".format( settings.project, choose_sd_sub[0])) noise_trim_files.append( "{0}/{0}_bin{1}_sub{2}_noisemodel_trim.grid.hd5".format( settings.project, choose_sd_sub[0], choose_sd_sub[1])) stats_files.append("{0}/{0}_bin{1}_sub{2}_stats.fits".format( settings.project, choose_sd_sub[0], choose_sd_sub[1])) pdf_files.append("{0}/{0}_bin{1}_sub{2}_pdf1d.fits".format( settings.project, choose_sd_sub[0], choose_sd_sub[1])) pdf2d_files.append("{0}/{0}_bin{1}_sub{2}_pdf2d.fits".format( settings.project, choose_sd_sub[0], choose_sd_sub[1])) lnp_files.append("{0}/{0}_bin{1}_sub{2}_lnp.hd5".format( settings.project, choose_sd_sub[0], choose_sd_sub[1])) sd_sub_info.append([choose_sd_sub[0], choose_sd_sub[1]]) # -- using source density info elif use_sd is True: photometry_files = sorted( glob.glob(settings.obsfile.replace(".fits", "_bin*_sub*.fits"))) for phot_file in photometry_files: # get the sd/sub number dpos = phot_file.rfind("_bin") spos = phot_file.rfind("sub") ppos = phot_file.rfind(".") curr_sd = phot_file[dpos + 4:spos - 1] curr_sub = phot_file[spos + 3:ppos] # construct other file names modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format( settings.project)) modelsedgrid_trim_files.append( "{0}/{0}_bin{1}_sub{2}_seds_trim.grid.hd5".format( settings.project, curr_sd, curr_sub)) noise_files.append("{0}/{0}_noisemodel_bin{1}.grid.hd5".format( 
settings.project, curr_sd)) noise_trim_files.append( "{0}/{0}_bin{1}_sub{2}_noisemodel_trim.grid.hd5".format( settings.project, curr_sd, curr_sub)) stats_files.append("{0}/{0}_bin{1}_sub{2}_stats.fits".format( settings.project, curr_sd, curr_sub)) pdf_files.append("{0}/{0}_bin{1}_sub{2}_pdf1d.fits".format( settings.project, curr_sd, curr_sub)) pdf2d_files.append("{0}/{0}_bin{1}_sub{2}_pdf2d.fits".format( settings.project, curr_sd, curr_sub)) lnp_files.append("{0}/{0}_bin{1}_sub{2}_lnp.hd5".format( settings.project, curr_sd, curr_sub)) sd_sub_info.append([curr_sd, curr_sub]) # -- no source density splitting else: photometry_files.append(settings.obsfile) modelsedgrid_files.append("{0}/{0}_seds.grid.hd5".format( settings.project)) modelsedgrid_trim_files.append("{0}/{0}_seds_trim.grid.hd5".format( settings.project)) noise_files.append("{0}/{0}_noisemodel.grid.hd5".format( settings.project)) noise_trim_files.append("{0}/{0}_noisemodel_trim.grid.hd5".format( settings.project)) stats_files.append("{0}/{0}_stats.fits".format(settings.project)) pdf_files.append("{0}/{0}_pdf1d.fits".format(settings.project)) pdf2d_files.append("{0}/{0}_pdf2d.fits".format(settings.project)) lnp_files.append("{0}/{0}_lnp.hd5".format(settings.project)) # ** with subgrids ** # subgrids require a pickle file with grid info gridpickle_files = [] if nsubs > 1: # start with getting the model grid files (note these aren't trimmed ones) outdir = os.path.join(".", settings.project) subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt") temp = get_modelsubgridfiles(subgrid_names_file) # use that to get the number of subgrids and make a list of them gridsub_list = np.arange(len(temp)) # or a subset if set if choose_subgrid is not None: gridsub_list = [choose_subgrid] # -- SD+sub specified if choose_sd_sub is not None: for gridsub in gridsub_list: photometry_files.append( settings.obsfile.replace( ".fits", "_bin{0}_sub{1}.fits".format(choose_sd_sub[0], choose_sd_sub[1]), )) modelsedgrid_files.append("{0}/{0}_seds.gridsub{1}.hd5".format( settings.project, gridsub)) modelsedgrid_trim_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_seds_trim.grid.hd5" .format(settings.project, choose_sd_sub[0], choose_sd_sub[1], gridsub)) noise_files.append( "{0}/{0}_noisemodel_bin{1}.gridsub{2}.hd5".format( settings.project, choose_sd_sub[0], gridsub)) noise_trim_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_noisemodel_trim.grid.hd5" .format(settings.project, choose_sd_sub[0], choose_sd_sub[1], gridsub)) stats_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_stats.fits" .format(settings.project, choose_sd_sub[0], choose_sd_sub[1], gridsub)) pdf_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf1d.fits" .format(settings.project, choose_sd_sub[0], choose_sd_sub[1], gridsub)) pdf2d_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf2d.fits" .format(settings.project, choose_sd_sub[0], choose_sd_sub[1], gridsub)) lnp_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_lnp.hd5". 
format(settings.project, choose_sd_sub[0], choose_sd_sub[1], gridsub)) gridpickle_files.append( "{0}/bin{1}_sub{2}/grid_info_dict.pkl".format( settings.project, choose_sd_sub[0], choose_sd_sub[1])) sd_sub_info.append([choose_sd_sub[0], choose_sd_sub[1]]) gridsub_info.append(gridsub) # -- using source density info elif use_sd is True: phot_file_list = sorted( glob.glob(settings.obsfile.replace(".fits", "_bin*_sub*.fits"))) for phot_file in phot_file_list: # get the sd/sub number dpos = phot_file.rfind("_bin") spos = phot_file.rfind("sub") ppos = phot_file.rfind(".") curr_sd = phot_file[dpos + 4:spos - 1] curr_sub = phot_file[spos + 3:ppos] # construct other file names for gridsub in gridsub_list: photometry_files.append(phot_file) modelsedgrid_files.append( "{0}/{0}_seds.gridsub{1}.hd5".format( settings.project, gridsub)) modelsedgrid_trim_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_seds_trim.grid.hd5" .format(settings.project, curr_sd, curr_sub, gridsub)) noise_files.append( "{0}/{0}_noisemodel_bin{1}.gridsub{2}.hd5".format( settings.project, curr_sd, gridsub)) noise_trim_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_noisemodel_trim.grid.hd5" .format(settings.project, curr_sd, curr_sub, gridsub)) stats_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_stats.fits" .format(settings.project, curr_sd, curr_sub, gridsub)) pdf_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf1d.fits" .format(settings.project, curr_sd, curr_sub, gridsub)) pdf2d_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_pdf2d.fits" .format(settings.project, curr_sd, curr_sub, gridsub)) lnp_files.append( "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}_gridsub{3}_lnp.hd5" .format(settings.project, curr_sd, curr_sub, gridsub)) gridpickle_files.append( "{0}/bin{1}_sub{2}/grid_info_dict.pkl".format( settings.project, curr_sd, curr_sub)) sd_sub_info.append([curr_sd, curr_sub]) gridsub_info.append(gridsub) # -- no source density splitting else: for gridsub in gridsub_list: photometry_files.append(settings.obsfile) modelsedgrid_files.append("{0}/{0}_seds.gridsub{1}.hd5".format( settings.project, gridsub)) modelsedgrid_trim_files.append( "{0}/{0}_gridsub{1}_seds_trim.grid.hd5".format( settings.project, gridsub)) noise_files.append("{0}/{0}_noisemodel.gridsub{1}.hd5".format( settings.project, gridsub)) noise_trim_files.append( "{0}/{0}_gridsub{1}_noisemodel_trim.grid.hd5".format( settings.project, gridsub)) stats_files.append("{0}/{0}_gridsub{1}_stats.fits".format( settings.project, gridsub)) pdf_files.append("{0}/{0}_gridsub{1}_pdf1d.fits".format( settings.project, gridsub)) pdf2d_files.append("{0}/{0}_gridsub{1}_pdf2d.fits".format( settings.project, gridsub)) lnp_files.append("{0}/{0}_gridsub{1}_lnp.hd5".format( settings.project, gridsub)) gridpickle_files.append("{0}/grid_info_dict.pkl".format( settings.project)) gridsub_info.append(gridsub) # double check that all file lists are the same length n_file_list = [ len(x) for x in [ photometry_files, modelsedgrid_files, modelsedgrid_trim_files, noise_files, noise_trim_files, stats_files, pdf_files, pdf2d_files, lnp_files, ] ] if len(np.unique(n_file_list)) > 1: print("file list lengths don't match!") return None return { "photometry_files": photometry_files, "modelsedgrid_files": modelsedgrid_files, "modelsedgrid_trim_files": modelsedgrid_trim_files, "noise_files": noise_files, "noise_trim_files": noise_trim_files, "stats_files": stats_files, "pdf_files": pdf_files, "pdf2d_files": pdf2d_files, "lnp_files": lnp_files, 
"gridpickle_files": gridpickle_files, "sd_sub_info": sd_sub_info, "gridsub_info": gridsub_info, }
def run_fitting( beast_settings_info, use_sd=True, nsubs=1, nprocs=1, choose_sd_sub=None, choose_subgrid=None, pdf2d_param_list=['Av', 'Rv', 'f_A', 'M_ini', 'logA', 'Z', 'distance'], pdf_max_nbins=200, resume=False, ): """ Run the fitting. If nsubs > 1, this will find existing subgrids. If use_sd is True, will also incorporate source density info. The additional choose_* options are to make queue scripts usable, by specifying a given SD+sub and/or subgrid for the fitting run. Parameters ---------- beast_settings_info : string or beast.tools.beast_settings.beast_settings instance if string: file name with beast settings if class: beast.tools.beast_settings.beast_settings instance use_sd : boolean (default=True) If True, create source density dependent noise models (determined by finding matches to settings.astfile with SD info) nsubs : int (default=1) number of subgrids used for the physics model nprocs : int (default=1) Number of parallel processes to use (currently only implemented for subgrids) choose_sd_sub : list of two strings (default=None) If this is set, the fitting will just be for this combo of SD+sub, rather than all of them. Overrides use_sd. format of the list: ['#','#'] choose_subgrid : int (default=None) If this is set, the fitting with just be for this subgrid index. If nsubs=1, this is ignored. pdf2d_param_list : list of strings or None If set, do 2D PDFs of these parameters. If None, don't make 2D PDFs. pdf_max_nbins : int (default=100) Maxiumum number of bins to use for the 1D and 2D PDFs resume : boolean (default=False) choose whether to resume existing run or start over """ # process beast settings info if isinstance(beast_settings_info, str): settings = beast_settings.beast_settings(beast_settings_info) elif isinstance(beast_settings_info, beast_settings.beast_settings): settings = beast_settings_info else: raise TypeError( "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance" ) # keep track of time start_time = time.perf_counter() # -------------------- # make lists of file names # -------------------- file_dict = create_filenames.create_filenames( settings, use_sd=use_sd, nsubs=nsubs, choose_sd_sub=choose_sd_sub, choose_subgrid=choose_subgrid, ) # input files photometry_files = file_dict["photometry_files"] # modelsedgrid_files = file_dict["modelsedgrid_files"] modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"] # noise_files = file_dict["noise_files"] noise_trim_files = file_dict["noise_trim_files"] # output files stats_files = file_dict["stats_files"] pdf_files = file_dict["pdf_files"] pdf2d_files = file_dict["pdf2d_files"] if pdf2d_param_list is None: pdf2d_files = [None for i in range(len(pdf2d_files))] lnp_files = file_dict["lnp_files"] # total number of files n_files = len(photometry_files) # other potentially useful info sd_sub_info = file_dict["sd_sub_info"] # gridsub_info = file_dict['gridsub_info'] # if using subgrids, make the grid dictionary file: # File where the ranges and number of unique values for the grid # will be stored (this can take a while to calculate) if nsubs > 1: gridpickle_files = file_dict["gridpickle_files"] for i in range(len(gridpickle_files)): if not os.path.isfile(gridpickle_files[i]): # list of corresponding SED grids and noise models # - with SD+sub: get file list for ALL subgrids at current SD+sub if use_sd or (choose_sd_sub is not None): temp = create_filenames.create_filenames( settings, nsubs=nsubs, choose_sd_sub=sd_sub_info[i], choose_subgrid=None) modelsedgrid_trim_list = 
temp["modelsedgrid_trim_files"] noise_trim_list = temp["noise_trim_files"] # - no SD info: get file list for ALL subgrids else: temp = create_filenames.create_filenames( settings, use_sd=False, nsubs=nsubs, choose_subgrid=None) modelsedgrid_trim_list = temp["modelsedgrid_trim_files"] noise_trim_list = temp["noise_trim_files"] # create the grid info dictionary print("creating grid_info_dict for " + gridpickle_files[i]) grid_info_dict = subgridding_tools.reduce_grid_info( modelsedgrid_trim_list, noise_trim_list, nprocs=nprocs) # save it with open(gridpickle_files[i], "wb") as p: pickle.dump(grid_info_dict, p) print("wrote grid_info_dict to " + gridpickle_files[i]) # -------------------- # do the fitting! # -------------------- # set up function inputs if nsubs == 1: input_list = [( settings, photometry_files[i], modelsedgrid_trim_files[i], noise_trim_files[i], pdf_max_nbins, stats_files[i], pdf_files[i], pdf2d_files[i], pdf2d_param_list, lnp_files[i], None, resume, ) for i in range(n_files)] if nsubs > 1: input_list = [( settings, photometry_files[i], modelsedgrid_trim_files[i], noise_trim_files[i], pdf_max_nbins, stats_files[i], pdf_files[i], pdf2d_files[i], pdf2d_param_list, lnp_files[i], gridpickle_files[i], resume, ) for i in range(n_files)] # run the fitting (via parallel wrapper) parallel_wrapper(fit_submodel, input_list, nprocs=nprocs) # see how long it took! new_time = time.perf_counter() print("time to fit: ", (new_time - start_time) / 60.0, " min")
def create_obsmodel( beast_settings_info, use_sd=True, nsubs=1, nprocs=1, subset=[None, None], ): """ Create the observation models. If nsubs > 1, this will find existing subgrids. If use_sd is True, will also incorporate source density info. Parameters ---------- beast_settings_info : string or beast.tools.beast_settings.beast_settings instance if string: file name with beast settings if class: beast.tools.beast_settings.beast_settings instance use_sd : boolean (default=True) If True, create source density dependent noise models (determined by finding matches to settings.astfile with SD info) nsubs : int (default=1) number of subgrids used for the physics model nprocs : int (default=1) Number of parallel processes to use (currently only implemented for subgrids) subset : list of two ints (default=[None,None]) Only process subgrids in the range [start,stop]. (only relevant if nsubs > 1) """ # process beast settings info if isinstance(beast_settings_info, str): settings = beast_settings.beast_settings(beast_settings_info) elif isinstance(beast_settings_info, beast_settings.beast_settings): settings = beast_settings_info else: raise TypeError( "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance" ) # -------------------- # figure out if there are source density bins # -------------------- ast_file_list = sorted( glob.glob(settings.astfile.replace(".fits", "*_bin*"))) if use_sd and (len(ast_file_list) > 0): sd_list = [] for ast_file in ast_file_list: dpos = ast_file.rfind("_bin") ppos = ast_file.rfind(".") sd_list.append(ast_file[dpos + 4:ppos]) print("sd list: ", sd_list) else: # if there are no ASTs with source densities, the flag should be "false" use_sd = False # -------------------- # no subgrids # -------------------- if nsubs == 1: modelsedgridfile = "{0}/{0}_seds.grid.hd5".format(settings.project) # if we're splitting by source density if use_sd: input_list = [(settings, modelsedgridfile, curr_sd) for curr_sd in sd_list] parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs) # if we're not splitting by source density else: input_list = [(settings, modelsedgridfile, None)] parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs) # -------------------- # use subgrids # -------------------- if nsubs > 1: # get the list of physics model files outdir = os.path.join(".", settings.project) subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt") modelsedgridfiles = get_modelsubgridfiles(subgrid_names_file)[slice( subset[0], subset[1])] # if we're splitting by source density if use_sd: input_list = [(settings, sedfile, curr_sd) for sedfile in modelsedgridfiles for curr_sd in sd_list] parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs) # if we're not splitting by source density else: input_list = [(settings, sedfile, None) for sedfile in modelsedgridfiles] parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)
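
# A minimal usage sketch (hypothetical settings file name): build observation
# models for each source density bin, limited to the first two subgrids:
#
#     create_obsmodel("beast_settings.txt", use_sd=True, nsubs=5, nprocs=1, subset=[0, 2])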
def plot_source_density_map(sd_image_file, beast_settings_file):
    """
    Make a plot of the source density map.  Each spatial bin covers 5 arcsec^2.
    The source density (colorbar) bins are the custom bins specified in the
    beast settings file.

    Output plot is saved in the same location/name as the image file, but with
    a .png instead of .fits.

    Parameters
    ----------
    sd_image_file : string
        name of SD image FITS file

    beast_settings_file : string
        name of beast settings .txt file
    """

    image_file = fits.open(sd_image_file)
    image_file.info()
    # assuming the image data is first
    image_data = image_file[0].data
    image_file.close()

    # read in beast settings file
    settings = beast_settings.beast_settings(beast_settings_file)

    if settings.sd_binmode == "custom":
        sd_bins = settings.sd_custom
    # throw error if binning isn't custom
    else:
        raise Exception(
            "Expected custom binning. Please ensure the right beast settings file is specified."
        )

    # define colormap
    cmap = plt.cm.viridis
    # extract all colors from the map
    cmaplist = [cmap(i) for i in range(cmap.N)]
    # create the new map
    cmap = mpl.colors.LinearSegmentedColormap.from_list("Custom cmap", cmaplist, cmap.N)

    # define the bins and normalize
    norm = mpl.colors.BoundaryNorm(sd_bins, cmap.N)

    fig = plt.figure(0, [10, 10])
    im = plt.imshow(image_data, origin="lower", cmap=cmap, norm=norm)
    plt.colorbar(im, fraction=0.046, pad=0.04, ticks=sd_bins)
    plt.xlabel("Pixel (originally RA)")
    plt.ylabel("Pixel (originally DEC)")
    plt.title(r"Density of Sources per 5 arcsec$^2$")
    plt.tight_layout()

    fig.savefig(sd_image_file.replace("_image.fits", "_map_plot.png"))
    plt.close(fig)
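
# A minimal usage sketch (hypothetical file names; the settings file must use
# sd_binmode = "custom" so the colorbar bins are defined):
#
#     plot_source_density_map(
#         "data/15275_IC1613.gst_source_den_image.fits",
#         "beast_settings.txt",
#     )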
def setup_batch_beast_fit( beast_settings_info, num_percore=5, nice=None, overwrite_logfile=True, prefix=None, use_sd=True, pdf2d_param_list=['Av', 'Rv', 'f_A', 'M_ini', 'logA', 'Z', 'distance'], nsubs=1, nprocs=1, ): """ Sets up batch files for submission to the 'at' queue on linux (or similar) systems Parameters ---------- beast_settings_info : string or beast.tools.beast_settings.beast_settings instance if string: file name with beast settings if class: beast.tools.beast_settings.beast_settings instance num_percore : int (default = 5) number of fitting runs per core nice : int (default = None) set this to an integer (-20 to 20) to prepend a "nice" level to the fitting command overwrite_logfile : boolean (default = True) if True, will overwrite the log file; if False, will append to existing log file prefix : string (default=None) Set this to a string (such as 'source activate astroconda') to prepend to each batch file (use '\n's to make multiple lines) use_sd : boolean (default=True) If True, split runs based on source density (determined by finding matches to settings.astfile with SD info) pdf2d_param_list : list of strings or None If set, do 2D PDFs of these parameters. If None, don't make 2D PDFs. nsubs : int (default=1) number of subgrids used for the physics model nprocs : int (default=1) Number of parallel processes to use when doing the fitting (currently only implemented for subgrids) Returns ------- run_info_dict : dict Dictionary indicating which catalog files have complete modeling, and which job files need to be run """ # process beast settings info if isinstance(beast_settings_info, str): settings = beast_settings.beast_settings(beast_settings_info) elif isinstance(beast_settings_info, beast_settings.beast_settings): settings = beast_settings_info else: raise TypeError( "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance" ) # setup the subdirectory for the batch and log files job_path = settings.project + "/fit_batch_jobs/" if not os.path.isdir(job_path): os.mkdir(job_path) log_path = job_path + "logs/" if not os.path.isdir(log_path): os.mkdir(log_path) # get file name lists (to check if they exist and/or need to be resumed) file_dict = create_filenames.create_filenames(settings, use_sd=use_sd, nsubs=nsubs) # - input files photometry_files = file_dict["photometry_files"] # modelsedgrid_files = file_dict['modelsedgrid_files'] # noise_files = file_dict['noise_files'] # - output files stats_files = file_dict["stats_files"] pdf_files = file_dict["pdf_files"] lnp_files = file_dict["lnp_files"] # - total number of files n_files = len(photometry_files) # - other useful info sd_sub_info = file_dict["sd_sub_info"] gridsub_info = file_dict["gridsub_info"] # names of output log files log_files = [] # initialize a variable name (otherwise it got auto-added in the wrong # place and broke the code) pf = None for i in range(n_files): sd_piece = "" if use_sd is True: sd_piece = "_bin" + sd_sub_info[i][0] + "_sub" + sd_sub_info[i][1] gridsub_piece = "" if nsubs > 1: gridsub_piece = "_gridsub" + str(gridsub_info[i]) log_files.append("beast_fit" + sd_piece + gridsub_piece + ".log") # start making the job files! 
pf_open = False cur_f = 0 cur_total_size = 0.0 j = -1 # keep track of which files are done running run_info_dict = { "phot_file": photometry_files, "done": np.full(n_files, False), "files_to_run": [], } for i, phot_file in enumerate(photometry_files): print("") # check if this is a full run reg_run = False run_done = False if not os.path.isfile(stats_files[i]): reg_run = True print("no stats file") if not os.path.isfile(pdf_files[i]): reg_run = True print("no pdf1d file") if not os.path.isfile(lnp_files[i]): reg_run = True print("no lnp file") # first check if the pdf1d mass spacing is correct if not reg_run: hdulist = fits.open(pdf_files[i]) delta1 = hdulist["M_ini"].data[-1, 1] - hdulist["M_ini"].data[-1, 0] if delta1 > 1.0: # old linear spacing print("pdf1d lin mass spacing - full refitting needed") old_mass_spacing = True else: old_mass_spacing = False print("pdf1d log mass spacing - ok") if old_mass_spacing: run_done = False reg_run = True # now check if the number of results is the same as # the number of observations if not reg_run: # get the observed catalog obs = Table.read(phot_file) # get the fit results catalog t = Table.read(stats_files[i], hdu=1) # get the number of stars that have been fit (indxs, ) = np.where(t["Pmax"] != 0.0) # get the number of entries in the lnp file f = tables.open_file(lnp_files[i], "r") nlnp = f.root._v_nchildren - 2 f.close() print("# obs, stats, lnp = ", len(obs), len(indxs), nlnp) if (len(indxs) == len(obs)) & (nlnp == len(obs)): # final check, is the pdf1d file correctly populated tot_prob = np.sum(hdulist["M_ini"].data, axis=1) (tindxs, ) = np.where(tot_prob > 0.0) print("# good pdf1d = ", len(tindxs) - 1) if len(tindxs) == (len(obs) + 1): run_done = True if run_done: print(stats_files[i] + " done") run_info_dict["done"][i] = True else: j += 1 if j % num_percore == 0: cur_f += 1 # close previous files if j != 0: pf.close() # slurm needs the job file to be executable # flake8/codestyle error ignored as this if statement only executed # for j > 0 and appropriate joblist_file defined in j - 1 os.chmod(joblist_file, stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH) # noqa: F821 print( "total sed_trim size [Gb] = ", cur_total_size / (1024.0 * 1024.0 * 1024.0), ) cur_total_size = 0.0 # open the slurm and param files pf_open = True joblist_file = job_path + "beast_batch_fit_" + str( cur_f) + ".joblist" pf = open(joblist_file, "w") run_info_dict["files_to_run"].append(joblist_file) # write out anything at the beginning of the file if prefix is not None: pf.write(prefix + "\n") # flag for resuming resume_str = "" if reg_run: print(stats_files[i] + " does not exist " + "- adding job as a regular fit job (not resume job)") else: print(stats_files[i] + " not done - adding to continue fitting list (" + str(len(indxs)) + "/" + str(len(t["Pmax"])) + ")") resume_str = "-r" # prepend a `nice` value nice_str = "" if nice is not None: nice_str = "nice -n" + str(int(nice)) + " " # choose whether to append or overwrite log file pipe_str = " > " if not overwrite_logfile: pipe_str = " >> " # set SD+sub option sd_str = "" if use_sd is True: sd_str = ' --choose_sd_sub "{0}" "{1}" '.format( sd_sub_info[i][0], sd_sub_info[i][1]) # set gridsub option gs_str = "" if nsubs > 1: gs_str = " --choose_subgrid {0} ".format(gridsub_info[i]) # set 2D PDF option if pdf2d_param_list is None: pdf2d_str = "None" else: pdf2d_str = " " + " ".join(pdf2d_param_list) + " " job_command = (nice_str + "python -m beast.tools.run.run_fitting " + " {0} ".format(settings.settings_file) + resume_str + 
sd_str + gs_str + " --nsubs " + str(nsubs) + " --nprocs " + str(nprocs) + " --pdf2d_param_list " + pdf2d_str + pipe_str + log_path + log_files[i]) pf.write(job_command + "\n") if pf_open: pf.close() # slurm needs the job file to be executable os.chmod(joblist_file, stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH) # return the info about completed modeling return run_info_dict
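# Hedged illustration (all values hypothetical) of the fitting command that
# setup_batch_beast_fit writes to each joblist file.  The flags mirror the
# string concatenation above: "-r" is present only for resumed runs, the
# source-density/subgrid options only when use_sd is True or nsubs > 1, and
# the log file is either overwritten (">") or appended to (">>").
def _example_fit_job_command():
    nice_str = "nice -n10 "
    resume_str = "-r"
    sd_str = ' --choose_sd_sub "0" "1" '
    gs_str = " --choose_subgrid 2 "
    pdf2d_str = " Av Rv f_A M_ini logA Z distance "
    job_command = (
        nice_str
        + "python -m beast.tools.run.run_fitting "
        + " beast_settings.txt "
        + resume_str
        + sd_str
        + gs_str
        + " --nsubs 4 "
        + " --nprocs 1 "
        + " --pdf2d_param_list "
        + pdf2d_str
        + " > logs/beast_fit_bin0_sub1_gridsub2.log"
    )
    return job_command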
def create_physicsmodel(beast_settings_info, nsubs=1, nprocs=1, subset=[None, None]):
    """
    Create the physics model grid.  If nsubs > 1, this will make sub-grids.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    nsubs : int (default=1)
        number of subgrids to split the physics model into

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    subset : list of two ints (default=[None,None])
        Only process subgrids in the range [start,stop].
        (only relevant if nsubs > 1)
    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # filename for the SED grid
    modelsedgrid_filename = "%s/%s_seds.grid.hd5" % (
        settings.project,
        settings.project,
    )

    # grab the current subgrid slice
    subset_slice = slice(subset[0], subset[1])

    # make sure the project directory exists
    create_project_dir(settings.project)

    # download and load the isochrones
    (iso_fname, oiso) = make_iso_table(
        settings.project,
        oiso=settings.oiso,
        logtmin=settings.logt[0],
        logtmax=settings.logt[1],
        dlogt=settings.logt[2],
        z=settings.z,
    )

    if hasattr(settings, "add_spectral_properties_kwargs"):
        extra_kwargs = settings.add_spectral_properties_kwargs
    else:
        extra_kwargs = None

    if hasattr(settings, "velocity"):
        redshift = (settings.velocity / const.c).decompose().value
    else:
        redshift = 0

    # generate the spectral library (no dust extinction)
    (spec_fname, g_spec) = make_spectral_grid(
        settings.project,
        oiso,
        osl=settings.osl,
        redshift=redshift,
        distance=settings.distances,
        distance_unit=settings.distance_unit,
        extLaw=settings.extLaw,
        add_spectral_properties_kwargs=extra_kwargs,
    )

    # add the stellar priors as weights
    # also computes the grid weights for the stellar part
    (pspec_fname, g_pspec) = add_stellar_priors(
        settings.project,
        g_spec,
        age_prior_model=settings.age_prior_model,
        mass_prior_model=settings.mass_prior_model,
        met_prior_model=settings.met_prior_model,
        distance_prior_model=settings.distance_prior_model,
    )

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:
        # generate the SED grid by integrating the filter response functions
        #   effect of dust extinction applied before filter integration
        #   also computes the dust priors as weights
        make_extinguished_sed_grid(
            settings.project,
            g_pspec,
            settings.filters,
            extLaw=settings.extLaw,
            av=settings.avs,
            rv=settings.rvs,
            fA=settings.fAs,
            rv_prior_model=settings.rv_prior_model,
            av_prior_model=settings.av_prior_model,
            fA_prior_model=settings.fA_prior_model,
            seds_fname=modelsedgrid_filename,
            add_spectral_properties_kwargs=extra_kwargs,
        )

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:
        # Work with the whole grid up to this point (otherwise the priors need
        # a rework - they don't like having only a subset of the parameter
        # space, especially when there's only one age, for example)

        # Make subgrids by splitting the spectral grid into equal-sized pieces
        custom_sub_pspec = subgridding_tools.split_grid(pspec_fname, nsubs)

        file_prefix = "{0}/{0}_".format(settings.project)

        # function to process the subgrids individually
        def gen_subgrid(i, sub_name):
            sub_g_pspec = SpectralGrid(sub_name)
sub_seds_fname = "{}seds.gridsub{}.hd5".format(file_prefix, i) # generate the SED grid by integrating the filter response functions # effect of dust extinction applied before filter integration # also computes the dust priors as weights (sub_seds_fname, sub_g_seds) = make_extinguished_sed_grid( settings.project, sub_g_pspec, settings.filters, extLaw=settings.extLaw, av=settings.avs, rv=settings.rvs, fA=settings.fAs, rv_prior_model=settings.rv_prior_model, av_prior_model=settings.av_prior_model, fA_prior_model=settings.fA_prior_model, add_spectral_properties_kwargs=extra_kwargs, seds_fname=sub_seds_fname, ) return sub_seds_fname # run the above function par_tuples = [(i, sub_name) for i, sub_name in enumerate(custom_sub_pspec)][ subset_slice ] parallel_wrapper(gen_subgrid, par_tuples, nprocs=nprocs) # Save a list of subgrid names that we expect to see required_names = [ "{}seds.gridsub{}.hd5".format(file_prefix, i) for i in range(nsubs) ] outdir = os.path.join(".", settings.project) subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt") with open(subgrid_names_file, "w") as fname_file: for fname in required_names: fname_file.write(fname + "\n")
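# Hedged usage sketch: running the physics-model step with subgrids and then
# reading back the per-subgrid SED file names written to subgrid_fnames.txt.
# The settings file name and the "my_project" directory are placeholders; the
# actual project name comes from the beast settings file.
def _example_run_physicsmodel_with_subgrids():
    create_physicsmodel("beast_settings.txt", nsubs=4, nprocs=1)

    # read back the expected subgrid SED file names (one per line)
    with open("./my_project/subgrid_fnames.txt") as fname_file:
        subgrid_files = [line.strip() for line in fname_file if line.strip()]
    return subgrid_files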
    def setUpClass(cls):

        # download the BEAST library files
        get_libfiles.get_libfiles()

        cls.dset = "metal"
        if cls.dset == "metal":
            cls.basesubdir = "metal_small_15Mar22/"
            cls.basename = f"{cls.basesubdir}beast_metal_small"
            cls.obsname = f"{cls.basesubdir}14675_LMC-13361nw-11112.gst_samp.fits"
            cls.astname = f"{cls.basesubdir}14675_LMC-13361nw-11112.gst.fake.fits"

        # download the cached versions for use and comparison
        # - photometry and ASTs
        cls.obs_fname_cache = download_rename(cls.obsname)
        cls.asts_fname_cache = download_rename(cls.astname)
        # - isochrones
        cls.iso_fname_cache = download_rename(f"{cls.basename}_iso.csv")
        # - spectra
        cls.spec_fname_cache = download_rename(f"{cls.basename}_spec_grid.hd5")
        # - spectra with priors
        cls.priors_fname_cache = download_rename(
            f"{cls.basename}_spec_w_priors.grid.hd5"
        )
        cls.priors_sub0_fname_cache = download_rename(
            f"{cls.basename}_subgrids_spec_w_priors.gridsub0.hd5"
        )
        cls.priors_sub1_fname_cache = download_rename(
            f"{cls.basename}_subgrids_spec_w_priors.gridsub1.hd5"
        )
        # - SED grids
        cls.seds_fname_cache = download_rename(f"{cls.basename}_seds.grid.hd5")
        cls.seds_sub0_fname_cache = download_rename(
            f"{cls.basename}_subgrids_seds.gridsub0.hd5"
        )
        cls.seds_sub1_fname_cache = download_rename(
            f"{cls.basename}_subgrids_seds.gridsub1.hd5"
        )
        # - noise model
        cls.noise_fname_cache = download_rename(f"{cls.basename}_noisemodel.grid.hd5")
        cls.noise_sub0_fname_cache = download_rename(
            f"{cls.basename}_subgrids_noisemodel.gridsub0.hd5"
        )
        cls.noise_sub1_fname_cache = download_rename(
            f"{cls.basename}_subgrids_noisemodel.gridsub1.hd5"
        )
        # - trimmed files
        cls.noise_trim_fname_cache = download_rename(
            f"{cls.basename}_noisemodel_trim.grid.hd5"
        )
        cls.seds_trim_fname_cache = download_rename(
            f"{cls.basename}_seds_trim.grid.hd5"
        )
        # - output files
        cls.stats_fname_cache = download_rename(f"{cls.basename}_stats.fits")
        cls.lnp_fname_cache = download_rename(f"{cls.basename}_lnp.hd5")
        cls.pdf1d_fname_cache = download_rename(f"{cls.basename}_pdf1d.fits")
        cls.pdf2d_fname_cache = download_rename(f"{cls.basename}_pdf2d.fits")

        # create the beast_settings object
        # (copied over from the metal_small example in beast-examples)
        cls.settings_fname_cache = download_rename(
            f"{cls.basesubdir}beast_settings.txt"
        )
        cls.settings = beast_settings.beast_settings(cls.settings_fname_cache)
        # update names of photometry and AST files
        cls.settings.obsfile = cls.obs_fname_cache
        cls.settings.astfile = cls.asts_fname_cache

        # also make a version with 2 subgrids
        cls.settings_sg = copy.deepcopy(cls.settings)
        cls.settings_sg.n_subgrid = 2
        cls.settings_sg.project = f"{cls.settings.project}_subgrids"
def make_trim_scripts( beast_settings_info, num_subtrim=1, nice=None, prefix=None, ): """ `setup_batch_beast_trim.py` uses file names to create batch trim files. This generates all of the file names for that function. NOTE: This assumes you're using source density or background dependent noise models. Parameters ---------- beast_settings_info : string or beast.tools.beast_settings.beast_settings instance if string: file name with beast settings if class: beast.tools.beast_settings.beast_settings instance num_subtrim : int (default = 1) number of trim batch jobs nice : int (default = None) set this to an integer (-20 to 20) to prepend a "nice" level to the trimming command prefix : string (default=None) Set this to a string (such as 'source activate astroconda') to prepend to each batch file (use '\n's to make multiple lines) Returns ------- job_files : list of strings Names of the newly created job files """ # process beast settings info if isinstance(beast_settings_info, str): settings = beast_settings.beast_settings(beast_settings_info) elif isinstance(beast_settings_info, beast_settings.beast_settings): settings = beast_settings_info else: raise TypeError( "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance" ) # make lists of file names file_dict = create_filenames.create_filenames( settings, use_sd=True, nsubs=settings.n_subgrid, ) # extract some useful ones photometry_files = file_dict["photometry_files"] modelsedgrid_files = file_dict["modelsedgrid_files"] noise_files = file_dict["noise_files"] modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"] noise_trim_files = file_dict["noise_trim_files"] # the unique sets of things unique_sedgrid = [ x for i, x in enumerate(modelsedgrid_files) if i == modelsedgrid_files.index(x) ] # save the list of job files job_file_list = [] # iterate through each model grid for i in range(settings.n_subgrid): # indices for this model grid grid_ind = [ ind for ind, mod in enumerate(modelsedgrid_files) if mod == unique_sedgrid[i] ] # create corresponding files for each of those input_noise = [noise_files[ind] for ind in grid_ind] input_phot = [photometry_files[ind] for ind in grid_ind] # to get the trim prefix, find the common string between trimmed noise # files and trimmed SED files input_trim_prefix = [] for ind in grid_ind: str1 = modelsedgrid_trim_files[ind] str2 = noise_trim_files[ind] # find longest match match = SequenceMatcher(None, str1, str2).find_longest_match( 0, len(str1), 0, len(str2)) # grab that substring (and remove trailing "_") input_trim_prefix.append(str1[match.a:match.a + match.size][:-1]) # check if the trimmed grids exist before moving on check_trim = [ os.path.isfile(noise_trim_files[ind]) for ind in grid_ind ] # if any aren't trimmed for this model grid, set up trimming if np.sum(check_trim) < len(input_noise): job_path = "./{0}/trim_batch_jobs/".format(settings.project) if settings.n_subgrid > 1: file_prefix = "BEAST_gridsub" + str(i) if settings.n_subgrid == 1: file_prefix = "BEAST" # generate trimming at-queue commands setup_batch_beast_trim.generic_batch_trim( unique_sedgrid[i], input_noise, input_phot, input_trim_prefix, settings.obs_colnames, job_path=job_path, file_prefix=file_prefix, num_subtrim=num_subtrim, nice=nice, prefix=prefix, ) job_file_list.append(job_path + file_prefix + "_batch_trim.joblist") return job_file_list
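# Small sketch of the trim-prefix logic used in make_trim_scripts: the longest
# common substring of a trimmed SED-grid file name and the matching trimmed
# noise-model file name is the shared leading path, and dropping its trailing
# "_" gives the output prefix passed to the trimming step.  The file names
# below are hypothetical.
def _example_trim_prefix():
    from difflib import SequenceMatcher

    str1 = "proj/proj_sd0_sub0_gridsub1_seds_trim.grid.hd5"
    str2 = "proj/proj_sd0_sub0_gridsub1_noisemodel_trim.grid.hd5"
    match = SequenceMatcher(None, str1, str2).find_longest_match(
        0, len(str1), 0, len(str2)
    )
    # grab the common substring and remove the trailing "_"
    return str1[match.a : match.a + match.size][:-1]
    # -> "proj/proj_sd0_sub0_gridsub1"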
from beast.physicsmodel.model_grid import (
    make_spectral_grid,
    add_stellar_priors,
    make_extinguished_sed_grid,
)
from beast.observationmodel.observations import Observations
import beast.observationmodel.noisemodel.generic_noisemodel as noisemodel
from beast.observationmodel.ast import make_ast_input_list, make_ast_xy_list
from beast.fitting import fit, trim_grid
from beast.physicsmodel.grid import FileSEDGrid
from beast.tools import beast_settings, subgridding_tools

import argparse
import pickle
import os

settings = beast_settings.beast_settings("beast_settings.txt")

outdir = os.path.join(".", settings.project)
subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")

if __name__ == "__main__":
    # commandline parser
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-p",
        "--physicsmodel",
        help="Generate the physics model grid",
        action="store_true",
    )
    parser.add_argument(
        "-a",
        "--ast",
        help="Generate an input AST file",
def beast_ast_inputs(field_name=None, ref_image=None, filter_ids=None, galaxy=None): """ This does all of the steps for generating AST inputs and can be used a wrapper to automatically do most steps for multiple fields. * make field's beast_settings file * make source density map * make background density map * split catalog by source density * make physics model (SED grid) * make input list for ASTs * prune input ASTs ---- Inputs: field_name (str): name of field ref_image (str): path to reference image filter_ids (list): list of indexes corresponding to the filters in the observation, referenced to the master list below. galaxy (str): name of target galaxy (e.g., 'SMC', 'LMC') ---- Places for user to manually do things: * editing code before use - here: list the catalog filter names with the corresponding BEAST names - here: choose settings (pixel size, filter, mag range) for the source density map - here: choose settings (pixel size, reference image) for the background map """ # the list of fields field_names = [field_name] # the path+file for a reference image im_path = [ref_image] ref_filter = ["F475W"] # choose a filter to use for removing artifacts # (remove catalog sources with filter_FLAG > 99) flag_filter = ["F475W"] # number of fields n_field = len(field_names) # Need to know what the correspondence is between filter names in the # catalog and the BEAST filter names. # # These will be used to automatically determine the filters present in # each GST file and fill in the beast settings file. The order doesn't # matter, as long as the order in one list matches the order in the other # list. # gst_filter_names = [ "F225W", "F275W", "F336W", "F475W", "F814W", "F110W", "F160W", "F657N", ] beast_filter_names = [ "HST_WFC3_F225W", "HST_WFC3_F275W", "HST_WFC3_F336W", "HST_WFC3_F475W", "HST_WFC3_F814W", "HST_WFC3_F110W", "HST_WFC3_F160W", "HST_WFC3_F657N", ] filter_ids = [int(i) for i in filter_ids] gst_filter_names = [gst_filter_names[i] for i in filter_ids] beast_filter_names = [beast_filter_names[i] for i in filter_ids] for b in range(n_field): print("********") print("field " + field_names[b]) print("********") # ----------------- # data file names # ----------------- # paths for the data/AST files gst_file = "./data/" + field_names[b] + ".st.fits" ast_file = "./data/" + field_names[b] + ".st.fake.fits" # path for the reference image (if using for the background map) im_file = im_path[b] # region file with catalog stars # make_region_file(gst_file, ref_filter[b]) # ----------------- # 0. make beast settings file # ----------------- print("") print("creating beast settings file") print("") create_beast_settings( gst_file, ast_file, gst_filter_names, beast_filter_names, galaxy, ref_image=im_file, ) # load in beast settings to get number of subgrids settings = beast_settings.beast_settings( "beast_settings_" + galaxy + "_asts_" + field_names[b] + ".txt" ) # ----------------- # 1a. make magnitude histograms # ----------------- print("") print("making magnitude histograms") print("") # if not os.path.isfile('./data/'+field_names[b]+'.gst_maghist.pdf'): peak_mags = plot_mag_hist.plot_mag_hist(gst_file, stars_per_bin=70, max_bins=75) # ----------------- # 1b. 
make a source density map # ----------------- print("") print("making source density map") print("") # not currently doing background density bins # use_bg_info = True use_bg_info = False if use_bg_info: background_args = types.SimpleNamespace( subcommand="background", catfile=gst_file, pixsize=5, npix=None, reference=im_file, mask_radius=10, ann_width=20, cat_filter=[ref_filter, "90"], ) create_background_density_map.main_make_map(background_args) # but we are doing source density bins! if not os.path.isfile(gst_file.replace(".fits", "_source_den_image.fits")): # - pixel size of 10 arcsec # - use ref_filter[b] between vega mags of 17 and peak_mags[ref_filter[b]]-0.5 sourceden_args = types.SimpleNamespace( subcommand="sourceden", catfile=gst_file, pixsize=5, npix=None, mag_name=ref_filter[0] + "_VEGA", mag_cut=[17, peak_mags[ref_filter[0]] - 0.5], flag_name=flag_filter[0] + "_FLAG", ) create_background_density_map.main_make_map(sourceden_args) # new file name with the source density column gst_file_sd = gst_file.replace(".fits", "_with_sourceden.fits") # ----------------- # 2. make physics model # ----------------- print("") print("making physics model") print("") # see which subgrid files already exist gs_str = "" if settings.n_subgrid > 1: gs_str = "sub*" sed_files = glob.glob( "./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(field_names[b], gs_str) ) # only make the physics model they don't already exist if len(sed_files) < settings.n_subgrid: # directly create physics model grids create_physicsmodel.create_physicsmodel( settings, nprocs=1, nsubs=settings.n_subgrid ) # ------------------- # 3. make AST inputs # ------------------- # only create an AST input list if the ASTs don't already exist ast_input_file = "./" + field_names[b] + "/" + field_names[b] + "_inputAST.txt" if not os.path.isfile(ast_input_file): make_ast_inputs.make_ast_inputs(settings, pick_method="flux_bin_method") # list of SED files (physics models) model_grid_files = sorted( glob.glob( "./{0}/{0}_seds.grid*.hd5".format( field_names[b], ) ) ) # -------------------- # 3.1 "prune" AST inputs # -------------------- # prune input AST by flux (empirically determined) ast_input_tab = Table.read(ast_input_file, format="ascii") ast_input_tab_pruned = ast_input_tab.copy() if "F336W" in gst_filter_names: prune_spots = ( (ast_input_tab_pruned["HST_WFC3_F336W"] > 30.5) & (ast_input_tab_pruned["HST_WFC3_F475W"] > 32.5) & (ast_input_tab_pruned["HST_WFC3_F814W"] > 29.0) ) else: prune_spots = (ast_input_tab_pruned["HST_WFC3_F475W"] > 32.5) & ( ast_input_tab_pruned["HST_WFC3_F814W"] > 29.0 ) ast_input_tab_pruned = ast_input_tab_pruned[~prune_spots] # write pruned ast input table to a txt file ast_input_file_pruned = ( "./" + field_name + "/" + field_name + "_inputAST_pruned.txt" ) ast_input_tab_pruned.write( ast_input_file_pruned, format="ascii", overwrite=True ) # print out number of pruned ASTs per source density bin as a sanity check print("pruned input AST statistics per bin") input_ast_bin_stats(settings, ast_input_file_pruned, field_names[b]) # compare magnitude histograms of pruned ASTs with catalog plot_ast_histogram.plot_ast_histogram( ast_file=ast_input_file_pruned, sed_grid_file=model_grid_files[0] ) print("now go check the diagnostic plots!")
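# Hedged mini-example of the AST input pruning step above: rows whose model
# magnitudes are fainter than the empirically chosen limits in every listed
# band are dropped.  The limits and column names follow the F336W/F475W/F814W
# case; the table values are made up.
def _example_prune_ast_inputs():
    import numpy as np
    from astropy.table import Table

    tab = Table(
        {
            "HST_WFC3_F336W": np.array([25.0, 31.0]),
            "HST_WFC3_F475W": np.array([26.0, 33.0]),
            "HST_WFC3_F814W": np.array([24.0, 30.0]),
        }
    )
    prune_spots = (
        (tab["HST_WFC3_F336W"] > 30.5)
        & (tab["HST_WFC3_F475W"] > 32.5)
        & (tab["HST_WFC3_F814W"] > 29.0)
    )
    # keep only the rows that are bright enough in at least one band
    return tab[~prune_spots]  # keeps the first (bright) row only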