def merge_files(use_sd=True, nsubs=1):
    """
    Merge all of the results from the assorted fitting sub-files (divided by
    source density, subgrids, or both).

    The file name lists come from create_filenames.create_filenames, and the
    project name is read from the (re-imported) datamodel module.

    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    Returns
    -------
    None
    """

    # if there's no SD and no subgridding, running this is unnecessary
    # (the guard must test "not use_sd": with SD splitting there ARE
    # per-bin stats files to merge even when nsubs == 1)
    if (not use_sd) and (nsubs == 1):
        print("No merging necessary")
        return

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # get file name lists (to check if they exist and/or need to be resumed)
    file_dict = create_filenames.create_filenames(use_sd=use_sd, nsubs=nsubs)

    # - output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]

    # - other useful info
    sd_sub_info = file_dict["sd_sub_info"]

    # the unique sets of SD+sub, in order of first appearance
    unique_sd_sub = [
        x for i, x in enumerate(sd_sub_info) if i == sd_sub_info.index(x)
    ]

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        out_filebase = "{0}/{0}".format(datamodel.project)
        reorder_tags = [
            "sd{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
        ]
        merge_beast_stats.merge_stats_files(
            stats_files, out_filebase, reorder_tag_list=reorder_tags
        )

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:

        # runs were split by source density
        if use_sd:

            # lists to save the merged file names
            merged_pdf_files = []
            merged_stats_files = []

            for sd_sub in unique_sd_sub:

                # indices with the current sd_sub
                ind = [j for j, x in enumerate(sd_sub_info) if x == sd_sub]

                # merge the subgrid files for that SD+sub
                out_filebase = "{0}/SD{1}_sub{2}/{0}_SD{1}_sub{2}".format(
                    datamodel.project, sd_sub[0], sd_sub[1]
                )
                (
                    merged_pdf1d_fname,
                    merged_stats_fname,
                ) = subgridding_tools.merge_pdf1d_stats(
                    [pdf_files[j] for j in ind],
                    [stats_files[j] for j in ind],
                    re_run=False,
                    output_fname_base=out_filebase,
                )

                merged_pdf_files.append(merged_pdf1d_fname)
                merged_stats_files.append(merged_stats_fname)

            # merge the merged stats files
            out_filebase = "{0}/{0}".format(datamodel.project)
            reorder_tags = [
                "sd{0}_sub{1}".format(x[0], x[1]) for x in unique_sd_sub
            ]
            merge_beast_stats.merge_stats_files(
                merged_stats_files, out_filebase, reorder_tag_list=reorder_tags
            )

        # runs weren't split by source density
        else:

            out_filebase = "{0}/{0}".format(datamodel.project)
            subgridding_tools.merge_pdf1d_stats(
                pdf_files, stats_files, output_fname_base=out_filebase
            )
def merge_files(beast_settings_info, use_sd=True, nsubs=1, partial=False):
    """
    Combine the outputs of the individual fitting runs (which may have been
    split by source density bin, by physics-model subgrid, or by both) into
    merged files.

    With partial=True, results can be inspected while fitting is still in
    progress; this only matters when subgrids are in use.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    use_sd : boolean (default=True)
        set to True if the fitting used source density bins

    nsubs : int (default=1)
        number of subgrids used for the physics model

    partial : boolean (default=False)
        If True, the merged outputs only contain stars that have been run
        across all subgrids; stars fit in only some subgrids are discarded
        from the "partial" output files.  Only applied to the 1D PDF and
        stats files (lnP files are not merged in this mode).

    Raises
    ------
    TypeError
        if beast_settings_info is neither a string nor a beast_settings
        instance
    """

    # a single, un-split fit leaves nothing to put back together
    if not use_sd and nsubs == 1:
        print("No merging necessary")
        return

    # accept either a settings file name or an already-loaded settings object
    settings = beast_settings_info
    if isinstance(settings, str):
        settings = beast_settings.beast_settings(settings)
    elif not isinstance(settings, beast_settings.beast_settings):
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # look up the names of all the per-run files
    fnames = create_filenames.create_filenames(
        settings, use_sd=use_sd, nsubs=nsubs
    )
    stats_files = fnames["stats_files"]
    pdf_files = fnames["pdf_files"]
    lnp_files = fnames["lnp_files"]
    sd_sub_info = fnames["sd_sub_info"]

    # distinct SD+sub combinations, kept in order of first appearance
    combos = []
    for entry in sd_sub_info:
        if entry not in combos:
            combos.append(entry)

    # --------------------
    # no subgrids: just stitch the per-bin stats files together
    # --------------------
    if nsubs == 1:
        merged_base = "{0}/{0}".format(settings.project)
        tag_list = ["bin{0}_sub{1}".format(c[0], c[1]) for c in combos]
        merge_beast_stats.merge_stats_files(
            stats_files, merged_base, reorder_tag_list=tag_list
        )
        return

    # anything other than a multi-subgrid run has nothing further to do
    if not nsubs > 1:
        return

    # --------------------
    # subgrids, but no source density splitting
    # --------------------
    if not use_sd:
        merged_base = "{0}/{0}".format(settings.project)

        # 1D PDFs and stats
        subgridding_tools.merge_pdf1d_stats(
            pdf_files,
            stats_files,
            output_fname_base=merged_base,
            partial=partial,
        )

        # lnP files are only merged for complete runs
        if not partial:
            subgridding_tools.merge_lnp(
                lnp_files,
                re_run=False,
                output_fname_base=merged_base,
                threshold=-10,
            )
        return

    # --------------------
    # subgrids AND source density splitting
    # --------------------

    # names of the per-combination merged files
    merged_pdf_files = []
    merged_stats_files = []
    merged_lnp_files = []

    for combo in combos:

        # positions of every run belonging to this SD+sub combination
        members = [k for k, entry in enumerate(sd_sub_info) if entry == combo]

        combo_base = "{0}/bin{1}_sub{2}/{0}_bin{1}_sub{2}".format(
            settings.project, combo[0], combo[1]
        )
        if partial:
            combo_base += "_partial"

        # merge the subgrid 1D PDFs and stats for this combination
        pdf1d_name, stats_name = subgridding_tools.merge_pdf1d_stats(
            [pdf_files[k] for k in members],
            [stats_files[k] for k in members],
            re_run=False,
            output_fname_base=combo_base,
            partial=partial,
        )
        merged_pdf_files.append(pdf1d_name)
        merged_stats_files.append(stats_name)

        # merge the subgrid lnP files (complete runs only)
        if not partial:
            lnp_name = subgridding_tools.merge_lnp(
                [lnp_files[k] for k in members],
                re_run=False,
                output_fname_base=combo_base,
                threshold=-10,
            )
            merged_lnp_files.append(lnp_name)

    # finally, merge the per-combination stats files into one
    merged_base = "{0}/{0}".format(settings.project)
    tag_list = ["bin{0}_sub{1}".format(c[0], c[1]) for c in combos]
    merge_beast_stats.merge_stats_files(
        merged_stats_files, merged_base, reorder_tag_list=tag_list
    )
def beast_production_wrapper():
    """
    This does all of the steps for a full production run, and can be used as
    a wrapper to automatically do most steps for multiple fields.
    * make datamodel.py file
    * make source density map
    * make background density map
    * split catalog by source density
    * make physics model (SED grid)
    * make input list for ASTs
    * make noise model
    * generate batch script to trim models
    * generate batch script to fit models
    * merge stats files back together
    * spatially reorder the results

    Places for user to manually do things:
    * editing code before use
        - datamodel_template.py: setting up the file with desired parameters
        - here: list the catalog filter names with the corresponding BEAST names
        - here: choose settings (pixel size, filter, mag range) for the source
          density map
        - here: choose settings (pixel size, reference image) for the
          background map
        - here: choose settings (filter, number per file) for dividing catalog
          by source density
        - here: choose settings (# files, nice level) for the trimming/fitting
          batch scripts
    * process the ASTs, as described in BEAST documentation
    * run the trimming scripts
    * run the fitting scripts

    For each field, the wrapper stops at the first step that requires manual
    work (physics model, ASTs, trimming, fitting) and prints what to run; it
    is meant to be re-run after each of those steps is finished.

    BEWARE: When running the trimming/fitting scripts, ensure that the correct
    datamodel.py file is in use.  Since it gets updated every time this code
    is run, you may unexpectedly be using one from another field.
    """

    # the list of fields
    field_names = ["15275_IC1613"]

    # distance moduli and velocities
    # http://adsabs.harvard.edu/abs/2013AJ....146...86T
    dist_mod = [24.36]
    velocity = [-236]

    # the path+file for a reference image
    im_path = ["../beast_dwarfs/images/15275_IC1613_F555W_drz.fits.gz"]
    ref_filter = ["F555W"]

    # choose a filter to use for removing artifacts
    # (remove catalog sources with filter_FLAG > 99)
    flag_filter = ["F555W"]

    # number of fields
    n_field = len(field_names)

    # Need to know what the correspondence is between filter names in the
    # catalog and the BEAST filter names.
    #
    # These will be used to automatically determine the filters present in
    # each GST file and fill in the datamodel.py file.  The order doesn't
    # matter, as long as the order in one list matches the order in the other
    # list.
    #
    gst_filter_names = [
        "F275W", "F336W", "F390M", "F555W", "F814W", "F110W", "F160W"
    ]
    beast_filter_names = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_WFC3_F390M",
        "HST_WFC3_F555W",
        "HST_WFC3_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]

    for b in range(n_field):

        print("********")
        print("field " + field_names[b])
        print("********")

        # -----------------
        # data file names
        # -----------------

        # paths for the data/AST files
        gst_file = "./data/" + field_names[b] + ".gst.fits"
        ast_file = "./data/" + field_names[b] + ".gst.fake.fits"
        # path for the reference image (if using for the background map)
        im_file = im_path[b]

        # -----------------
        # 0. make datamodel file
        # -----------------

        # need to do this first, because otherwise any old version that exists
        # will be imported, and changes made here won't get imported again

        print("")
        print("creating datamodel file")
        print("")

        create_datamodel(
            gst_file,
            ast_file,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # load in datamodel to get number of subgrids
        # (imported here, after the file has been written, and reloaded in
        # case an older version was already imported in this session)
        import datamodel

        importlib.reload(datamodel)

        # -----------------
        # 1a. make magnitude histograms
        # -----------------

        print("")
        print("making magnitude histograms")
        print("")

        peak_mags = plot_mag_hist.plot_mag_hist(
            gst_file, stars_per_bin=70, max_bins=75
        )

        # -----------------
        # 1b. make a source density map
        # -----------------

        print("")
        print("making source density map")
        print("")

        # not currently doing background density bins
        use_bg_info = False
        if use_bg_info:
            background_args = types.SimpleNamespace(
                subcommand="background",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                reference=im_file,
                mask_radius=10,
                ann_width=20,
                # the reference filter of THIS field (not the whole list)
                cat_filter=[ref_filter[b], "90"],
            )
            create_background_density_map.main_make_map(background_args)

        # but we are doing source density bins!
        if not os.path.isfile(
            gst_file.replace(".fits", "_source_den_image.fits")
        ):
            # - pixsize=5
            # - use ref_filter[b] between vega mags of 15 and
            #   peak_mags[ref_filter[b]] - 0.5
            sourceden_args = types.SimpleNamespace(
                subcommand="sourceden",
                catfile=gst_file,
                pixsize=5,
                npix=None,
                mag_name=ref_filter[b] + "_VEGA",
                mag_cut=[15, peak_mags[ref_filter[b]] - 0.5],
                flag_name=flag_filter[b] + "_FLAG",
            )
            create_background_density_map.main_make_map(sourceden_args)

        # new file name with the source density column
        gst_file_sd = gst_file.replace(".fits", "_with_sourceden.fits")

        # -----------------
        # 2. make physics model
        # -----------------

        print("")
        print("making physics model")
        print("")

        # see which subgrid files already exist
        gs_str = ""
        if datamodel.n_subgrid > 1:
            gs_str = "sub*"

        sed_files = glob.glob(
            "./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(
                field_names[b], gs_str
            )
        )

        # only make the physics model if the grids don't already exist
        if len(sed_files) < datamodel.n_subgrid:
            # create grids with script
            create_physicsmodel.split_create_physicsmodel(
                nprocs=1, nsubs=datamodel.n_subgrid
            )
            print(
                "\n**** go run physics model code for "
                + field_names[b]
                + "! ****"
            )
            continue

        # list of SED files
        model_grid_files = sorted(
            glob.glob(
                "./{0}_beast/{0}_beast_seds.grid{1}.hd5".format(
                    field_names[b], gs_str
                )
            )
        )

        # -----------------
        # 3. make ASTs
        # -----------------

        # only create an AST input list if the ASTs don't already exist
        ast_input_file = (
            "./" + field_names[b] + "_beast/" + field_names[b]
            + "_beast_inputAST.txt"
        )

        if not os.path.isfile(ast_file):
            if not os.path.isfile(ast_input_file):
                print("")
                print("creating artificial stars")
                print("")
                make_ast_inputs.make_ast_inputs(flux_bin_method=True)
                split_ast_input_file.split_asts(
                    field_names[b] + "_beast", ast_input_file, 2000
                )
            print("\n**** go run ASTs for " + field_names[b] + "! ****\n")
            continue

        # -----------------
        # 4/5. edit photometry/AST catalogs
        # -----------------

        # remove sources that are
        # - in regions without full imaging coverage,
        # - flagged in flag_filter

        print("")
        print("editing photometry/AST catalogs")
        print("")

        # - photometry
        gst_file_cut = gst_file.replace(".fits", "_with_sourceden_cut.fits")
        cut_catalogs.cut_catalogs(
            gst_file_sd,
            gst_file_cut,
            partial_overlap=True,
            flagged=True,
            flag_filter=flag_filter[b],
            region_file=True,
        )

        # - ASTs
        ast_file_cut = ast_file.replace(".fits", "_cut.fits")
        cut_catalogs.cut_catalogs(
            ast_file,
            ast_file_cut,
            partial_overlap=True,
            flagged=True,
            flag_filter=flag_filter[b],
            region_file=True,
        )

        # rewrite the datamodel.py file so that it points at the cut
        # photometry and AST catalogs
        create_datamodel(
            gst_file_cut,
            ast_file_cut,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
        )

        # -----------------
        # 6. split observations by source density
        # -----------------

        print("")
        print("splitting observations by source density")
        print("")

        # - photometry (skipped if split files are already present)
        if len(glob.glob(gst_file_cut.replace(".fits", "*sub*fits"))) == 0:

            # a smaller value for n_per_file will mean more individual
            # files/runs, but each run will take a shorter amount of time
            split_catalog_using_map.split_main(
                gst_file_cut,
                ast_file_cut,
                gst_file.replace(".fits", "_sourceden_map.hd5"),
                bin_width=1,
                n_per_file=6250,
            )

        # -- at this point, we can run the code to create lists of filenames
        file_dict = create_filenames.create_filenames(
            use_sd=True, nsubs=datamodel.n_subgrid
        )

        # figure out how many files there are
        sd_sub_info = file_dict["sd_sub_info"]
        # - number of SD bins
        sd_bins = {combo[0] for combo in sd_sub_info}
        print("** total SD bins: " + str(len(sd_bins)))
        # - the unique sets of SD+sub
        unique_sd_sub = [
            x for k, x in enumerate(sd_sub_info) if k == sd_sub_info.index(x)
        ]
        print("** total SD subfiles: " + str(len(unique_sd_sub)))

        # -----------------
        # 7. make noise models
        # -----------------

        print("")
        print("making noise models")
        print("")

        # create the noise model (this code will check if it exists)
        create_obsmodel.create_obsmodel(
            use_sd=True, nsubs=datamodel.n_subgrid, nprocs=1
        )

        # -----------------
        # 8. make script to trim models
        # -----------------

        print("")
        print("setting up script to trim models")
        print("")

        # save any at-queue commands
        at_list = []

        # gst list: unique photometry files, in order of first appearance
        # (does not depend on the subgrid, so it is built once)
        phot_files = file_dict["photometry_files"]
        gst_input_list = [
            x for k, x in enumerate(phot_files) if k == phot_files.index(x)
        ]

        # iterate through each model grid
        for i in range(datamodel.n_subgrid):

            # create corresponding files for each of the photometry files
            ast_input_list = []
            noise_files = []
            trim_prefix = []

            for j in range(len(gst_input_list)):
                # get the sd/sub number
                curr_sd = unique_sd_sub[j][0]
                curr_sub = unique_sd_sub[j][1]
                subfolder = "bin{0}_sub{1}".format(curr_sd, curr_sub)

                # create file names
                ast_input_list.append(
                    ast_file_cut.replace(".fits", "_bin" + curr_sd + ".fits")
                )
                if datamodel.n_subgrid > 1:
                    noise_files.append(
                        "./{0}_beast/{0}_beast_noisemodel_bin{1}.gridsub{2}.hd5"
                        .format(field_names[b], curr_sd, i)
                    )
                    trim_prefix.append(
                        "./{0}_beast/{1}/{0}_beast_{1}_gridsub{2}".format(
                            field_names[b], subfolder, i
                        )
                    )
                if datamodel.n_subgrid == 1:
                    noise_files.append(file_dict["noise_files"][j])
                    trim_prefix.append(
                        "./{0}_beast/{0}_beast_{1}".format(
                            field_names[b], subfolder
                        )
                    )

            # check if the trimmed grids exist before moving on
            if datamodel.n_subgrid > 1:
                trim_files = sorted(
                    glob.glob(
                        "./{0}_beast/bin*_sub*/{0}_beast_*_gridsub{1}_sed_trim.grid.hd5"
                        .format(field_names[b], i)
                    )
                )
            if datamodel.n_subgrid == 1:
                trim_files = sorted(
                    glob.glob(
                        "./{0}_beast/{0}_beast_*_sub*_sed_trim.grid.hd5".format(
                            field_names[b]
                        )
                    )
                )

            if len(trim_files) < len(gst_input_list):

                job_path = "./" + field_names[b] + "_beast/trim_batch_jobs/"
                if datamodel.n_subgrid > 1:
                    file_prefix = "BEAST_gridsub" + str(i)
                if datamodel.n_subgrid == 1:
                    file_prefix = "BEAST"

                # generate trimming at-queue commands
                setup_batch_beast_trim.generic_batch_trim(
                    model_grid_files[i],
                    noise_files,
                    gst_input_list,
                    ast_input_list,
                    trim_prefix,
                    job_path=job_path,
                    file_prefix=file_prefix,
                    num_subtrim=1,
                    nice=19,
                    prefix="source activate b13",
                )

                at_list.append(
                    "at -f " + job_path + file_prefix
                    + "_batch_trim.joblist now"
                )

        if len(at_list) > 0:
            print(
                "\n**** go run trimming code for " + field_names[b] + "! ****"
            )
            print("Here are the command(s) to run:")
            for cmd in at_list:
                print(cmd)
            # NOTE(review): this ends the whole wrapper, so any remaining
            # fields are not processed in this call (the other manual steps
            # use `continue`) -- confirm this is intended
            return
        else:
            print("all files are trimmed for " + field_names[b])

        # -----------------
        # 9. make script to fit models
        # -----------------

        print("")
        print("setting up script to fit models")
        print("")

        fit_run_info = setup_batch_beast_fit.setup_batch_beast_fit(
            num_percore=1,
            nice=19,
            overwrite_logfile=False,
            prefix="source activate b13",
            use_sd=True,
            nsubs=datamodel.n_subgrid,
            nprocs=1,
        )

        # check if the fits exist before moving on
        tot_remaining = len(fit_run_info["done"]) - np.sum(
            fit_run_info["done"]
        )
        if tot_remaining > 0:
            print(
                "\n**** go run fitting code for " + field_names[b] + "! ****"
            )
            print(
                "Here are the " + str(len(fit_run_info["files_to_run"]))
                + " commands to run:"
            )
            for job_file in fit_run_info["files_to_run"]:
                print("at -f ./" + job_file + " now")
            continue
        else:
            print("all fits are complete for " + field_names[b])

        # -----------------
        # 10. merge stats files from each fit
        # -----------------

        print("")
        print("merging stats files")
        print("")

        merge_files.merge_files(use_sd=True, nsubs=datamodel.n_subgrid)
def setup_batch_beast_fit(
    beast_settings_info,
    num_percore=5,
    nice=None,
    overwrite_logfile=True,
    prefix=None,
    use_sd=True,
    pdf2d_param_list=['Av', 'Rv', 'f_A', 'M_ini', 'logA', 'Z', 'distance'],
    nsubs=1,
    nprocs=1,
):
    """
    Sets up batch files for submission to the 'at' queue on
    linux (or similar) systems

    For every photometry file, the existing stats / 1D PDF / lnP outputs are
    inspected to decide whether the fit is complete, needs to be resumed, or
    needs to be (re)run from scratch.  A fitting command is written to a job
    list file for every fit that is not complete.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    num_percore : int (default = 5)
        number of fitting runs per core

    nice : int (default = None)
        set this to an integer (-20 to 20) to prepend a "nice" level
        to the fitting command

    overwrite_logfile : boolean (default = True)
        if True, will overwrite the log file; if False, will append to
        existing log file

    prefix : string (default=None)
        Set this to a string (such as 'source activate astroconda') to prepend
        to each batch file (use '\\n's to make multiple lines)

    use_sd : boolean (default=True)
        If True, split runs based on source density (determined by finding
        matches to settings.astfile with SD info)

    pdf2d_param_list : list of strings or None
        If set, do 2D PDFs of these parameters.  If None, don't make 2D PDFs.
        (The default list is only read, never modified.)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use when doing the fitting
        (currently only implemented for subgrids)

    Returns
    -------
    run_info_dict : dict
        Dictionary indicating which catalog files have complete modeling, and
        which job files need to be run

    Raises
    ------
    TypeError
        if beast_settings_info is neither a string nor a beast_settings
        instance
    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # setup the subdirectory for the batch and log files
    job_path = settings.project + "/fit_batch_jobs/"
    if not os.path.isdir(job_path):
        os.mkdir(job_path)

    log_path = job_path + "logs/"
    if not os.path.isdir(log_path):
        os.mkdir(log_path)

    # get file name lists (to check if they exist and/or need to be resumed)
    file_dict = create_filenames.create_filenames(
        settings, use_sd=use_sd, nsubs=nsubs
    )

    # - input files
    photometry_files = file_dict["photometry_files"]

    # - output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    lnp_files = file_dict["lnp_files"]

    # - total number of files
    n_files = len(photometry_files)

    # - other useful info
    sd_sub_info = file_dict["sd_sub_info"]
    gridsub_info = file_dict["gridsub_info"]

    # names of output log files
    log_files = []

    # initialize a variable name (otherwise it got auto-added in the wrong
    # place and broke the code)
    pf = None

    for i in range(n_files):

        sd_piece = ""
        if use_sd is True:
            sd_piece = "_bin" + sd_sub_info[i][0] + "_sub" + sd_sub_info[i][1]

        gridsub_piece = ""
        if nsubs > 1:
            gridsub_piece = "_gridsub" + str(gridsub_info[i])

        log_files.append("beast_fit" + sd_piece + gridsub_piece + ".log")

    # start making the job files!

    pf_open = False
    cur_f = 0
    cur_total_size = 0.0
    j = -1

    # keep track of which files are done running
    run_info_dict = {
        "phot_file": photometry_files,
        "done": np.full(n_files, False),
        "files_to_run": [],
    }

    for i, phot_file in enumerate(photometry_files):

        print("")

        # check if this is a full run
        reg_run = False
        run_done = False
        if not os.path.isfile(stats_files[i]):
            reg_run = True
            print("no stats file")
        if not os.path.isfile(pdf_files[i]):
            reg_run = True
            print("no pdf1d file")
        if not os.path.isfile(lnp_files[i]):
            reg_run = True
            print("no lnp file")

        # first check if the pdf1d mass spacing is correct
        if not reg_run:
            # copy the M_ini extension into memory so the FITS file can be
            # closed right away (it is needed again for the final check)
            with fits.open(pdf_files[i]) as hdulist:
                m_ini_pdf = np.array(hdulist["M_ini"].data)

            delta1 = m_ini_pdf[-1, 1] - m_ini_pdf[-1, 0]
            if delta1 > 1.0:  # old linear spacing
                print("pdf1d lin mass spacing - full refitting needed")
                old_mass_spacing = True
            else:
                old_mass_spacing = False
                print("pdf1d log mass spacing - ok")

            if old_mass_spacing:
                run_done = False
                reg_run = True

        # now check if the number of results is the same as
        # the number of observations
        if not reg_run:
            # get the observed catalog
            obs = Table.read(phot_file)

            # get the fit results catalog
            t = Table.read(stats_files[i], hdu=1)
            # get the number of stars that have been fit
            (indxs,) = np.where(t["Pmax"] != 0.0)

            # get the number of entries in the lnp file
            with tables.open_file(lnp_files[i], "r") as f:
                nlnp = f.root._v_nchildren - 2

            print("# obs, stats, lnp = ", len(obs), len(indxs), nlnp)
            if (len(indxs) == len(obs)) and (nlnp == len(obs)):

                # final check, is the pdf1d file correctly populated
                tot_prob = np.sum(m_ini_pdf, axis=1)
                (tindxs,) = np.where(tot_prob > 0.0)
                print("# good pdf1d = ", len(tindxs) - 1)
                if len(tindxs) == (len(obs) + 1):
                    run_done = True

        if run_done:
            print(stats_files[i] + " done")
            run_info_dict["done"][i] = True
        else:
            j += 1
            # start a new job list file every num_percore unfinished fits
            if j % num_percore == 0:
                cur_f += 1

                # close previous files
                if j != 0:
                    pf.close()
                    # slurm needs the job file to be executable
                    # flake8/codestyle error ignored as this if statement only
                    # executed for j > 0 and appropriate joblist_file defined
                    # in j - 1
                    os.chmod(joblist_file, stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH)  # noqa: F821

                    print(
                        "total sed_trim size [Gb] = ",
                        cur_total_size / (1024.0 * 1024.0 * 1024.0),
                    )
                    cur_total_size = 0.0

                # open the slurm and param files
                pf_open = True
                joblist_file = (
                    job_path + "beast_batch_fit_" + str(cur_f) + ".joblist"
                )
                pf = open(joblist_file, "w")
                run_info_dict["files_to_run"].append(joblist_file)

                # write out anything at the beginning of the file
                if prefix is not None:
                    pf.write(prefix + "\n")

            # flag for resuming
            resume_str = ""
            if reg_run:
                print(
                    stats_files[i] + " does not exist "
                    + "- adding job as a regular fit job (not resume job)"
                )
            else:
                print(
                    stats_files[i]
                    + " not done - adding to continue fitting list ("
                    + str(len(indxs)) + "/" + str(len(t["Pmax"])) + ")"
                )
                resume_str = "-r"

            # prepend a `nice` value
            nice_str = ""
            if nice is not None:
                nice_str = "nice -n" + str(int(nice)) + " "

            # choose whether to append or overwrite log file
            pipe_str = " > "
            if not overwrite_logfile:
                pipe_str = " >> "

            # set SD+sub option
            sd_str = ""
            if use_sd is True:
                sd_str = ' --choose_sd_sub "{0}" "{1}" '.format(
                    sd_sub_info[i][0], sd_sub_info[i][1]
                )

            # set gridsub option
            gs_str = ""
            if nsubs > 1:
                gs_str = " --choose_subgrid {0} ".format(gridsub_info[i])

            # set 2D PDF option
            if pdf2d_param_list is None:
                pdf2d_str = "None"
            else:
                pdf2d_str = " " + " ".join(pdf2d_param_list) + " "

            job_command = (
                nice_str
                + "python -m beast.tools.run.run_fitting "
                + " {0} ".format(settings.settings_file)
                + resume_str
                + sd_str
                + gs_str
                + " --nsubs " + str(nsubs)
                + " --nprocs " + str(nprocs)
                + " --pdf2d_param_list " + pdf2d_str
                + pipe_str
                + log_path + log_files[i]
            )

            pf.write(job_command + "\n")

    if pf_open:
        pf.close()

        # slurm needs the job file to be executable
        os.chmod(joblist_file, stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH)

    # return the info about completed modeling
    return run_info_dict
def beast_verification_wrapper():
    """
    This wrapper does the processing for BEAST verification

    Parameter recovery
    * create simulated data for a given model grid + noise model
    * generate batch script to trim models
    * generate batch script to fit models
    * merge stats files back together

    Places for user to manually do things:
    * editing code before use
        - beast_settings_template.py: setting up the file with desired
          parameters
        - here: list the catalog filter names with the corresponding BEAST
          names
        - here: number of simulated stars to generate
        - here: choose settings (# files, nice level) for the trimming/fitting
          batch scripts
    * process the ASTs, as described in BEAST documentation
    * run the trimming scripts
    * run the fitting scripts

    The wrapper stops at the first step that requires manual work (trimming,
    fitting) and prints the commands to run; it is meant to be re-run after
    each of those steps is finished.
    """

    # the list of fields
    field_names = ["15275_IC1613"]

    # distance moduli and velocities
    # http://adsabs.harvard.edu/abs/2013AJ....146...86T
    dist_mod = [24.36]
    velocity = [-236]

    # the path+file for a reference image
    im_path = ["../beast_dwarfs/images/15275_IC1613_F555W_drz.fits.gz"]
    ref_filter = ["F555W"]

    # number of fields
    n_field = len(field_names)

    # Need to know what the correspondence is between filter names in the
    # catalog and the BEAST filter names.
    #
    # These will be used to automatically determine the filters present in
    # each GST file and fill in the beast settings file.  The order doesn't
    # matter, as long as the order in one list matches the order in the other
    # list.
    #
    gst_filter_names = [
        "F275W", "F336W", "F390M", "F555W", "F814W", "F110W", "F160W"
    ]
    beast_filter_names = [
        "HST_WFC3_F275W",
        "HST_WFC3_F336W",
        "HST_WFC3_F390M",
        "HST_WFC3_F555W",
        "HST_WFC3_F814W",
        "HST_WFC3_F110W",
        "HST_WFC3_F160W",
    ]

    for b in range(n_field):

        print("********")
        print("field " + field_names[b])
        print("********")

        # -----------------
        # 0. get original file names
        # -----------------

        print("")
        print("retrieving original file names")
        print("")

        # paths for the data/AST files
        gst_file_orig = (
            "./data/" + field_names[b] + ".gst_with_sourceden_cut.fits"
        )
        ast_file_orig = "./data/" + field_names[b] + ".gst.fake_cut.fits"
        # path for the reference image (if using for the background map)
        im_file = im_path[b]

        create_beast_settings(
            gst_file_orig,
            ast_file_orig,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
            proj_type="beast",
        )

        # load in beast settings to get number of subgrids
        # (indexed by the field counter b)
        settings = beast_settings.beast_settings(
            "beast_settings_" + field_names[b] + "_beast.txt"
        )

        # grab relevant file names
        file_dict = create_filenames.create_filenames(
            settings,
            use_sd=True,
            nsubs=settings.n_subgrid,
        )
        modelsedgrid_files = file_dict["modelsedgrid_files"]
        noise_files = file_dict["noise_files"]
        sd_sub_info = file_dict["sd_sub_info"]
        tot_files = len(sd_sub_info)

        # -----------------
        # 1. create simulated data
        # -----------------

        print("")
        print("simulating data")
        print("")

        gst_file = gst_file_orig.replace(".gst", ".sim.gst")
        gst_subfile_form = gst_file.replace(".fits", "_bin*_sub0.fits")

        # loop through all candidate bin numbers
        # only grab files and simulate data when we're at sd_sub = [N,0]
        for sd_bin in range(tot_files):

            # find matches to sd_sub = [sd_bin,0]
            inds_to_use = [
                ind
                for ind in range(tot_files)
                if sd_sub_info[ind] == [str(sd_bin), "0"]
            ]

            # if there are matches, use those corresponding files
            if len(inds_to_use) > 0:

                output_catalog = gst_subfile_form.replace(
                    "_bin*", "_bin" + str(sd_bin)
                )

                if not os.path.isfile(output_catalog):
                    print(
                        "generating simulated observations for bin="
                        + str(sd_bin)
                    )

                    grid_sublist = [modelsedgrid_files[x] for x in inds_to_use]
                    noise_sublist = [noise_files[x] for x in inds_to_use]

                    simulate_obs.simulate_obs(
                        grid_sublist,
                        noise_sublist,
                        output_catalog,
                        nsim=5000,
                        compl_filter=ref_filter[b],
                    )

                else:
                    print(
                        "simulated observations already exist for bin="
                        + str(sd_bin)
                    )

        # combine them all into one catalog
        if not os.path.isfile(gst_file):
            table_list = [
                Table.read(cat_file)
                for cat_file in glob.glob(gst_subfile_form)
            ]
            vstack(table_list).write(gst_file, overwrite=True)

        # -----------------
        # 2. make new settings file
        # -----------------

        print("")
        print("creating beast settings file")
        print("")

        create_beast_settings(
            gst_file,
            ast_file_orig,
            gst_filter_names,
            beast_filter_names,
            dist_mod[b],
            velocity[b],
            ref_image=im_file,
            proj_type="sim",
        )

        # load in beast settings again
        settings = beast_settings.beast_settings(
            "beast_settings_" + field_names[b] + "_sim.txt"
        )

        # -----------------
        # 3. make symbolic links to model grids and noise models
        # -----------------

        # make new directory
        if not os.path.isdir("./" + settings.project):
            os.mkdir("./" + settings.project)

        # symlink the physics/noise models into the "_sim" project
        orig_phys = list(set(modelsedgrid_files))
        for grid in orig_phys:
            source = os.path.abspath(grid)
            dest = os.path.abspath(grid.replace("_beast", "_sim"))
            if not os.path.islink(dest):
                os.symlink(source, dest)
        orig_noise = list(set(noise_files))
        for grid in orig_noise:
            source = os.path.abspath(grid)
            dest = os.path.abspath(grid.replace("_beast", "_sim"))
            if not os.path.islink(dest):
                os.symlink(source, dest)

        # -----------------
        # 4. make script to trim models
        # -----------------

        print("")
        print("setting up script to trim models")
        print("")

        job_file_list = make_trim_scripts.make_trim_scripts(
            settings, num_subtrim=1, prefix="source activate b13"
        )

        if len(job_file_list) > 0:
            print(
                "\n**** go run trimming code for " + field_names[b] + "! ****"
            )
            print("Here are the command(s) to run:")
            for job in job_file_list:
                print("at -f " + job + " now")
            # NOTE(review): this ends the whole wrapper, so any remaining
            # fields are not processed in this call (the fitting step uses
            # `continue`) -- confirm this is intended
            return
        else:
            print("all files are trimmed for " + field_names[b])

        # -----------------
        # 5. make script to fit models
        # -----------------

        print("")
        print("setting up script to fit models")
        print("")

        fit_run_info = setup_batch_beast_fit.setup_batch_beast_fit(
            settings,
            num_percore=1,
            nice=19,
            overwrite_logfile=False,
            prefix="source activate b13",
            use_sd=True,
            nsubs=settings.n_subgrid,
            nprocs=1,
        )

        # check if the fits exist before moving on
        tot_remaining = len(fit_run_info["done"]) - np.sum(
            fit_run_info["done"]
        )
        if tot_remaining > 0:
            print(
                "\n**** go run fitting code for " + field_names[b] + "! ****"
            )
            print(
                "Here are the " + str(len(fit_run_info["files_to_run"]))
                + " commands to run:"
            )
            for job_file in fit_run_info["files_to_run"]:
                print("at -f ./" + job_file + " now")
            continue
        else:
            print("all fits are complete for " + field_names[b])

        # -----------------
        # 6. plots
        # -----------------

        print("")
        print("making plots")
        print("")

        # grab relevant file names (for the "_sim" settings loaded in step 2)
        file_dict = create_filenames.create_filenames(
            settings,
            use_sd=True,
            nsubs=settings.n_subgrid,
        )

        plot_param_recovery.plot_param_recovery(
            file_dict["photometry_files"],
            file_dict["stats_files"],
            field_names[b] + "_param_recovery.pdf",
            max_nbins=20,
        )

        for stats_file in file_dict["stats_files"]:
            plot_param_err.plot(stats_file, n_bins=10)
            plot_triangle.plot_triangle(stats_file)
def run_fitting(
    use_sd=True,
    nsubs=1,
    nprocs=1,
    choose_sd_sub=None,
    choose_subgrid=None,
    pdf2d_param_list=['Av', 'Rv', 'f_A', 'M_ini', 'logA', 'Z', 'distance'],
    resume=False,
):
    """
    Run the fitting.  If nsubs > 1, this will find existing subgrids.
    If use_sd is True, will also incorporate source density info.

    The additional choose_* options are to make queue scripts usable,
    by specifying a given SD+sub and/or subgrid for the fitting run.

    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    choose_sd_sub : list of two strings (default=None)
        If this is set, the fitting will just be for this combo of SD+sub,
        rather than all of them.  Overrides use_sd.
        format of the list: ['#','#']

    choose_subgrid : int (default=None)
        If this is set, the fitting will just be for this subgrid index.
        If nsubs=1, this is ignored.

    pdf2d_param_list : list of strings or None
        If set, do 2D PDFs of these parameters.  If None, don't make 2D PDFs.
        The default list is shared between calls; this function only reads
        it and hands it on to the fitting routine.

    resume : boolean (default=False)
        choose whether to resume existing run or start over

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)

    # check input parameters
    verify_params.verify_input_format(datamodel)

    # keep track of time
    # (time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time)
    start_time = time.perf_counter()

    # --------------------
    # make lists of file names
    # --------------------

    file_dict = create_filenames.create_filenames(
        use_sd=use_sd,
        nsubs=nsubs,
        choose_sd_sub=choose_sd_sub,
        choose_subgrid=choose_subgrid,
    )

    # input files
    photometry_files = file_dict["photometry_files"]
    modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"]
    noise_trim_files = file_dict["noise_trim_files"]

    # output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    pdf2d_files = file_dict["pdf2d_files"]
    if pdf2d_param_list is None:
        # no 2D PDFs requested, so there are no 2D PDF files to write
        pdf2d_files = [None] * len(pdf2d_files)
    lnp_files = file_dict["lnp_files"]

    # total number of files
    n_files = len(photometry_files)

    # other potentially useful info
    sd_sub_info = file_dict["sd_sub_info"]

    # if using subgrids, make the grid dictionary file:
    # File where the ranges and number of unique values for the grid
    # will be stored (this can take a while to calculate)

    if nsubs > 1:

        gridpickle_files = file_dict["gridpickle_files"]

        for i, gridpickle_file in enumerate(gridpickle_files):
            # an existing grid info file is reused rather than recomputed
            if os.path.isfile(gridpickle_file):
                continue

            # list of corresponding SED grids and noise models

            # - with SD+sub: get file list for ALL subgrids at current SD+sub
            if use_sd or (choose_sd_sub is not None):
                temp = create_filenames.create_filenames(
                    nsubs=nsubs, choose_sd_sub=sd_sub_info[i], choose_subgrid=None
                )
            # - no SD info: get file list for ALL subgrids
            else:
                temp = create_filenames.create_filenames(
                    use_sd=False, nsubs=nsubs, choose_subgrid=None
                )

            modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
            noise_trim_list = temp["noise_trim_files"]

            # create the grid info dictionary
            print("creating grid_info_dict for " + gridpickle_file)
            grid_info_dict = subgridding_tools.reduce_grid_info(
                modelsedgrid_trim_list, noise_trim_list, nprocs=nprocs
            )
            # save it
            with open(gridpickle_file, "wb") as p:
                pickle.dump(grid_info_dict, p)
            print("wrote grid_info_dict to " + gridpickle_file)

    # --------------------
    # do the fitting!
    # --------------------

    # set up function inputs: one tuple per photometry file.  The grid info
    # file only exists when subgrids are in use; otherwise pass None.
    if nsubs > 1:
        grid_info_files = gridpickle_files
    else:
        grid_info_files = [None] * n_files

    input_list = [
        (
            photometry_files[i],
            modelsedgrid_trim_files[i],
            noise_trim_files[i],
            stats_files[i],
            pdf_files[i],
            pdf2d_files[i],
            pdf2d_param_list,
            lnp_files[i],
            grid_info_files[i],
            resume,
        )
        for i in range(n_files)
    ]

    # run the fitting (via parallel wrapper)
    parallel_wrapper(fit_submodel, input_list, nprocs=nprocs)

    # see how long it took!
    new_time = time.perf_counter()
    print("time to fit: ", (new_time - start_time) / 60.0, " min")
def make_trim_scripts(
    beast_settings_info,
    num_subtrim=1,
    nice=None,
    prefix=None,
):
    """
    Assemble the file names that `setup_batch_beast_trim.py` needs and use
    them to create the batch trimming job files, one set per model grid.

    NOTE: This assumes you're using source density or background dependent
    noise models.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    num_subtrim : int (default = 1)
        number of trim batch jobs

    nice : int (default = None)
        set this to an integer (-20 to 20) to prepend a "nice" level
        to the trimming command

    prefix : string (default=None)
        Set this to a string (such as 'source activate astroconda') to prepend
        to each batch file (embed newline characters to make multiple lines)

    Returns
    -------
    job_files : list of strings
        Names of the newly created job files

    """

    # accept either a settings file name or an already-built settings object
    settings = beast_settings_info
    if isinstance(settings, str):
        settings = beast_settings.beast_settings(settings)
    elif not isinstance(settings, beast_settings.beast_settings):
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # every file name associated with this run
    names = create_filenames.create_filenames(
        settings,
        use_sd=True,
        nsubs=settings.n_subgrid,
    )

    # the lists used below (all are indexed in parallel)
    phot_all = names["photometry_files"]
    sed_all = names["modelsedgrid_files"]
    noise_all = names["noise_files"]
    sed_trim_all = names["modelsedgrid_trim_files"]
    noise_trim_all = names["noise_trim_files"]

    # model grids without repeats, in order of first appearance
    distinct_grids = list(dict.fromkeys(sed_all))

    # names of the job lists that get written
    created_joblists = []

    # one pass per model grid
    for grid_num in range(settings.n_subgrid):

        # positions in the parallel lists that belong to this model grid
        members = [
            k for k, name in enumerate(sed_all)
            if name == distinct_grids[grid_num]
        ]

        # noise models and photometry that go with this grid
        grid_noise = [noise_all[k] for k in members]
        grid_phot = [phot_all[k] for k in members]

        # the trim prefix is the longest substring shared by the trimmed SED
        # file name and the trimmed noise file name, minus its last character
        # (the trailing "_")
        trim_prefixes = []
        for k in members:
            sed_trim = sed_trim_all[k]
            noise_trim = noise_trim_all[k]
            common = SequenceMatcher(None, sed_trim, noise_trim).find_longest_match(
                0, len(sed_trim), 0, len(noise_trim)
            )
            shared = sed_trim[common.a:common.a + common.size]
            trim_prefixes.append(shared[:-1])

        # count how many trimmed noise files are already on disk
        n_trimmed = sum(os.path.isfile(noise_trim_all[k]) for k in members)

        # nothing left to trim for this model grid
        if n_trimmed >= len(grid_noise):
            continue

        job_path = f"./{settings.project}/trim_batch_jobs/"
        if settings.n_subgrid == 1:
            file_prefix = "BEAST"
        else:
            file_prefix = "BEAST_gridsub" + str(grid_num)

        # generate trimming at-queue commands
        setup_batch_beast_trim.generic_batch_trim(
            distinct_grids[grid_num],
            grid_noise,
            grid_phot,
            trim_prefixes,
            settings.obs_colnames,
            job_path=job_path,
            file_prefix=file_prefix,
            num_subtrim=num_subtrim,
            nice=nice,
            prefix=prefix,
        )

        created_joblists.append(job_path + file_prefix + "_batch_trim.joblist")

    return created_joblists
def generate_files_for_tests(run_beast=True, run_tools=True):
    """
    Produce the complete set of files used by the BEAST regression tests,
    starting from the metal_small example.

    Parameters
    ----------
    run_beast : boolean (default=True)
        if True, run the BEAST

    run_tools : boolean (default=True)
        if True, run the code to generate things for tools

    """

    # baseline settings read from file
    settings_orig = beast_settings.beast_settings("beast_settings.txt")

    # a second copy that uses two subgrids and its own project name
    settings_subgrids = copy.deepcopy(settings_orig)
    settings_subgrids.n_subgrid = 2
    settings_subgrids.project = "{0}_subgrids".format(settings_orig.project)

    # ==========================================
    # full BEAST run, once per settings variant
    # ==========================================

    if run_beast:

        for settings in (settings_orig, settings_subgrids):

            # --- physics model ---
            create_physicsmodel.create_physicsmodel(
                settings,
                nsubs=settings.n_subgrid,
                nprocs=1,
            )

            # --- ASTs (currently only works for no subgrids) ---
            if settings.n_subgrid == 1:
                make_ast_inputs.make_ast_inputs(
                    settings, pick_method="flux_bin_method"
                )

            # --- obs model ---
            create_obsmodel.create_obsmodel(
                settings,
                use_sd=False,
                nsubs=settings.n_subgrid,
                nprocs=1,
                use_rate=True,
            )

            # --- trimming ---

            # names of all the files for this variant
            file_dict = create_filenames.create_filenames(
                settings, use_sd=False, nsubs=settings.n_subgrid
            )

            # the observed catalog
            obsdata = Observations(
                settings.obsfile, settings.filters, settings.obs_colnames
            )

            for grid_idx in range(settings.n_subgrid):

                # physics grid for this subgrid
                sed_grid = SEDGrid(file_dict["modelsedgrid_files"][grid_idx])

                # the noise model that was just created for it
                noise_vals = noisemodel.get_noisemodelcat(
                    file_dict["noise_files"][grid_idx]
                )

                # write trimmed versions of both
                trim_grid.trim_models(
                    sed_grid,
                    noise_vals,
                    obsdata,
                    file_dict["modelsedgrid_trim_files"][grid_idx],
                    file_dict["noise_trim_files"][grid_idx],
                    sigma_fac=3.0,
                )

            # --- fitting ---
            run_fitting.run_fitting(
                settings,
                use_sd=False,
                nsubs=settings.n_subgrid,
                nprocs=1,
                pdf2d_param_list=["Av", "M_ini", "logT"],
                pdf_max_nbins=200,
            )

            # --- merging (it'll automatically skip for no subgrids) ---
            merge_files.merge_files(
                settings, use_sd=False, nsubs=settings.n_subgrid
            )

            print("\n\n")

    # ==========================================
    # reference files for assorted tools
    # ==========================================

    if run_tools:

        project = settings_orig.project

        # --- compare_spec_type ---

        # keyword arguments handed to the tool
        spec_type_args = {
            "spec_ra": [72.67213351],
            "spec_dec": [-67.71720515],
            "spec_type": ["A"],
            "spec_subtype": [0],
            "lumin_class": ["IV"],
            "match_radius": 0.2,
        }

        spec_type_result = compare_spec_type.compare_spec_type(
            settings_orig.obsfile,
            f"{project}/{project}_stats.fits",
            **spec_type_args,
        )

        # store what went in alongside what came out
        asdf.AsdfFile(
            {"input": spec_type_args, "output": spec_type_result}
        ).write_to(f"{project}/{project}_compare_spec_type.asdf")

        # --- star_type_probability ---

        # keyword arguments handed to the tool
        star_prob_args = {
            "output_filebase": None,
            "ext_O_star_params": {
                "min_M_ini": 10,
                "min_Av": 0.5,
                "max_Av": 5,
            },
        }

        star_prob_result = star_type_probability.star_type_probability(
            f"{project}/{project}_pdf1d.fits",
            f"{project}/{project}_pdf2d.fits",
            **star_prob_args,
        )

        # store what went in alongside what came out
        asdf.AsdfFile(
            {"input": star_prob_args, "output": star_prob_result}
        ).write_to(f"{project}/{project}_star_type_probability.asdf")

    # ==========================================
    # asdf file permissions
    # ==========================================

    # for unknown reasons, asdf currently writes files with permissions set
    # to -rw-------.  Switch them to -rw-r--r-- (like the rest of the BEAST
    # files) so they can easily be copied over to the cached file website.
    world_readable = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
    for asdf_path in glob.glob("*/*.asdf"):
        os.chmod(asdf_path, world_readable)