def create_physicsmodel(nsubs=1, nprocs=1, subset=[None, None]):
    """
    Create the physics model grid.  If nsubs > 1, this will make sub-grids.

    Parameters
    ----------
    nsubs : int (default=1)
        number of subgrids to split the physics model into

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    subset : list of two ints (default=[None,None])
        Only process subgrids in the range [start,stop].
        (only relevant if nsubs > 1)

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # filename for the SED grid
    modelsedgrid_filename = "%s/%s_seds.grid.hd5" % (
        datamodel.project,
        datamodel.project,
    )

    # grab the current subgrid slice
    subset_slice = slice(subset[0], subset[1])

    # make sure the project directory exists
    create_project_dir(datamodel.project)

    # download and load the isochrones
    (iso_fname, oiso) = make_iso_table(
        datamodel.project,
        oiso=datamodel.oiso,
        logtmin=datamodel.logt[0],
        logtmax=datamodel.logt[1],
        dlogt=datamodel.logt[2],
        z=datamodel.z,
    )

    # remove the isochrone points with logL=-9.999
    oiso = ezIsoch(oiso.selectWhere("*", "logL > -9"))

    if hasattr(datamodel, "add_spectral_properties_kwargs"):
        extra_kwargs = datamodel.add_spectral_properties_kwargs
    else:
        extra_kwargs = None

    if hasattr(datamodel, "velocity"):
        redshift = (datamodel.velocity / const.c).decompose().value
    else:
        redshift = 0

    # generate the spectral library (no dust extinction)
    (spec_fname, g_spec) = make_spectral_grid(
        datamodel.project,
        oiso,
        osl=datamodel.osl,
        redshift=redshift,
        distance=datamodel.distances,
        distance_unit=datamodel.distance_unit,
        extLaw=datamodel.extLaw,
        add_spectral_properties_kwargs=extra_kwargs,
    )

    # add the stellar priors as weights
    #   also computes the grid weights for the stellar part
    (pspec_fname, g_pspec) = add_stellar_priors(
        datamodel.project,
        g_spec,
        age_prior_model=datamodel.age_prior_model,
        mass_prior_model=datamodel.mass_prior_model,
        met_prior_model=datamodel.met_prior_model,
    )

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:
        # generate the SED grid by integrating the filter response functions
        #   effect of dust extinction applied before filter integration
        #   also computes the dust priors as weights
        make_extinguished_sed_grid(
            datamodel.project,
            g_pspec,
            datamodel.filters,
            extLaw=datamodel.extLaw,
            av=datamodel.avs,
            rv=datamodel.rvs,
            fA=datamodel.fAs,
            rv_prior_model=datamodel.rv_prior_model,
            av_prior_model=datamodel.av_prior_model,
            fA_prior_model=datamodel.fA_prior_model,
            spec_fname=modelsedgrid_filename,
            add_spectral_properties_kwargs=extra_kwargs,
        )

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:
        # Work with the whole grid up to this point (otherwise, priors need a
        # rework - they don't like having only a subset of the parameter
        # space, especially when there's only one age, for example)

        # Make subgrids by splitting the spectral grid into equal-sized pieces
        custom_sub_pspec = subgridding_tools.split_grid(pspec_fname, nsubs)

        file_prefix = "{0}/{0}_".format(datamodel.project)

        # function to process the subgrids individually
        def gen_subgrid(i, sub_name):
            sub_g_pspec = FileSEDGrid(sub_name)
            sub_seds_fname = "{}seds.gridsub{}.hd5".format(file_prefix, i)

            # generate the SED grid by integrating the filter response functions
            #   effect of dust extinction applied before filter integration
            #   also computes the dust priors as weights
            (sub_seds_fname, sub_g_seds) = make_extinguished_sed_grid(
                datamodel.project,
                sub_g_pspec,
                datamodel.filters,
                extLaw=datamodel.extLaw,
                av=datamodel.avs,
                rv=datamodel.rvs,
                fA=datamodel.fAs,
                rv_prior_model=datamodel.rv_prior_model,
                av_prior_model=datamodel.av_prior_model,
                fA_prior_model=datamodel.fA_prior_model,
                add_spectral_properties_kwargs=extra_kwargs,
                seds_fname=sub_seds_fname,
            )

            return sub_seds_fname

        # run the above function
        par_tuples = [
            (i, sub_name) for i, sub_name in enumerate(custom_sub_pspec)
        ][subset_slice]

        parallel_wrapper(gen_subgrid, par_tuples, nprocs=nprocs)

        # Save a list of subgrid names that we expect to see
        required_names = [
            "{}seds.gridsub{}.hd5".format(file_prefix, i) for i in range(nsubs)
        ]

        outdir = os.path.join(".", datamodel.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")

        with open(subgrid_names_file, "w") as fname_file:
            for fname in required_names:
                fname_file.write(fname + "\n")
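
# Illustrative usage sketch for create_physicsmodel above (not part of the
# original module; assumes a configured datamodel.py is importable):
#
#   # build the full physics model grid in a single file
#   create_physicsmodel(nsubs=1)
#
#   # split the grid into 5 subgrids, processing only the first two,
#   # with 2 parallel processes
#   create_physicsmodel(nsubs=5, nprocs=2, subset=[0, 2])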
def create_obsmodel(use_sd=True, nsubs=1, nprocs=1, subset=[None, None], use_rate=True):
    """
    Create the observation models.  If nsubs > 1, this will find existing
    subgrids.  If use_sd is True, will also incorporate source density info.

    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    subset : list of two ints (default=[None,None])
        Only process subgrids in the range [start,stop].
        (only relevant if nsubs > 1)

    use_rate : boolean (default=True)
        Choose whether to use the rate or magnitude when creating the noise
        model.  This should always be True, but is currently an option to be
        compatible with the phat_small example (which has no rate info).
        When that gets fixed, please remove this option!

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # --------------------
    # figure out if there are source density bins
    # --------------------

    ast_file_list = sorted(glob.glob(datamodel.astfile.replace(".fits", "*_bin*")))

    if use_sd and (len(ast_file_list) > 0):

        sd_list = []
        for ast_file in ast_file_list:
            dpos = ast_file.rfind("_bin")
            ppos = ast_file.rfind(".")
            sd_list.append(ast_file[dpos + 4:ppos])
        print("sd list: ", sd_list)

    else:
        # if there are no ASTs with source densities, the flag should be False
        use_sd = False

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        modelsedgridfile = "{0}/{0}_seds.grid.hd5".format(datamodel.project)

        # if we're splitting by source density
        if use_sd:
            input_list = [(modelsedgridfile, curr_sd) for curr_sd in sd_list]
            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:
            input_list = [(modelsedgridfile, None, use_rate)]
            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:

        # get the list of physics model files
        outdir = os.path.join(".", datamodel.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")
        modelsedgridfiles = get_modelsubgridfiles(subgrid_names_file)[
            slice(subset[0], subset[1])
        ]

        # if we're splitting by source density
        if use_sd:
            input_list = [
                (sedfile, curr_sd)
                for sedfile in modelsedgridfiles
                for curr_sd in sd_list
            ]
            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:
            input_list = [(sedfile, None) for sedfile in modelsedgridfiles]
            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)
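
# Illustrative usage sketch for create_obsmodel above (not part of the
# original module; assumes ASTs already exist at datamodel.astfile, optionally
# split into *_bin* files by source density):
#
#   # source-density-dependent noise models for a single (non-subgridded) grid
#   create_obsmodel(use_sd=True, nsubs=1)
#
#   # noise models for subgrids 3 through 5 only, using 3 processes
#   create_obsmodel(use_sd=False, nsubs=10, nprocs=3, subset=[3, 6])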
def create_physicsmodel(beast_settings_info, nsubs=1, nprocs=1, subset=[None, None]):
    """
    Create the physics model grid.  If nsubs > 1, this will make sub-grids.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    nsubs : int (default=1)
        number of subgrids to split the physics model into

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    subset : list of two ints (default=[None,None])
        Only process subgrids in the range [start,stop].
        (only relevant if nsubs > 1)

    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # filename for the SED grid
    modelsedgrid_filename = "%s/%s_seds.grid.hd5" % (
        settings.project,
        settings.project,
    )

    # grab the current subgrid slice
    subset_slice = slice(subset[0], subset[1])

    # make sure the project directory exists
    create_project_dir(settings.project)

    # download and load the isochrones
    (iso_fname, oiso) = make_iso_table(
        settings.project,
        oiso=settings.oiso,
        logtmin=settings.logt[0],
        logtmax=settings.logt[1],
        dlogt=settings.logt[2],
        z=settings.z,
    )

    if hasattr(settings, "add_spectral_properties_kwargs"):
        extra_kwargs = settings.add_spectral_properties_kwargs
    else:
        extra_kwargs = None

    if hasattr(settings, "velocity"):
        redshift = (settings.velocity / const.c).decompose().value
    else:
        redshift = 0

    # generate the spectral library (no dust extinction)
    (spec_fname, g_spec) = make_spectral_grid(
        settings.project,
        oiso,
        osl=settings.osl,
        redshift=redshift,
        distance=settings.distances,
        distance_unit=settings.distance_unit,
        extLaw=settings.extLaw,
        add_spectral_properties_kwargs=extra_kwargs,
    )

    # add the stellar priors as weights
    #   also computes the grid weights for the stellar part
    (pspec_fname, g_pspec) = add_stellar_priors(
        settings.project,
        g_spec,
        age_prior_model=settings.age_prior_model,
        mass_prior_model=settings.mass_prior_model,
        met_prior_model=settings.met_prior_model,
        distance_prior_model=settings.distance_prior_model,
    )

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:
        # generate the SED grid by integrating the filter response functions
        #   effect of dust extinction applied before filter integration
        #   also computes the dust priors as weights
        make_extinguished_sed_grid(
            settings.project,
            g_pspec,
            settings.filters,
            extLaw=settings.extLaw,
            av=settings.avs,
            rv=settings.rvs,
            fA=settings.fAs,
            rv_prior_model=settings.rv_prior_model,
            av_prior_model=settings.av_prior_model,
            fA_prior_model=settings.fA_prior_model,
            spec_fname=modelsedgrid_filename,
            add_spectral_properties_kwargs=extra_kwargs,
        )

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:
        # Work with the whole grid up to this point (otherwise, priors need a
        # rework - they don't like having only a subset of the parameter
        # space, especially when there's only one age, for example)

        # Make subgrids by splitting the spectral grid into equal-sized pieces
        custom_sub_pspec = subgridding_tools.split_grid(pspec_fname, nsubs)

        file_prefix = "{0}/{0}_".format(settings.project)

        # function to process the subgrids individually
        def gen_subgrid(i, sub_name):
            sub_g_pspec = SpectralGrid(sub_name)
            sub_seds_fname = "{}seds.gridsub{}.hd5".format(file_prefix, i)

            # generate the SED grid by integrating the filter response functions
            #   effect of dust extinction applied before filter integration
            #   also computes the dust priors as weights
            (sub_seds_fname, sub_g_seds) = make_extinguished_sed_grid(
                settings.project,
                sub_g_pspec,
                settings.filters,
                extLaw=settings.extLaw,
                av=settings.avs,
                rv=settings.rvs,
                fA=settings.fAs,
                rv_prior_model=settings.rv_prior_model,
                av_prior_model=settings.av_prior_model,
                fA_prior_model=settings.fA_prior_model,
                add_spectral_properties_kwargs=extra_kwargs,
                seds_fname=sub_seds_fname,
            )

            return sub_seds_fname

        # run the above function
        par_tuples = [(i, sub_name) for i, sub_name in enumerate(custom_sub_pspec)][
            subset_slice
        ]

        parallel_wrapper(gen_subgrid, par_tuples, nprocs=nprocs)

        # Save a list of subgrid names that we expect to see
        required_names = [
            "{}seds.gridsub{}.hd5".format(file_prefix, i) for i in range(nsubs)
        ]

        outdir = os.path.join(".", settings.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")

        with open(subgrid_names_file, "w") as fname_file:
            for fname in required_names:
                fname_file.write(fname + "\n")
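
# Illustrative usage sketch for the beast_settings-based create_physicsmodel
# above (not part of the original module; assumes "beast_settings.txt" is a
# valid settings file for the project):
#
#   from beast.tools import beast_settings
#   settings = beast_settings.beast_settings("beast_settings.txt")
#   create_physicsmodel(settings, nsubs=1)
#
#   # equivalently, pass the file name and let the function load it
#   create_physicsmodel("beast_settings.txt", nsubs=5, nprocs=2)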
def run_fitting(
    use_sd=True,
    nsubs=1,
    nprocs=1,
    choose_sd_sub=None,
    choose_subgrid=None,
    pdf2d_param_list=["Av", "Rv", "f_A", "M_ini", "logA", "Z", "distance"],
    resume=False,
):
    """
    Run the fitting.  If nsubs > 1, this will find existing subgrids.
    If use_sd is True, will also incorporate source density info.

    The additional choose_* options are to make queue scripts usable,
    by specifying a given SD+sub and/or subgrid for the fitting run.

    Parameters
    ----------
    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to datamodel.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    choose_sd_sub : list of two strings (default=None)
        If this is set, the fitting will just be for this combo of SD+sub,
        rather than all of them.  Overrides use_sd.
        format of the list: ['#','#']

    choose_subgrid : int (default=None)
        If this is set, the fitting will just be for this subgrid index.
        If nsubs=1, this is ignored.

    pdf2d_param_list : list of strings or None
        If set, do 2D PDFs of these parameters.  If None, don't make 2D PDFs.

    resume : boolean (default=False)
        choose whether to resume existing run or start over

    """

    # before doing ANYTHING, force datamodel to re-import (otherwise, any
    # changes within this python session will not be loaded!)
    importlib.reload(datamodel)
    # check input parameters
    verify_params.verify_input_format(datamodel)

    # keep track of time
    start_time = time.time()

    # --------------------
    # make lists of file names
    # --------------------

    file_dict = create_filenames.create_filenames(
        use_sd=use_sd,
        nsubs=nsubs,
        choose_sd_sub=choose_sd_sub,
        choose_subgrid=choose_subgrid,
    )

    # input files
    photometry_files = file_dict["photometry_files"]
    # modelsedgrid_files = file_dict["modelsedgrid_files"]
    modelsedgrid_trim_files = file_dict["modelsedgrid_trim_files"]
    # noise_files = file_dict["noise_files"]
    noise_trim_files = file_dict["noise_trim_files"]

    # output files
    stats_files = file_dict["stats_files"]
    pdf_files = file_dict["pdf_files"]
    pdf2d_files = file_dict["pdf2d_files"]
    if pdf2d_param_list is None:
        pdf2d_files = [None for i in range(len(pdf2d_files))]
    lnp_files = file_dict["lnp_files"]

    # total number of files
    n_files = len(photometry_files)

    # other potentially useful info
    sd_sub_info = file_dict["sd_sub_info"]
    # gridsub_info = file_dict['gridsub_info']

    # if using subgrids, make the grid dictionary file:
    # File where the ranges and number of unique values for the grid
    # will be stored (this can take a while to calculate)

    if nsubs > 1:

        gridpickle_files = file_dict["gridpickle_files"]

        for i in range(len(gridpickle_files)):
            if not os.path.isfile(gridpickle_files[i]):

                # list of corresponding SED grids and noise models

                # - with SD+sub: get file list for ALL subgrids at current SD+sub
                if use_sd or (choose_sd_sub is not None):
                    temp = create_filenames.create_filenames(
                        nsubs=nsubs, choose_sd_sub=sd_sub_info[i], choose_subgrid=None
                    )
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # - no SD info: get file list for ALL subgrids
                else:
                    temp = create_filenames.create_filenames(
                        use_sd=False, nsubs=nsubs, choose_subgrid=None
                    )
                    modelsedgrid_trim_list = temp["modelsedgrid_trim_files"]
                    noise_trim_list = temp["noise_trim_files"]

                # create the grid info dictionary
                print("creating grid_info_dict for " + gridpickle_files[i])
                grid_info_dict = subgridding_tools.reduce_grid_info(
                    modelsedgrid_trim_list, noise_trim_list, nprocs=nprocs
                )

                # save it
                with open(gridpickle_files[i], "wb") as p:
                    pickle.dump(grid_info_dict, p)
                print("wrote grid_info_dict to " + gridpickle_files[i])

    # --------------------
    # do the fitting!
    # --------------------

    # set up function inputs

    if nsubs == 1:
        input_list = [
            (
                photometry_files[i],
                modelsedgrid_trim_files[i],
                noise_trim_files[i],
                stats_files[i],
                pdf_files[i],
                pdf2d_files[i],
                pdf2d_param_list,
                lnp_files[i],
                None,
                resume,
            )
            for i in range(n_files)
        ]

    if nsubs > 1:
        input_list = [
            (
                photometry_files[i],
                modelsedgrid_trim_files[i],
                noise_trim_files[i],
                stats_files[i],
                pdf_files[i],
                pdf2d_files[i],
                pdf2d_param_list,
                lnp_files[i],
                gridpickle_files[i],
                resume,
            )
            for i in range(n_files)
        ]

    # run the fitting (via parallel wrapper)
    parallel_wrapper(fit_submodel, input_list, nprocs=nprocs)

    # see how long it took!
    new_time = time.time()
    print("time to fit: ", (new_time - start_time) / 60.0, " min")
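
# Illustrative usage sketch for run_fitting above (not part of the original
# module; assumes the physics model, observation model, and trimmed grids
# already exist for the project):
#
#   # fit everything: all source-density bins, all subgrids
#   run_fitting(use_sd=True, nsubs=10, nprocs=2)
#
#   # fit a single SD+sub combo and subgrid, e.g. from a queue script
#   run_fitting(nsubs=10, choose_sd_sub=["1", "0"], choose_subgrid=4)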
def create_obsmodel(
    beast_settings_info,
    use_sd=True,
    nsubs=1,
    nprocs=1,
    subset=[None, None],
):
    """
    Create the observation models.  If nsubs > 1, this will find existing
    subgrids.  If use_sd is True, will also incorporate source density info.

    Parameters
    ----------
    beast_settings_info : string or beast.tools.beast_settings.beast_settings instance
        if string: file name with beast settings
        if class: beast.tools.beast_settings.beast_settings instance

    use_sd : boolean (default=True)
        If True, create source density dependent noise models (determined by
        finding matches to settings.astfile with SD info)

    nsubs : int (default=1)
        number of subgrids used for the physics model

    nprocs : int (default=1)
        Number of parallel processes to use
        (currently only implemented for subgrids)

    subset : list of two ints (default=[None,None])
        Only process subgrids in the range [start,stop].
        (only relevant if nsubs > 1)

    """

    # process beast settings info
    if isinstance(beast_settings_info, str):
        settings = beast_settings.beast_settings(beast_settings_info)
    elif isinstance(beast_settings_info, beast_settings.beast_settings):
        settings = beast_settings_info
    else:
        raise TypeError(
            "beast_settings_info must be string or beast.tools.beast_settings.beast_settings instance"
        )

    # --------------------
    # figure out if there are source density bins
    # --------------------

    ast_file_list = sorted(glob.glob(settings.astfile.replace(".fits", "*_bin*")))

    if use_sd and (len(ast_file_list) > 0):

        sd_list = []
        for ast_file in ast_file_list:
            dpos = ast_file.rfind("_bin")
            ppos = ast_file.rfind(".")
            sd_list.append(ast_file[dpos + 4:ppos])
        print("sd list: ", sd_list)

    else:
        # if there are no ASTs with source densities, the flag should be False
        use_sd = False

    # --------------------
    # no subgrids
    # --------------------

    if nsubs == 1:

        modelsedgridfile = "{0}/{0}_seds.grid.hd5".format(settings.project)

        # if we're splitting by source density
        if use_sd:
            input_list = [(settings, modelsedgridfile, curr_sd) for curr_sd in sd_list]
            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:
            input_list = [(settings, modelsedgridfile, None)]
            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

    # --------------------
    # use subgrids
    # --------------------

    if nsubs > 1:

        # get the list of physics model files
        outdir = os.path.join(".", settings.project)
        subgrid_names_file = os.path.join(outdir, "subgrid_fnames.txt")
        modelsedgridfiles = get_modelsubgridfiles(subgrid_names_file)[
            slice(subset[0], subset[1])
        ]

        # if we're splitting by source density
        if use_sd:
            input_list = [
                (settings, sedfile, curr_sd)
                for sedfile in modelsedgridfiles
                for curr_sd in sd_list
            ]
            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)

        # if we're not splitting by source density
        else:
            input_list = [(settings, sedfile, None) for sedfile in modelsedgridfiles]
            parallel_wrapper(gen_obsmodel, input_list, nprocs=nprocs)
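
# Illustrative usage sketch for the beast_settings-based create_obsmodel
# above (not part of the original module; assumes the physics model grid(s)
# from create_physicsmodel already exist):
#
#   settings = beast_settings.beast_settings("beast_settings.txt")
#   create_obsmodel(settings, use_sd=True, nsubs=1)
#   create_obsmodel(settings, use_sd=False, nsubs=10, nprocs=2, subset=[0, 5])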