def make_inference_corner_plot(workflow, mcmc_file, output_dir, config_file,
                               name="mcmc_corner", analysis_seg=None,
                               tags=None):
    """Add a job to the workflow that makes a corner plot of the posteriors.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    mcmc_file: pycbc.workflow.File
        The file with MCMC samples.
    output_dir: str
        The directory to store result plots and files.
    config_file: str
        The path to the inference configuration file that has a
        [variable_args] section.
    name: str
        The name in the [executables] section of the configuration file
        to use.
    analysis_seg: {None, glue.segments.Segment}
        The segment this job encompasses. If None then use the total
        analysis time from the workflow.
    tags: {None, optional}
        Tags handed to the plotting executable. If None, an empty list
        is used.

    Returns
    -------
    pycbc.workflow.FileList
        A list of result and output files.
    """
    # fall back to defaults for anything the caller left unset
    if tags is None:
        tags = []
    if analysis_seg is None:
        analysis_seg = workflow.analysis_time

    # the parameters that vary are the options of [variable_args]
    inference_cp = WorkflowConfigParser([config_file])
    variable_args = inference_cp.options("variable_args")

    # when both component masses vary, the derived mass parameters are
    # appended as well
    # NOTE(review): variable_args is not used again in this function
    has_both_masses = "mass1" in variable_args and "mass2" in variable_args
    if has_both_masses:
        variable_args += ["mchirp", "eta"]

    # the output directory must exist before the node is created
    makedir(output_dir)

    # build the plotting node and attach its command line options
    plot_exe = PlotExecutable(workflow.cp, name, ifos=workflow.ifos,
                              out_dir=output_dir, universe="local",
                              tags=tags)
    node = plot_exe.create_node()
    node.add_input_opt("--input-file", mcmc_file)
    node.new_output_file_opt(analysis_seg, ".png", "--output-file")

    # register the node with the workflow
    workflow += node

    return node.output_files
def setUp(self):
    """Load the example configuration and the distributions under test.

    Seeds numpy's global random number generator, reads the example
    configuration file located relative to this test file, and stores
    the parsed arguments and distributions on the test case.

    Raises
    ------
    ValueError
        If a name in ``distributions.distribs`` has no matching instance
        among the distributions read from the configuration file and is
        not listed in ``EXCLUDE_DIST_NAMES``.
    """
    # set random seed
    numpy.random.seed(1024)

    # path to example configuration file for testing
    config_path = "/".join([os.path.dirname(os.path.realpath(__file__)),
                            "../examples/distributions/example.ini"])

    # get a set of simulated command line options for
    # configuration file reading
    class Arguments(object):
        config_overrides = []
        config_files = [config_path]

    self.opts = Arguments()

    # read configuration files
    self.cp = WorkflowConfigParser.from_cli(self.opts)
    args = distributions.read_args_from_config(self.cp)
    # NOTE(review): "contraints" is misspelled; the name is kept because
    # other methods outside this block may read it -- confirm and rename
    self.variable_args, self.static_args, self.contraints = args

    # read distributions
    self.dists = distributions.read_distributions_from_config(self.cp)

    # check that all distributions will be tested; .items() is used
    # because dict.iteritems() does not exist on Python 3
    for dname, dclass in distributions.distribs.items():
        has_instance = any(isinstance(dist, dclass) for dist in self.dists)
        if not has_instance and dname not in EXCLUDE_DIST_NAMES:
            raise ValueError("There is no test for {}".format(dname))
def setUp(self):
    """Load the example configuration and the distributions under test.

    Seeds numpy's global random number generator, parses the example
    configuration file found relative to this test file, and stores the
    parameters, constraints and distributions on the test case.

    Raises
    ------
    ValueError
        If a name in ``distributions.distribs`` has no matching instance
        among the distributions read from the configuration file and is
        not listed in ``EXCLUDE_DIST_NAMES``.
    """
    # fix the seed
    numpy.random.seed(1024)

    # the example configuration lives relative to this test file
    here = os.path.dirname(os.path.realpath(__file__))
    config_path = "/".join([here, "../examples/distributions/example.ini"])

    # stand-in for parsed command line options
    class Arguments(object):
        config_overrides = []
        config_delete = []
        config_files = [config_path]

    self.opts = Arguments()

    # parse the configuration
    self.cp = WorkflowConfigParser.from_cli(self.opts)
    params = distributions.read_params_from_config(self.cp)
    self.variable_args, self.static_args = params
    self.constraints = distributions.read_constraints_from_config(self.cp)

    # load the distributions
    self.dists = distributions.read_distributions_from_config(self.cp)

    # every known distribution must either be excluded explicitly or
    # have at least one instance loaded from the configuration
    for dname in distributions.distribs:
        dclass = distributions.distribs[dname]
        is_covered = numpy.any(
            [isinstance(dist, dclass) for dist in self.dists])
        if is_covered or dname in EXCLUDE_DIST_NAMES:
            continue
        raise ValueError("There is no test for {}".format(dname))
def __init__(
    self,
    config_files: Union[List[Union[str, os.PathLike]],
                        Union[str, os.PathLike]],
    seed: Optional[int] = None,
):
    """Set up a generator of gravitational waveform parameters.

    The configuration files are parsed with a ``WorkflowConfigParser``
    and the parameters, static arguments, constraints, transforms and
    distributions are read from it.

    Parameters
    ----------
    config_files
        A single configuration file path or a list of paths. Anything
        that is not a ``list`` is wrapped in a one-element list.
    seed
        Must be None; seeding is not implemented.

    Raises
    ------
    NotImplementedError
        If ``seed`` is not None.
    """
    if seed is not None:
        raise NotImplementedError(
            "Reproducible random seed not yet implemented.")

    # normalise the input to a list of files
    if isinstance(config_files, list):
        self.config_files = config_files
    else:
        self.config_files = [config_files]

    self.config_parser = WorkflowConfigParser(
        configFiles=self.config_files)

    # read everything that describes the parameter space
    params = read_params_from_config(self.config_parser)
    self.parameters, self.static_args = params
    self.constraints = read_constraints_from_config(self.config_parser)
    self.transforms = read_transforms_from_config(self.config_parser)

    # joint distribution over the variable parameters
    distributions = read_distributions_from_config(self.config_parser)
    self.distribution = JointDistribution(
        self.parameters, *distributions, constraints=self.constraints)

    # ensure statistics match output of self.parameters
    parameter_bounds = {}
    for parameter in self.parameters:
        parameter_bounds[parameter] = self.distribution.bounds[parameter]
    self.statistics = compute_parameter_statistics(parameter_bounds)
def __init__(self, config_file, random_seed):
    """Set up a joint distribution to sample parameters from.

    Parameters
    ----------
    config_file
        Path of the configuration file; it is wrapped in a list before
        being given to ``WorkflowConfigParser``.
    random_seed
        Seed passed to ``np.random.seed``.
    """
    # make the random number generator reproducible
    np.random.seed(random_seed)

    # `configFiles` has to be a list, hence the wrapping
    parser = WorkflowConfigParser(configFiles=[config_file])

    # variable arguments and constraints; the static arguments are not
    # needed here and are discarded
    self.var_args, _ = read_params_from_config(parser)
    self.constraints = read_constraints_from_config(parser)

    # distributions and transformations
    prior_dists = read_distributions_from_config(parser)
    self.trans = read_transforms_from_config(parser)

    # the joint distribution that samples are drawn from
    self.pval = JointDistribution(self.var_args, *prior_dists,
                                  constraints=self.constraints)
def config(scope='function'):
    """Yield a ``WorkflowConfigParser`` built from ``TEST_CONFIGURATION``.

    The configuration text is written to a temporary file, the parser is
    created from that file and yielded, and afterwards a file with the
    same base name in the current directory is removed if it exists.
    """
    # create WorkflowConfigParser and yield to test method
    with tempfile_with_content(TEST_CONFIGURATION) as cfo:
        parser = WorkflowConfigParser([cfo.name])
        yield parser

    # clean up after WorkflowConfigParser
    # presumably the parser leaves a copy of the file, under its base
    # name, in the working directory -- TODO confirm
    leftover = os.path.basename(cfo.name)
    if os.path.exists(leftover):
        os.unlink(leftover)
def __init__(self, config_file, seed=0):
    """Set up a joint distribution from a configuration file.

    Parameters
    ----------
    config_file
        Passed unchanged as the first argument of
        ``WorkflowConfigParser``.
    seed : int, optional
        Seed passed to ``numpy.random.seed``. Default is 0.
    """
    numpy.random.seed(seed)

    parser = WorkflowConfigParser(config_file, None)

    # variable arguments, static arguments and constraints
    parsed_args = read_args_from_config(parser)
    var_args, self.static, constraints = parsed_args

    # distributions and transforms
    prior_dists = read_distributions_from_config(parser)
    self.trans = read_transforms_from_config(parser)

    # joint distribution over the variable arguments
    self.pval = JointDistribution(var_args, *prior_dists,
                                  constraints=constraints)
def config_parser_from_cli(opts):
    """Loads a config file from the given options, applying any overrides
    specified.

    Config files are loaded from the `--config-files` option while
    overrides are loaded from `--config-overrides`.

    Parameters
    ----------
    opts : object
        Parsed command line options. Must provide the attributes
        ``config_files`` and ``config_overrides``.

    Returns
    -------
    WorkflowConfigParser
        The parser built from ``opts.config_files`` and the overrides.
    """
    # read configuration file
    logging.info("Reading configuration file")
    if opts.config_overrides is not None:
        # split on the first two colons only, so that a value that itself
        # contains ":" (a path, URL or time, say) is kept in one piece
        overrides = [override.split(":", 2)
                     for override in opts.config_overrides]
    else:
        overrides = None
    return WorkflowConfigParser(opts.config_files, overrides)
def read_config_file(self, return_cp=True, index=-1):
    """Reads the config file that was used.

    A ``ValueError`` is raised if no config files have been saved, or if
    the requested index is larger than the number of stored config files.

    Parameters
    ----------
    return_cp : bool, optional
        If true, returns the loaded config file as
        :py:class:`pycbc.workflow.configuration.WorkflowConfigParser`
        type. Otherwise will return as string buffer. Default is True.
    index : int, optional
        The config file to load. If ``write_config_file`` has been called
        multiple times (as would happen if restarting from a checkpoint),
        there will be several config files stored. Default (-1) is to
        load the last saved file.

    Returns
    -------
    WorkflowConfigParser or StringIO :
        The parsed config file.
    """
    # the datasets are named by integer; sort them numerically and pick
    # the one at the requested position
    try:
        stored = sorted(int(key) for key in self[self.config_group].keys())
        index = stored[index]
    except KeyError:
        raise ValueError("no config files saved in hdf")
    except IndexError:
        raise ValueError("no config file matches requested index")

    # load the stored buffer and rewind it before it is read
    buffer = load_state(self, path=self.config_group, dsetname=str(index))
    buffer.seek(0)
    if not return_cp:
        return buffer

    parser = WorkflowConfigParser()
    parser.read_file(buffer)
    return parser
def read_ini_config(file_path):
    """
    Read in a `*.ini` config file, which is used mostly to specify the
    waveform simulation (for example, the waveform model, the parameter
    space for the binary black holes, etc.) and return its contents.

    Args:
        file_path (str): Path to the `*.ini` config file to be read in.

    Returns:
        A tuple `(variable_arguments, static_arguments)` where

        * `variable_arguments` should simply be a list of all the
          parameters which get randomly sampled from the specified
          distributions, usually using an instance of
          :class:`utils.waveforms.WaveformParameterGenerator`.
        * `static_arguments` should be a dictionary containing the keys
          and values of the parameters that are the same for each
          example that is generated (i.e., the non-physical parameters
          such as the waveform model and the sampling rate).

    Raises:
        IOError: If nothing exists at `file_path`.
    """
    # refuse to continue when the path does not exist
    config_exists = os.path.exists(file_path)
    if not config_exists:
        raise IOError('Specified configuration file does not exist: '
                      '{}'.format(file_path))

    # parse the PyCBC config file
    parser = WorkflowConfigParser(configFiles=[file_path])

    # pull the variable and static arguments out of the parser
    params = read_params_from_config(parser)
    variable_arguments, static_arguments = params

    # typecast first, then amend, the static arguments
    static_arguments = amend_static_args(
        typecast_static_args(static_arguments))

    return variable_arguments, static_arguments
def make_inference_single_parameter_plots(workflow, mcmc_file, output_dir,
                                          config_file,
                                          samples_name="mcmc_samples",
                                          auto_name="mcmc_acf",
                                          analysis_seg=None, tags=None):
    """Add single-parameter plot jobs for MCMC results to the workflow.

    For every option of the [variable_args] section, two nodes are
    added: one plotting all the samples and one plotting the
    autocorrelation function.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    mcmc_file: pycbc.workflow.File
        The file with MCMC samples.
    output_dir: str
        The directory to store result plots and files.
    config_file: str
        The path to the inference configuration file that has a
        [variable_args] section.
    samples_name: str
        The name in the [executables] section of the configuration file
        to use for the plot that shows all samples.
    auto_name: str
        The name in the [executables] section of the configuration file
        to use for the autocorrelation function plot.
    analysis_seg: {None, glue.segments.Segment}
        The segment this job encompasses. If None then use the total
        analysis time from the workflow.
    tags: {None, optional}
        Tags handed to the plotting executables; the parameter name is
        appended to them for each node. If None, an empty list is used.

    Returns
    -------
    files: pycbc.workflow.FileList
        A list of result and output files.
    """
    # fall back to defaults for anything the caller left unset
    if tags is None:
        tags = []
    if analysis_seg is None:
        analysis_seg = workflow.analysis_time

    # the parameters that vary are the options of [variable_args]
    inference_cp = WorkflowConfigParser([config_file])
    variable_args = inference_cp.options("variable_args")

    # the output directory must exist before nodes are created
    makedir(output_dir)

    # collects the outputs of every node
    files = FileList()

    for arg in variable_args:
        # node that plots all the samples of this parameter
        samples_exe = PlotExecutable(workflow.cp, samples_name,
                                     ifos=workflow.ifos, out_dir=output_dir,
                                     tags=tags + [arg])
        samples_node = samples_exe.create_node()
        samples_node.add_input_opt("--input-file", mcmc_file)
        samples_node.new_output_file_opt(analysis_seg, ".png",
                                         "--output-file")
        samples_node.add_opt("--variable-args", arg)
        samples_node.add_opt("--labels", arg)

        # node that plots the autocorrelation function for each walker
        auto_exe = PlotExecutable(workflow.cp, auto_name,
                                  ifos=workflow.ifos, out_dir=output_dir,
                                  tags=tags + [arg])
        auto_node = auto_exe.create_node()
        auto_node.add_input_opt("--input-file", mcmc_file)
        auto_node.new_output_file_opt(analysis_seg, ".png", "--output-file")
        auto_node.add_opt("--variable-args", arg)

        # register both nodes, samples first
        workflow += samples_node
        workflow += auto_node

        # record their outputs in the same order
        files += samples_node.output_files
        files += auto_node.output_files

    return files
def from_config(cls, cp, **kwargs):
    r"""Initializes an instance of this class from the given config file.

    Sub-models are initialized before initializing this class. The model
    section must have a ``submodels`` argument that lists the names of all
    the submodels to generate as a space-separated list. Each sub-model
    should have its own ``[{label}__model]`` section that sets up the
    model for that sub-model. For example:

    .. code-block:: ini

        [model]
        name = hierarchical
        submodels = event1 event2

        [event1__model]
        <event1 model options>

        [event2__model]
        <event2 model options>

    Similarly, all other sections that are specific to a model should
    start with the model's label. All sections starting with a model's
    label will be passed to that model's ``from_config`` method with the
    label removed from the section name. For example, if a sub-model
    requires a data section to be specified, it should be titled
    ``[{label}__data]``. Upon initialization, the ``{label}__`` will be
    stripped from the section header and passed to the model.

    No model labels should precede the ``variable_params``,
    ``static_params``, ``waveform_transforms``, or
    ``sampling_transforms`` sections. Instead, the parameters specified
    in these sections should follow the naming conventions described in
    :py:class:`HierarchicalParam` to determine which sub-model(s) they
    belong to. (Sampling parameters can follow any naming convention, as
    they are only handled by the hierarchical model.) This is because the
    hierarchical model handles all transforms, communication with the
    sampler, file IO, and prior calculation. Only sub-model's
    loglikelihood functions are called.

    Metadata for each sub-model is written to the output hdf file under
    groups given by the sub-model label. For example, if we have two
    submodels labelled ``event1`` and ``event2``, there will be groups
    with the same names in the top level of the output that contain that
    model's subdata. For instance, if event1 used the ``gaussian_noise``
    model, the GW data and PSDs will be found in ``event1/data`` and the
    low frequency cutoff used for that model will be in the ``attrs`` of
    the ``event1`` group.

    Parameters
    ----------
    cp : WorkflowConfigParser
        Config file parser to read.
    \**kwargs :
        All additional keyword arguments are passed to the class. Any
        provided keyword will override what is in the config file.

    Raises
    ------
    ValueError
        If a section carrying a model label has the sub-name
        ``variable_params`` or ``static_params``.
    """
    # we need the read from config function from the init; to prevent
    # circular imports, we import it here
    from pycbc.inference.models import read_from_config
    # get the submodels
    submodel_lbls = shlex.split(cp.get('model', 'submodels'))
    # sort parameters by model
    vparam_map = map_params(
        hpiter(cp.options('variable_params'), submodel_lbls))
    sparam_map = map_params(
        hpiter(cp.options('static_params'), submodel_lbls))
    # we'll need any waveform transforms for the initializing sub-models,
    # as the underlying models will receive the output of those transforms
    if any(cp.get_subsections('waveform_transforms')):
        waveform_transforms = transforms.read_transforms_from_config(
            cp, 'waveform_transforms')
        wfoutputs = set.union(*[t.outputs
                                for t in waveform_transforms])
        wfparam_map = map_params(hpiter(wfoutputs, submodel_lbls))
    else:
        # no waveform transforms: every sub-model gets an empty list
        wfparam_map = {lbl: [] for lbl in submodel_lbls}
    # initialize the models
    submodels = {}
    logging.info("Loading submodels")
    for lbl in submodel_lbls:
        logging.info("============= %s =============", lbl)
        # create a config parser to pass to the model
        subcp = WorkflowConfigParser()
        # copy sections over that start with the model label (this should
        # include the [model] section for that model)
        # NOTE(review): `lbl in <prefix>` is a substring test on the part
        # of the section name before the delimiter, so a label that is
        # contained in another label also matches -- confirm this is
        # intended
        copy_sections = [
            HierarchicalParam(sec, submodel_lbls)
            for sec in cp.sections()
            if lbl in sec.split('-')[0].split(
                HierarchicalParam.delim, 1)[0]
        ]
        for sec in copy_sections:
            # check that the user isn't trying to set variable or static
            # params for the model (we won't worry about waveform or
            # sampling transforms here, since that is checked for in the
            # __init__)
            if sec.subname in ['variable_params', 'static_params']:
                raise ValueError("Section {} found in the config file; "
                                 "[variable_params] and [static_params] "
                                 "sections should not include model "
                                 "labels. To specify parameters unique to "
                                 "one or more sub-models, prepend the "
                                 "individual parameter names with the "
                                 "model label. See HierarchicalParam for "
                                 "details.".format(sec))
            # the section is stored under its name without the label
            subcp.add_section(sec.subname)
            for opt, val in cp.items(sec):
                subcp.set(sec.subname, opt, val)
        # set the static params
        subcp.add_section('static_params')
        for param in sparam_map[lbl]:
            subcp.set('static_params', param.subname,
                      cp.get('static_params', param.fullname))
        # set the variable params: for now we'll just set all the
        # variable params as static params
        # so that the model doesn't raise an error looking for
        # prior sections. We'll then manually set the variable
        # params after the model is initialized
        subcp.add_section('variable_params')
        for param in vparam_map[lbl]:
            subcp.set('static_params', param.subname, 'REPLACE')
        # add the outputs from the waveform transforms
        for param in wfparam_map[lbl]:
            subcp.set('static_params', param.subname, 'REPLACE')
        # initialize
        submodel = read_from_config(subcp)
        # move the static params back to variable
        for p in vparam_map[lbl]:
            submodel.static_params.pop(p.subname)
        submodel.variable_params = tuple(p.subname
                                         for p in vparam_map[lbl])
        # remove the waveform transform parameters
        for p in wfparam_map[lbl]:
            submodel.static_params.pop(p.subname)
        # store
        submodels[lbl] = submodel
        logging.info("")
    # now load the model
    logging.info("Loading hierarchical model")
    # NOTE(review): **kwargs is accepted and documented above but is not
    # forwarded in this call -- confirm whether that is intended
    return super().from_config(cp, submodels=submodels)
def test_from_config(self, random_data, request):
    """Check that a model can be loaded from a configuration object.

    The configuration marginalizes over distance only, with a uniform
    prior on [50, 5000).
    """
    static_params = {
        "approximant": "IMRPhenomPv2",
        "f_lower": "20",
        "f_ref": "20",
        "ra": "1.5",
        "dec": "-0.5",
        "polarization": "0.5",
    }

    # sections and their options, in the order they are written
    layout = [
        ("model", [
            ("name", "marginalized_gaussian_noise"),
            ("distance_marginalization", ""),
        ]),
        ("marginalized_prior-distance", [
            ("name", "uniform"),
            ("min-distance", "50"),
            ("max-distance", "5000"),
        ]),
        ("variable_params", [
            ("tc", ""),
        ]),
        ("static_params", list(static_params.items())),
        ("prior-tc", [
            ("name", "uniform"),
            ("min-tc", "1126259462.32"),
            ("max-tc", "1126259462.52"),
        ]),
    ]

    cp = WorkflowConfigParser()
    for section, options in layout:
        cp.add_section(section)
        for option, value in options:
            cp.set(section, option, value)

    # the same data is used for every detector
    data = {}
    for ifo in self.ifos:
        data[ifo] = random_data

    model = models.MarginalizedGaussianNoise.from_config(cp, data)

    marg_priors = model._marg_prior
    first_key = list(marg_priors.keys())[0]
    assert first_key == "distance"
    assert model._margdist
    distance_bounds = marg_priors["distance"].bounds["distance"]
    assert distance_bounds.min == 50.0
    assert marg_priors["distance"].bounds["distance"].max == 5000.0