Example #1
def make_inference_corner_plot(workflow, mcmc_file, output_dir, config_file,
                    name="mcmc_corner", analysis_seg=None, tags=None):
    """ Sets up the corner plot of the posteriors in the workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    mcmc_file: pycbc.workflow.File
        The file with MCMC samples.
    output_dir: str
        The directory to store result plots and files.
    config_file: str
        The path to the inference configuration file that has a
        [variable_args] section.
    name: str
        The name in the [executables] section of the configuration file
        to use.
    analysis_seg: {None, glue.segments.Segment}
        The segment this job encompasses. If None then use the total analysis
        time from the workflow.
    tags: {None, optional}
        Tags to add to the plot executables.

    Returns
    -------
    pycbc.workflow.FileList
        A list of result and output files. 
    """

    # default values
    tags = [] if tags is None else tags
    analysis_seg = workflow.analysis_time \
                       if analysis_seg is None else analysis_seg

    # read config file to get variables that vary
    cp = WorkflowConfigParser([config_file])
    variable_args = cp.options("variable_args")

    # add derived mass parameters if mass1 and mass2 in variable_args
    if "mass1" in variable_args and "mass2" in variable_args:
        variable_args += ["mchirp", "eta"]

    # make the directory that will contain the output files
    makedir(output_dir)

    # make a node for plotting the posterior as a corner plot
    node = PlotExecutable(workflow.cp, name, ifos=workflow.ifos,
                      out_dir=output_dir, universe="local",
                      tags=tags).create_node()

    # add command line options
    node.add_input_opt("--input-file", mcmc_file)
    node.new_output_file_opt(analysis_seg, ".png", "--output-file")

    # add node to workflow
    workflow += node

    return node.output_files
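
A minimal, self-contained sketch of the config-reading step above: write a
tiny ini file with a [variable_args] section and check that the derived mass
parameters are appended. The ini contents and temp-file handling are
illustrative assumptions, not part of the original workflow script.

import tempfile
from pycbc.workflow import WorkflowConfigParser

# hypothetical minimal config with the two mass parameters
with tempfile.NamedTemporaryFile("w", suffix=".ini", delete=False) as f:
    f.write("[variable_args]\nmass1 =\nmass2 =\n")
    ini_path = f.name

cp = WorkflowConfigParser([ini_path])
variable_args = cp.options("variable_args")
if "mass1" in variable_args and "mass2" in variable_args:
    variable_args += ["mchirp", "eta"]
print(variable_args)  # ['mass1', 'mass2', 'mchirp', 'eta']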
Example #2
    def setUp(self):

        # set random seed
        numpy.random.seed(1024)

        # path to example configuration file for testing
        config_path = "/".join([os.path.dirname(os.path.realpath(__file__)),
                                "../examples/distributions/example.ini"])

        # get a set of simulated command line options for
        # configuration file reading
        class Arguments(object):
            config_overrides = []
            config_files = [config_path]
        self.opts = Arguments()

        # read configuration files
        self.cp = WorkflowConfigParser.from_cli(self.opts)
        args = distributions.read_args_from_config(self.cp)
        self.variable_args, self.static_args, self.constraints = args

        # read distributions
        self.dists = distributions.read_distributions_from_config(self.cp)

        # check that all distributions will be tested
        for dname, dclass in distributions.distribs.items():
            if (not numpy.any([isinstance(dist, dclass)
                               for dist in self.dists])
                    and dname not in EXCLUDE_DIST_NAMES):
                raise ValueError("There is no test for {}".format(dname))
Example #3
    def setUp(self):

        # set random seed
        numpy.random.seed(1024)

        # path to example configuration file for testing
        config_path = "/".join([os.path.dirname(os.path.realpath(__file__)),
                                "../examples/distributions/example.ini"])

        # get a set of simulated command line options for
        # configuration file reading
        class Arguments(object):
            config_overrides = []
            config_delete = []
            config_files = [config_path]
        self.opts = Arguments()

        # read configuration files
        self.cp = WorkflowConfigParser.from_cli(self.opts)
        self.variable_args, self.static_args = \
            distributions.read_params_from_config(self.cp)
        self.constraints = distributions.read_constraints_from_config(self.cp)

        # read distributions
        self.dists = distributions.read_distributions_from_config(self.cp)

        # check that all distributions will be tested
        for dname in distributions.distribs:
            dclass = distributions.distribs[dname]
            if (not numpy.any([isinstance(dist, dclass)
                               for dist in self.dists])
                    and dname not in EXCLUDE_DIST_NAMES):
                raise ValueError("There is no test for {}".format(dname))
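
The two fixtures above track an API change in pycbc.distributions: the older
read_args_from_config returned (variable_args, static_args, constraints) in a
single call, while later releases split it into read_params_from_config and
read_constraints_from_config. A hedged sketch of the newer pattern on its own
(the ini path is hypothetical):

from pycbc import distributions
from pycbc.workflow import WorkflowConfigParser

cp = WorkflowConfigParser(["example.ini"])
variable_args, static_args = distributions.read_params_from_config(cp)
constraints = distributions.read_constraints_from_config(cp)
dists = distributions.read_distributions_from_config(cp)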
Example #4
    def __init__(
        self,
        config_files: Union[List[Union[str, os.PathLike]], str, os.PathLike],
        seed: Optional[int] = None,
    ):
        """Class to generate gravitational waveform parameters using PyCBC workflow and distribution packages.
        """
        if seed is not None:
            raise NotImplementedError(
                "Reproducible random seed not yet implemented.")

        self.config_files = config_files if isinstance(
            config_files, list) else [config_files]
        self.config_parser = WorkflowConfigParser(
            configFiles=self.config_files)

        self.parameters, self.static_args = read_params_from_config(
            self.config_parser)
        self.constraints = read_constraints_from_config(self.config_parser)
        self.transforms = read_transforms_from_config(self.config_parser)
        self.distribution = JointDistribution(
            self.parameters,
            *read_distributions_from_config(self.config_parser),
            **{'constraints': self.constraints})

        # ensure statistics match output of self.parameters
        self.statistics = compute_parameter_statistics({
            parameter: self.distribution.bounds[parameter]
            for parameter in self.parameters
        })
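
A hedged usage sketch for the class above: construct it from an ini file and
draw parameter sets from its JointDistribution. The class name
(ParameterGenerator), file name, and parameter key are hypothetical and
depend on the configuration.

generator = ParameterGenerator(config_files="waveform_params.ini")
samples = generator.distribution.rvs(size=10)   # record array of parameters
masses = samples["mass1"]                       # hypothetical parameter key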
Example #5
    def __init__(self,
                 config_file,
                 random_seed):

        # Fix the seed for the random number generator
        np.random.seed(random_seed)

        # Read in the configuration file using a WorkflowConfigParser.
        # Note that the argument `configFiles` has to be a list here,
        # so we need to wrap the `config_file` argument accordingly...
        config_file = WorkflowConfigParser(configFiles=[config_file])

        # Extract variable arguments and constraints
        # We don't need the static_args here, hence they do not get amended.
        self.var_args, _ = read_params_from_config(config_file)
        self.constraints = read_constraints_from_config(config_file)

        # Extract distributions
        dist = read_distributions_from_config(config_file)

        # Extract transformations
        self.trans = read_transforms_from_config(config_file)

        # Set up a joint distribution to sample from
        self.pval = JointDistribution(self.var_args,
                                      *dist,
                                      **{'constraints': self.constraints})
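
A plausible companion method to the __init__ above (an assumption, not shown
in the source): draw samples from the joint distribution and map them through
the configured transforms with pycbc.transforms.apply_transforms.

    def draw_samples(self, n=100):
        """Draw n parameter sets and apply the configured transforms
        (a hypothetical companion method, not shown in the source).
        """
        from pycbc import transforms
        samples = self.pval.rvs(size=n)
        return transforms.apply_transforms(samples, self.trans)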
Example #6
def config(scope='function'):
    # create WorkflowConfigParser and yield to test method
    with tempfile_with_content(TEST_CONFIGURATION) as cfo:
        yield WorkflowConfigParser([cfo.name])

    # clean up after WorkflowConfigParser
    _base = os.path.basename(cfo.name)
    if os.path.exists(_base):
        os.unlink(_base)
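
The fixture relies on a tempfile_with_content helper that is not shown. One
plausible implementation (an assumption, not the project's actual code) is a
context manager that writes the configuration text to a named temporary file
and yields the open file object:

import contextlib
import tempfile

@contextlib.contextmanager
def tempfile_with_content(content, suffix=".ini"):
    # write the text, flush so readers see it, and hand back the file
    with tempfile.NamedTemporaryFile("w", suffix=suffix) as f:
        f.write(content)
        f.flush()
        yield f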
Example #7
    def __init__(self, config_file, seed=0):
        numpy.random.seed(seed)
        config_file = WorkflowConfigParser([config_file], None)
        var_args, self.static, constraints = read_args_from_config(config_file)
        dist = read_distributions_from_config(config_file)

        self.trans = read_transforms_from_config(config_file)
        self.pval = JointDistribution(var_args, *dist,
                                      **{"constraints": constraints})
Example #8
def config_parser_from_cli(opts):
    """Loads a config file from the given options, applying any overrides
    specified. Specifically, config files are loaded from the `--config-files`
    options while overrides are loaded from `--config-overrides`.
    """
    # read configuration file
    logging.info("Reading configuration file")
    if opts.config_overrides is not None:
        overrides = [override.split(":") for override in opts.config_overrides]
    else:
        overrides = None
    return WorkflowConfigParser(opts.config_files, overrides)
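
A minimal sketch of the command-line options this function expects; the
option names follow the docstring, but the exact argparse setup in the
surrounding script (and the ini path) are assumptions.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--config-files", nargs="+", required=True)
parser.add_argument("--config-overrides", nargs="+", default=None,
                    metavar="SECTION:OPTION:VALUE")
opts = parser.parse_args(["--config-files", "inference.ini"])
cp = config_parser_from_cli(opts)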
Example #9
    def read_config_file(self, return_cp=True, index=-1):
        """Reads the config file that was used.

        A ``ValueError`` is raised if no config files have been saved, or if
        the requested index is larger than the number of stored config files.

        Parameters
        ----------
        return_cp : bool, optional
            If true, returns the loaded config file as
            :py:class:`pycbc.workflow.configuration.WorkflowConfigParser`
            type. Otherwise will return as string buffer. Default is True.
        index : int, optional
            The config file to load. If ``write_config_file`` has been called
            multiple times (as would happen if restarting from a checkpoint),
            there will be multiple config files stored. Default (-1) is to
            load the last saved file.

        Returns
        -------
        WorkflowConfigParser or StringIO :
            The parsed config file.
        """
        # get the stored indices
        try:
            indices = sorted(map(int, self[self.config_group].keys()))
            index = indices[index]
        except KeyError:
            raise ValueError("no config files saved in hdf")
        except IndexError:
            raise ValueError("no config file matches requested index")
        cf = load_state(self, path=self.config_group, dsetname=str(index))
        cf.seek(0)
        if return_cp:
            cp = WorkflowConfigParser()
            cp.read_file(cf)
            return cp
        return cf
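
A hedged usage sketch, assuming this method lives on an hdf-backed inference
results file (the file name is hypothetical): read back the last config saved
at checkpoint time as a parser, or as a raw string buffer.

from pycbc.inference import io

with io.loadfile("inference.hdf", "r") as fp:
    cp = fp.read_config_file()                   # WorkflowConfigParser
    raw = fp.read_config_file(return_cp=False)   # StringIO buffer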
Example #10
def read_ini_config(file_path):
    """
    Read in a `*.ini` config file, which is used mostly to specify the
    waveform simulation (for example, the waveform model, the parameter
    space for the binary black holes, etc.) and return its contents.
    
    Args:
        file_path (str): Path to the `*.ini` config file to be read in.

    Returns:
        A tuple `(variable_arguments, static_arguments)` where
        
        * `variable_arguments` should simply be a list of all the
          parameters which get randomly sampled from the specified
          distributions, usually using an instance of
          :class:`utils.waveforms.WaveformParameterGenerator`.
        * `static_arguments` should be a dictionary containing the keys
          and values of the parameters that are the same for each
          example that is generated (i.e., the non-physical parameters
          such as the waveform model and the sampling rate).
    """

    # Make sure the config file actually exists
    if not os.path.exists(file_path):
        raise IOError('Specified configuration file does not exist: '
                      '{}'.format(file_path))

    # Set up a parser for the PyCBC config file
    workflow_config_parser = WorkflowConfigParser(configFiles=[file_path])

    # Read the variable_arguments and static_arguments using the parser
    variable_arguments, static_arguments = \
        read_params_from_config(workflow_config_parser)

    # Typecast and amend the static arguments
    static_arguments = typecast_static_args(static_arguments)
    static_arguments = amend_static_args(static_arguments)

    return variable_arguments, static_arguments
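
A short usage sketch for the reader above; the ini path and the printed keys
are hypothetical and depend on the configuration file.

variable_arguments, static_arguments = read_ini_config("waveform_params.ini")
print(variable_arguments)                # e.g. ['mass1', 'mass2', ...]
print(static_arguments["approximant"])   # hypothetical static key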
Example #11
def make_inference_single_parameter_plots(workflow, mcmc_file, output_dir,
                    config_file, samples_name="mcmc_samples",
                    auto_name="mcmc_acf", analysis_seg=None, tags=None):
    """ Sets up single-parameter plots from MCMC in the workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    mcmc_file: pycbc.workflow.File
        The file with MCMC samples.
    output_dir: str
        The directory to store result plots and files.
    config_file: str
        The path to the inference configuration file that has a
        [variable_args] section.
    samples_name: str
        The name in the [executables] section of the configuration file
        to use for the plot that shows all samples.
    auto_name: str
        The name in the [executables] section of the configuration file
        to use for the autocorrelation function plot.
    analysis_seg: {None, glue.segments.Segment}
        The segment this job encompasses. If None then use the total analysis
        time from the workflow.
    tags: {None, optional}
        Tags to add to the plot executables.

    Returns
    -------
    files: pycbc.workflow.FileList
        A list of result and output files. 
    """

    # default values
    tags = [] if tags is None else tags
    analysis_seg = workflow.analysis_time \
                       if analysis_seg is None else analysis_seg

    # read config file to get variables that vary
    cp = WorkflowConfigParser([config_file])
    variable_args = cp.options("variable_args")

    # make the directory that will contain the output files
    makedir(output_dir)

    # list of all output files
    files = FileList()

    # make a set of plots for each parameter
    for arg in variable_args:

        # make a node for plotting all the samples
        samples_node = PlotExecutable(workflow.cp, samples_name,
                          ifos=workflow.ifos, out_dir=output_dir,
                          tags=tags + [arg]).create_node()

        # add command line options
        samples_node.add_input_opt("--input-file", mcmc_file)
        samples_node.new_output_file_opt(analysis_seg, ".png", "--output-file")
        samples_node.add_opt("--variable-args", arg)
        samples_node.add_opt("--labels", arg)

        # make node for plotting the autocorrelation function for each walker
        auto_node = PlotExecutable(workflow.cp, auto_name, ifos=workflow.ifos,
                          out_dir=output_dir, tags=tags + [arg]).create_node()

        # add command line options
        auto_node.add_input_opt("--input-file", mcmc_file)
        auto_node.new_output_file_opt(analysis_seg, ".png", "--output-file")
        auto_node.add_opt("--variable-args", arg)

        # add nodes to workflow
        workflow += samples_node
        workflow += auto_node

        # add files to output files list
        files += samples_node.output_files
        files += auto_node.output_files

    return files
Example #12
    def from_config(cls, cp, **kwargs):
        r"""Initializes an instance of this class from the given config file.

        Sub-models are initialized before initializing this class. The model
        section must have a ``submodels`` argument that lists the names of all
        the submodels to generate as a space-separated list. Each sub-model
        should have its own ``[{label}__model]`` section that sets up the
        model for that sub-model. For example:

        .. code-block:: ini

            [model]
            name = hierarchical
            submodels = event1 event2

            [event1__model]
            <event1 model options>

            [event2__model]
            <event2 model options>

        Similarly, all other sections that are specific to a model should start
        with the model's label. All sections starting with a model's label will
        be passed to that model's ``from_config`` method with the label removed
        from the section name. For example, if a sub-model requires a data
        section to be specified, it should be titled ``[{label}__data]``. Upon
        initialization, the ``{label}__`` will be stripped from the section
        header and passed to the model.

        No model labels should precede the ``variable_params``,
        ``static_params``, ``waveform_transforms``, or ``sampling_transforms``
        sections. Instead, the parameters specified in these sections should
        follow the naming conventions described in
        :py:class:`HierarchicalParam` to determine which sub-model(s) they
        belong to. (Sampling parameters can follow any naming convention, as
        they are only handled by the hierarchical model.) This is because the
        hierarchical model handles all transforms, communication with the
        sampler, file IO, and prior calculation. Only the sub-models'
        loglikelihood functions are called.

        Metadata for each sub-model is written to the output hdf file under
        groups given by the sub-model label. For example, if we have two
        submodels labelled ``event1`` and ``event2``, there will be groups
        with the same names in the top level of the output that contain that
        model's subdata. For instance, if event1 used the ``gaussian_noise``
        model, the GW data and PSDs will be found in ``event1/data`` and the
        low frequency cutoff used for that model will be in the ``attrs`` of
        the ``event1`` group.

        Parameters
        ----------
        cp : WorkflowConfigParser
            Config file parser to read.
        \**kwargs :
            All additional keyword arguments are passed to the class. Any
            provided keyword will override what is in the config file.
        """
        # we need the read from config function from the init; to prevent
        # circular imports, we import it here
        from pycbc.inference.models import read_from_config
        # get the submodels
        submodel_lbls = shlex.split(cp.get('model', 'submodels'))
        # sort parameters by model
        vparam_map = map_params(
            hpiter(cp.options('variable_params'), submodel_lbls))
        sparam_map = map_params(
            hpiter(cp.options('static_params'), submodel_lbls))

        # we'll need any waveform transforms for the initializing sub-models,
        # as the underlying models will receive the output of those transforms
        if any(cp.get_subsections('waveform_transforms')):
            waveform_transforms = transforms.read_transforms_from_config(
                cp, 'waveform_transforms')
            wfoutputs = set.union(*[t.outputs for t in waveform_transforms])
            wfparam_map = map_params(hpiter(wfoutputs, submodel_lbls))
        else:
            wfparam_map = {lbl: [] for lbl in submodel_lbls}
        # initialize the models
        submodels = {}
        logging.info("Loading submodels")
        for lbl in submodel_lbls:
            logging.info("============= %s =============", lbl)
            # create a config parser to pass to the model
            subcp = WorkflowConfigParser()
            # copy sections over that start with the model label (this should
            # include the [model] section for that model)
            copy_sections = [
                HierarchicalParam(sec, submodel_lbls)
                for sec in cp.sections() if lbl in sec.split('-')[0].split(
                    HierarchicalParam.delim, 1)[0]
            ]
            for sec in copy_sections:
                # check that the user isn't trying to set variable or static
                # params for the model (we won't worry about waveform or
                # sampling transforms here, since that is checked for in the
                # __init__)
                if sec.subname in ['variable_params', 'static_params']:
                    raise ValueError("Section {} found in the config file; "
                                     "[variable_params] and [static_params] "
                                     "sections should not include model "
                                     "labels. To specify parameters unique to "
                                     "one or more sub-models, prepend the "
                                     "individual parameter names with the "
                                     "model label. See HierarchicalParam for "
                                     "details.".format(sec))
                subcp.add_section(sec.subname)
                for opt, val in cp.items(sec):
                    subcp.set(sec.subname, opt, val)
            # set the static params
            subcp.add_section('static_params')
            for param in sparam_map[lbl]:
                subcp.set('static_params', param.subname,
                          cp.get('static_params', param.fullname))
            # set the variable params: for now we'll just set all the
            # variable params as static params so that the model doesn't
            # raise an error looking for prior sections; we'll manually
            # restore the variable params after the model is initialized
            subcp.add_section('variable_params')
            for param in vparam_map[lbl]:
                subcp.set('static_params', param.subname, 'REPLACE')
            # add the outputs from the waveform transforms
            for param in wfparam_map[lbl]:
                subcp.set('static_params', param.subname, 'REPLACE')

            # initialize
            submodel = read_from_config(subcp)
            # move the static params back to variable
            for p in vparam_map[lbl]:
                submodel.static_params.pop(p.subname)
            submodel.variable_params = tuple(p.subname
                                             for p in vparam_map[lbl])
            # remove the waveform transform parameters
            for p in wfparam_map[lbl]:
                submodel.static_params.pop(p.subname)
            # store
            submodels[lbl] = submodel
            logging.info("")
        # now load the model
        logging.info("Loading hierarchical model")
        return super().from_config(cp, submodels=submodels)
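
A hedged sketch of the prefixed-parameter convention the docstring describes:
entries in the shared [variable_params] section carry the sub-model label,
while per-model sections are written as [{label}__section]. Section names and
values are illustrative only, not taken from a real analysis.

[variable_params]
event1__tc =
event2__tc =

[prior-event1__tc]
name = uniform
min-event1__tc = 1126259462.3
max-event1__tc = 1126259462.5

[event1__data]
<event1 data options>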
Example #13
    def test_from_config(self, random_data, request):
        """Test the function which loads data from a configuration file. Here
        we assume we are just marginalizing over distance with a uniform prior
        on [50, 5000).
        """
        param = {
            "approximant": "IMRPhenomPv2",
            "f_lower": "20",
            "f_ref": "20",
            "ra": "1.5",
            "dec": "-0.5",
            "polarization": "0.5"
        }

        cp = WorkflowConfigParser()
        cp.add_section("model")
        cp.set("model", "name", "marginalized_gaussian_noise")
        cp.set("model", "distance_marginalization", "")
        cp.add_section("marginalized_prior-distance")
        cp.set("marginalized_prior-distance", "name", "uniform")
        cp.set("marginalized_prior-distance", "min-distance", "50")
        cp.set("marginalized_prior-distance", "max-distance", "5000")
        cp.add_section("variable_params")
        cp.set("variable_params", "tc", "")
        cp.add_section("static_params")
        for key, val in param.items():
            cp.set("static_params", key, val)
        cp.add_section("prior-tc")
        cp.set("prior-tc", "name", "uniform")
        cp.set("prior-tc", "min-tc", "1126259462.32")
        cp.set("prior-tc", "max-tc", "1126259462.52")

        data = {ifo: random_data for ifo in self.ifos}
        model = models.MarginalizedGaussianNoise.from_config(cp, data)
        marg_priors = model._marg_prior
        keys = list(marg_priors.keys())
        assert keys[0] == "distance"
        assert model._margdist
        assert marg_priors["distance"].bounds["distance"].min == 50.0
        assert marg_priors["distance"].bounds["distance"].max == 5000.0
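
For readability, the programmatically built config above corresponds to an
ini file along these lines (a sketch; the values mirror the test exactly):

[model]
name = marginalized_gaussian_noise
distance_marginalization =

[marginalized_prior-distance]
name = uniform
min-distance = 50
max-distance = 5000

[variable_params]
tc =

[static_params]
approximant = IMRPhenomPv2
f_lower = 20
f_ref = 20
ra = 1.5
dec = -0.5
polarization = 0.5

[prior-tc]
name = uniform
min-tc = 1126259462.32
max-tc = 1126259462.52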