Example 1
    def _compute_transforms(self):
        dims = self.input_binning.names

        transforms = []
        for group, in_names in self.combine_groups.items():
            xform_shape = [len(in_names)
                           ] + [self.input_binning[d].num_bins for d in dims]

            xform = np.ones(xform_shape)
            for i, name in enumerate(in_names):
                scale = 1.
                if '_nc' in name:
                    scale *= self.params.nu_nc_norm.value.m_as('dimensionless')
                #if 'nutau' in name:
                #    scale *= self.params.nutau_norm.value.m_as('dimensionless')
                #if name in ['nutau_cc','nutaubar_cc']:
                #    scale *= self.params.nutau_cc_norm.value.m_as('dimensionless')
                if scale != 1:
                    xform[i] *= scale

            transforms.append(
                BinnedTensorTransform(input_names=in_names,
                                      output_name=group,
                                      input_binning=self.input_binning,
                                      output_binning=self.output_binning,
                                      xform_array=xform))

        return TransformSet(transforms=transforms)
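
A minimal standalone sketch of what the stacked transform above encodes (plain NumPy; the input names, binning, and scale value are hypothetical): one array slice per input name, with a per-input scale factor applied to selected slices.

    import numpy as np

    # Hypothetical: two NC inputs combined into one output group over a
    # 3x4 (energy x coszen) binning, mirroring `xform_shape` above
    in_names = ['numu_nc', 'numubar_nc']
    num_ebins, num_czbins = 3, 4
    xform = np.ones((len(in_names), num_ebins, num_czbins))

    nu_nc_norm = 1.2  # stand-in for params.nu_nc_norm
    for i, name in enumerate(in_names):
        if '_nc' in name:
            xform[i] *= nu_nc_norm  # scale the whole slice for this input

    # Each input map is weighted by its slice and the results summed into
    # the single output map (roughly what the stacked transform expresses)
    input_maps = [np.random.rand(num_ebins, num_czbins) for _ in in_names]
    output_map = sum(w * m for w, m in zip(xform, input_maps))
    print(output_map.shape)  # (3, 4)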
Example 2
    def _compute_transforms(self):
        """For the current parameter values, evaluate the fit function and
        write the resulting scaling into an x-form array"""
        # TODO: use iterators to collapse nested loops
        transforms = []
        for input_name in self.input_names:
            # Current values of the systematic parameters
            sys_values = [self.params[sys].magnitude for sys in self.sys_list]
            fit_params = self.fit_results[input_name]
            shape = fit_params.shape[:-1]
            transform = np.ones(shape)
            for idx in np.ndindex(*shape):
                # At every point evaluate the function
                transform[idx] *= fit_fun(sys_values, *fit_params[idx])

            xform = BinnedTensorTransform(
                input_names=input_name,
                output_name=input_name,
                input_binning=self.input_binning,
                output_binning=self.output_binning,
                xform_array=transform,
                error_method=self.error_method,
            )
            transforms.append(xform)
        return TransformSet(transforms)
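
`fit_fun` is defined elsewhere in the codebase; as a hypothetical stand-in, the per-bin evaluation above can be reproduced with a simple linear function of the systematic values:

    import numpy as np

    def fit_fun(sys_values, *fit_params):
        # Hypothetical stand-in for the externally defined fit function: a
        # linear "hyperplane" in the systematics, fit_params = (offset, *slopes)
        offset, slopes = fit_params[0], np.asarray(fit_params[1:])
        return offset + np.dot(slopes, np.asarray(sys_values))

    # 2x2 binning with two systematics: the last axis holds the fit parameters
    fit_params = np.ones((2, 2, 3))
    sys_values = [0.5, -1.0]
    transform = np.ones(fit_params.shape[:-1])
    for idx in np.ndindex(*transform.shape):
        transform[idx] *= fit_fun(sys_values, *fit_params[idx])
    print(transform)  # every bin: 1 + 0.5 - 1.0 = 0.5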
Example 3
    def _compute_transforms(self):
        """Compute new oscillation transforms."""
        # The seed is created from parameter values to produce different sets
        # of transforms for different sets of parameters
        seed = hash_obj(self.params.values, hash_to='int') % (2**32 - 1)
        np.random.seed(seed)

        # Read parameters in the units used for computation, e.g.
        theta23 = self.params.theta23.m_as('rad')

        transforms = []
        for out_idx, output_name in enumerate(self.output_names):
            if out_idx < 3:
                # neutrinos (-> input names are neutrinos)
                input_names = self.input_names[0:2]
            else:
                # anti-neutrinos (-> input names are anti-neutrinos)
                input_names = self.input_names[2:4]

            # generate the "oscillation probabilities"
            xform = self.create_dummy_osc_probs()

            # create object of type `BinnedTensorTransform` and attach
            # to list of transforms with correct set of input names for the
            # output name in question
            transforms.append(
                BinnedTensorTransform(
                    input_names=input_names,
                    output_name=output_name,
                    # we have already made sure that input and output binnings
                    # are identical
                    input_binning=self.input_binning,
                    output_binning=self.output_binning,
                    xform_array=xform))

        return TransformSet(transforms=transforms)
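
A standalone sketch of the seeding idea, with hashlib standing in for PISA's `hash_obj`: identical parameter values always map to the same 32-bit seed, so the "oscillation probabilities" are reproducible per parameter set.

    import hashlib
    import numpy as np

    def params_seed(param_values):
        # Hypothetical stand-in for hash_obj(..., hash_to='int'): derive a
        # reproducible seed in [0, 2**32 - 1) from the parameter values
        digest = hashlib.sha256(repr(param_values).encode()).hexdigest()
        return int(digest, 16) % (2**32 - 1)

    seed = params_seed([0.7853, 2.5e-3])  # e.g., theta23 [rad], deltam31 [eV^2]
    np.random.seed(seed)
    first_draw = np.random.random(3)
    np.random.seed(params_seed([0.7853, 2.5e-3]))  # same params -> same seed
    assert np.allclose(first_draw, np.random.random(3))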
Example 4
    def _compute_transforms(self):  # pylint: disable=no-self-use
        """Stages that apply transforms to inputs should override this method
        for deriving the transform. No-input stages should leave this as-is."""
        return TransformSet([])
Example 5
class Stage(BaseStage):
    """
    PISA stage base class. Should encompass all behaviors common to (almost)
    all stages.

    Specialization should be done via subclasses.

    Parameters
    ----------
    use_transforms : bool (required)
        Whether or not this stage takes inputs to be transformed (and hence
        implements transforms).

    input_names : None or list of strings

    output_names : None or list of strings

    disk_cache : None, bool, string, or DiskCache
      * If None or False, no disk cache is available.
      * If True, a disk cache is generated at the path
        `CACHE_DIR/<stage_name>/<service_name>.sqlite` where CACHE_DIR is
        defined in pisa.__init__
      * If string, this is interpreted as a path. If an absolute path is
        provided (e.g., "/home/myuser/mycache.sqlite"), this locates the disk
        cache file exactly, while a relative path (e.g.,
        "relative/dir/mycache.sqlite") is taken relative to the CACHE_DIR; the
        aforementioned example will be turned into
        `CACHE_DIR/relative/dir/mycache.sqlite`.
      * If a DiskCache object is passed, it will be used directly

    memcache_deepcopy : bool
        Whether to deepcopy objects prior to storing to the memory cache and
        upon loading these objects from the memory cache. Setting to True
        ensures no modification of mutable objects stored to a memory cache
        will affect other logic relying on that object remaining unchanged.
        However, this comes at the cost of more memory used and slower
        operations.

    outputs_cache_depth : int >= 0

    transforms_cache_depth : int >= 0

    input_binning : None or interpretable as MultiDimBinning

    output_binning : None or interpretable as MultiDimBinning


    Notes
    -----
    The following methods can be overridden in derived classes where
    applicable:
        _derive_nominal_transforms_hash
        _derive_transforms_hash
        _derive_nominal_outputs_hash
        _derive_outputs_hash
        _compute_nominal_transforms
            This is called during initialization to compute what are termed
            "nominal" transforms -- i.e, transforms with all systematic
            parameters set to their nominal values, such that they have no
            effect on the transform. It is optional to use this stage, but if
            it *is* used, then the result will be cached to memory (and
            optionally to disk cache, if one is provided) for future use. A
            nominal transform is useful when systematic parameters merely have
            the effect of modifying the nominal transform, rather than
            requiring a complete recomputation of the transform.
        _compute_nominal_outputs
            Same as nominal transforms, but for outputs (e.g., used for
            non-input stages)
        _compute_transforms
            Do the actual work to produce the stage's transforms. For stages
            that specify use_transforms=False, this method is never called.
        _compute_outputs
            Do the actual work to compute the stage's output. Default
            implementation is to call self.transforms.apply(inputs); override
            if no transforms are present or if more needs to be done to
            compute outputs than this.
        validate_params
            Perform validation on any parameters.

    """

    def __init__(
        self,
        use_transforms,
        params=None,
        expected_params=None,
        input_names=None,
        output_names=None,
        error_method=None,
        disk_cache=None,
        memcache_deepcopy=True,
        transforms_cache_depth=10,
        outputs_cache_depth=0,
        input_binning=None,
        output_binning=None,
        debug_mode=None,
    ):
        # Allow for string inputs, but have to populate into lists for
        # consistent interfacing to one or multiple of these things

        logging.warning('This is a cake-style PISA stage, which is DEPRECATED!')

        self.use_transforms = use_transforms
        """Whether or not stage uses transforms"""

        self._events_hash = None

        self.input_binning = input_binning
        self.output_binning = output_binning
        self.validate_binning()

        # init base class!
        super(Stage, self).__init__(
            params=params,
            expected_params=expected_params,
            input_names=input_names,
            output_names=output_names,
            debug_mode=debug_mode,
            error_method=error_method,
        )

        # Storage of latest transforms and outputs; default to empty
        # TransformSet and None, respectively.
        self.transforms = TransformSet([])
        """A stage that takes to-be-transformed inputs and has had these
        transforms computed stores them here. Before computation, `transforms`
        is an empty TransformSet; a stage that does not make use of these (such
        as a no-input stage) has an empty TransformSet."""

        self.memcache_deepcopy = memcache_deepcopy

        self.transforms_cache_depth = int(transforms_cache_depth)

        self.transforms_cache = None
        """Memory cache object for storing transforms"""

        self.nominal_transforms_cache = None
        """Memory cache object for storing nominal transforms"""

        self.full_hash = True
        """Whether to do full hashing if true, otherwise do fast hashing"""

        self.transforms_cache = MemoryCache(
            max_depth=self.transforms_cache_depth,
            is_lru=True,
            deepcopy=self.memcache_deepcopy,
        )
        self.nominal_transforms_cache = MemoryCache(
            max_depth=self.transforms_cache_depth,
            is_lru=True,
            deepcopy=self.memcache_deepcopy,
        )

        self.outputs_cache_depth = int(outputs_cache_depth)

        self.outputs_cache = None
        """Memory cache object for storing outputs (excludes sideband
        objects)."""

        if self.outputs_cache_depth > 0:
            self.outputs_cache = MemoryCache(
                max_depth=self.outputs_cache_depth,
                is_lru=True,
                deepcopy=self.memcache_deepcopy,
            )

        self.disk_cache = disk_cache
        """Disk cache object"""

        self.disk_cache_path = None
        """Path to disk cache file for this stage/service (or None)."""

        # Include each attribute here for hashing if it is defined and its
        # value is not None
        default_attrs_to_hash = [
            "input_names",
            "output_names",
            "input_binning",
            "output_binning",
        ]
        self._attrs_to_hash = set([])
        for attr in default_attrs_to_hash:
            if not hasattr(self, attr):
                continue
            val = getattr(self, attr)
            if val is None:
                continue
            try:
                self.include_attrs_for_hashes(attr)
            except ValueError:
                pass

        self.events = None
        self.nominal_transforms = None

        # Define useful flags and values for debugging behavior after running

        self.nominal_transforms_loaded_from_cache = None
        """Records which cache nominal transforms were loaded from, or None."""

        self.nominal_transforms_computed = False
        """Records whether nominal transforms were (re)computed."""

        self.transforms_loaded_from_cache = None
        """Records which cache transforms were loaded from, or None."""

        self.transforms_computed = False
        """Records whether transforms were (re)computed."""

        self.nominal_outputs_computed = False
        """Records whether nominal outputs were (re)computed."""

        self.outputs_loaded_from_cache = None
        """Records which cache outputs were loaded from, or None."""

        self.outputs_computed = False
        """Records whether outputs were (re)computed."""

        self.nominal_transforms_hash = None
        self.transforms_hash = None
        self.nominal_outputs_hash = None
        self.outputs_hash = None
        self.instantiate_disk_cache()

    @profile
    def get_nominal_transforms(self, nominal_transforms_hash):
        """Load a cached transform from the nominal transform memory cache
        (which is backed by a disk cache, if one is specified) if the nominal
        transform is in the cache, or else recompute it and store to the
        cache(s).

        This method calls the `_compute_nominal_transforms` method, which by
        default does nothing.

        However, if you want to use the nominal transforms feature, override
        the `_compute_nominal_transforms` method and fill in the logic there.

        Deciding whether to invoke the `_compute_nominal_transforms` method or
        to load the nominal transforms from cache is done here, so you needn't
        think about any of this within the `_compute_nominal_transforms`
        method.

        Returns
        -------
        nominal_transforms, hash

        """
        # Reset flags
        self.nominal_transforms_loaded_from_cache = None
        self.nominal_transforms_computed = False

        if nominal_transforms_hash is None:
            nominal_transforms_hash = self._derive_nominal_transforms_hash()

        nominal_transforms = None
        # Quick way to avoid further logic is if hash value is None
        if nominal_transforms_hash is None:
            self.nominal_transforms_hash = None
            self.nominal_transforms = None
            return self.nominal_transforms, self.nominal_transforms_hash

        recompute = True
        # If hash found in memory cache, load nominal transforms from there
        if (
            nominal_transforms_hash in self.nominal_transforms_cache
            and self.debug_mode is None
        ):
            nominal_transforms = self.nominal_transforms_cache[nominal_transforms_hash]
            self.nominal_transforms_loaded_from_cache = "memory"
            recompute = False

        # Otherwise try to load from an extant disk cache
        elif self.disk_cache is not None and self.debug_mode is None:
            try:
                nominal_transforms = self.disk_cache[nominal_transforms_hash]
            except KeyError:
                pass
            else:
                self.nominal_transforms_loaded_from_cache = "disk"
                recompute = False
                # Save to memory cache
                self.nominal_transforms_cache[
                    nominal_transforms_hash
                ] = nominal_transforms

        if recompute:
            self.nominal_transforms_computed = True
            nominal_transforms = self._compute_nominal_transforms()
            if nominal_transforms is None:
                # Invalidate the hash value since no transforms were found
                nominal_transforms_hash = None
            else:
                nominal_transforms.hash = nominal_transforms_hash
                self.nominal_transforms_cache[
                    nominal_transforms_hash
                ] = nominal_transforms
                if self.disk_cache is not None:
                    self.disk_cache[nominal_transforms_hash] = nominal_transforms

        self.nominal_transforms = nominal_transforms
        self.nominal_transforms_hash = nominal_transforms_hash
        return nominal_transforms, nominal_transforms_hash

    @profile
    def get_transforms(self, transforms_hash=None, nominal_transforms_hash=None):
        """Load a cached transform (keyed on hash of parameter values) if it
        is in the cache, or else compute a new transform from currently-set
        parameter values and store this new transform to the cache.

        This calls the private method `_compute_transforms` (which must be
        implemented in subclasses if transforms are used) to generate new
        transforms whenever they are not found in the transforms cache.

        Notes
        -----
        The hash used here is only meant to be valid within the scope of a
        session; a hash on the full parameter set used to generate the
        transform *and* the version of the generating software is required for
        non-volatile storage.

        """
        # Reset flags
        self.transforms_loaded_from_cache = None
        self.transforms_computed = False

        # TODO: store nominal transforms to the transforms cache as well, but
        # derive the hash value the same way as it is done for transforms,
        # to avoid needing to apply no systematics to the nominal transforms
        # to get the (identical) transforms?
        # Problem: assumes the nominal transform is the same as the transforms
        # that will result, which *might* not be true (though it seems it will
        # usually be so)

        # Compute nominal transforms; if feature is not used, this doesn't
        # actually do much of anything. To do more than this, override the
        # `_compute_nominal_transforms` method.
        _, nominal_transforms_hash = self.get_nominal_transforms(
            nominal_transforms_hash=nominal_transforms_hash
        )

        # Generate hash from param values
        if transforms_hash is None:
            transforms_hash = self._derive_transforms_hash(
                nominal_transforms_hash=nominal_transforms_hash
            )
        logging.trace("transforms_hash: %s" % str(transforms_hash))

        # Load and return existing transforms if in the cache
        if (
            self.transforms_cache is not None
            and transforms_hash in self.transforms_cache
            and self.debug_mode is None
        ):
            self.transforms_loaded_from_cache = "memory"
            logging.trace("loading transforms from cache.")
            transforms = self.transforms_cache[transforms_hash]

        # Otherwise: compute transforms, set hash, and store to cache
        else:
            self.transforms_computed = True
            logging.trace("computing transforms.")
            transforms = self._compute_transforms()
            transforms.hash = transforms_hash
            if self.transforms_cache is not None:
                self.transforms_cache[transforms_hash] = transforms

        self.check_transforms(transforms)
        self.transforms = transforms
        return transforms

    @profile
    def get_nominal_outputs(self, nominal_outputs_hash):
        """Load a cached output from the nominal outputs memory cache
        (which is backed by a disk cache, if one is specified) if the nominal
        output is in the cache, or else recompute it and store to the
        cache(s).

        This method calls the `_compute_nominal_outputs` method, which by
        default does nothing.

        However, if you want to use the nominal outputs feature, override
        the `_compute_nominal_outputs` method and fill in the logic there.

        Deciding whether to invoke the `_compute_nominal_outputs` method or
        to load the nominal outputs from cache is done here, so you needn't
        think about any of this within the `_compute_nominal_outputs`
        method.

        Returns
        -------
        nominal_outputs, hash

        """
        if nominal_outputs_hash is None:
            nominal_outputs_hash = self._derive_nominal_outputs_hash()

        if (
            self.nominal_outputs_hash is None
            or self.nominal_outputs_hash != nominal_outputs_hash
        ):
            self._compute_nominal_outputs()
            self.nominal_outputs_hash = nominal_outputs_hash

    # for PI compatibility
    def run(self, inputs=None):
        return self.get_outputs(inputs=inputs)

    @profile
    def get_outputs(self, inputs=None):
        """Top-level function for computing outputs. Use this method to get
        outputs if you live outside this stage/service.

        Caching is handled here, so if the output hash returned by
        `_derive_outputs_hash` is in `outputs_cache`, it is simply returned.
        Otherwise, the `_compute_outputs` private method is invoked to do the
        actual work of computing outputs.

        Parameters
        ----------
        inputs : None or Mapping
            Any inputs to be transformed, plus any sideband objects that are to
            be passed on (untransformed) to subsequent stages.

        See also
        --------
        Overloadable methods called directly from this:
            _derive_outputs_hash
            _compute_outputs

        """
        # Reset flags
        self.outputs_loaded_from_cache = None
        self.outputs_computed = False

        # TODO: store nominal outputs to the outputs cache as well, but
        # derive the hash value the same way as it is done for outputs,
        # to avoid needing to apply no systematics to the nominal outputs
        # to get the (identical) outputs?
        # Problem: assumes the nominal outputs are the same as the outputs
        # that will result, which *might* not be true (though it seems it will
        # usually be so)

        # Keep inputs for internal use and for inspection later
        self.inputs = inputs

        outputs_hash, transforms_hash, nominal_transforms_hash = (
            self._derive_outputs_hash()
        )

        # Compute nominal outputs; if feature is not used, this doesn't
        # actually do much of anything. To do more than this, override the
        # `_compute_nominal_outputs` method.
        self.get_nominal_outputs(nominal_outputs_hash=nominal_transforms_hash)

        logging.trace("outputs_hash: %s" % outputs_hash)

        if (
            self.outputs_cache is not None
            and outputs_hash is not None
            and outputs_hash in self.outputs_cache
            and self.debug_mode is None
        ):
            self.outputs_loaded_from_cache = "memory"
            logging.trace("Loading outputs from cache.")
            outputs = self.outputs_cache[outputs_hash]
        else:
            logging.trace("Need to compute outputs...")

            if self.use_transforms:
                self.get_transforms(
                    transforms_hash=transforms_hash,
                    nominal_transforms_hash=nominal_transforms_hash,
                )

            logging.trace("... now computing outputs.")
            outputs = self._compute_outputs(inputs=self.inputs)
            self.check_outputs(outputs)

            if isinstance(outputs, (Map, MapSet)):
                outputs = outputs.rebin(self.output_binning)

            outputs.hash = outputs_hash
            self.outputs_computed = True

            # Store output to cache
            if self.outputs_cache is not None and outputs_hash is not None:
                self.outputs_cache[outputs_hash] = outputs

        # Keep outputs for inspection later
        self.outputs = outputs

        # Attach sideband objects (i.e., inputs not specified in
        # `self.input_names`) to the "augmented" output object
        if self.inputs is None:
            names_in_inputs = set()
        else:
            names_in_inputs = set(self.inputs.names)
        unused_input_names = names_in_inputs.difference(self.input_names)

        if len(unused_input_names) == 0:
            return outputs

        # TODO: update logic for Data object, generic sideband objects
        # Create a new output container different from `outputs` but copying
        # the contents, for purposes of attaching the sideband objects found.
        if isinstance(outputs, MapSet):
            augmented_outputs = MapSet(outputs)
            for name in unused_input_names:
                augmented_outputs.append(inputs[name])

            return augmented_outputs
        else:
            raise TypeError(
                "Outputs are %s, but must currently be a MapSet in"
                " the case that the input includes sideband"
                " objects." % type(outputs)
            )

    def check_transforms(self, transforms):
        """Check that transforms' inputs and outputs match those specified
        for this service.

        Parameters
        ----------
        transforms

        Raises
        ------
        AssertionError if transforms' inputs/outputs don't match the stage spec

        """
        assert set(transforms.input_names) == set(self.input_names), (
            "Transforms' inputs: "
            + str(transforms.input_names)
            + "\nStage inputs: "
            + str(self.input_names)
        )

        assert set(transforms.output_names) == set(self.output_names), (
            "Transforms' outputs: "
            + str(transforms.output_names)
            + "\nStage outputs: "
            + str(self.output_names)
        )

    def check_outputs(self, outputs):
        """Check that the output names are those expected"""
        if set(outputs.names) != set(self.output_names):
            raise ValueError(
                "'{}' : Outputs found do not match expected outputs for this stage:\n"
                "  Outputs found: {}\n"
                "  Expected stage outputs: {}".format(
                    self.stage_name, outputs.names, self.output_names
                )
            )

    def load_events(self, events):
        """Load events from path given by `events`. Stored as `self.events`.

        Parameters
        ----------
        events : string or Events object
            If string, load events from that location. If Events object,
            deepcopy to obtain `self.events`

        """
        if isinstance(events, Param):
            events = events.value
        elif isinstance(events, basestring):
            events = find_resource(events)
        this_hash = hash_obj(events, full_hash=self.full_hash)
        if self._events_hash is not None and this_hash == self._events_hash:
            return
        logging.debug("Extracting events from Events obj or file: %s", events)
        self.events = Events(events)
        self._events_hash = this_hash

    def cut_events(self, keep_criteria):
        """Apply a cut to `self.events`, keeping only events that pass
        `keep_criteria`.

        Parameters
        ----------
        keep_criteria : string
             See pisa.core.Events.applyCut for more info on specifying this.

        """
        if isinstance(keep_criteria, Param):
            keep_criteria = keep_criteria.value

        if keep_criteria is not None:
            events = self.events.applyCut(keep_criteria=keep_criteria)
            events_hash = hash_obj(events, full_hash=self.full_hash)

            self.events = events
            self._events_hash = events_hash

    def instantiate_disk_cache(self):
        """Instantiate a disk cache for use by the stage."""
        if isinstance(self.disk_cache, DiskCache):
            self.disk_cache_path = self.disk_cache.path
            return

        if self.disk_cache is False or self.disk_cache is None:
            self.disk_cache = None
            self.disk_cache_path = None
            return

        if isinstance(self.disk_cache, basestring):
            dirpath, filename = os.path.split(
                os.path.expandvars(os.path.expanduser(self.disk_cache))
            )
            if os.path.isabs(dirpath):
                self.disk_cache_path = os.path.join(dirpath, filename)
            else:
                self.disk_cache_path = os.path.join(CACHE_DIR, dirpath, filename)
        elif self.disk_cache is True:
            dirs = [CACHE_DIR, self.stage_name]
            dirpath = os.path.expandvars(os.path.expanduser(os.path.join(*dirs)))
            if self.service_name is not None and self.service_name != "":
                filename = self.service_name + ".sqlite"
            else:
                filename = "generic.sqlite"
            mkdir(dirpath, warn=False)
            self.disk_cache_path = os.path.join(dirpath, filename)
        else:
            raise ValueError("Don't know what to do with a %s." % type(self.disk_cache))

        self.disk_cache = DiskCache(self.disk_cache_path, max_depth=10, is_lru=False)
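
    # A hypothetical illustration of the path resolution above (not from the
    # source): assuming CACHE_DIR == '/tmp/pisa_cache', stage 'aeff', and
    # service 'hist':
    #   disk_cache='relative/dir/mycache.sqlite'
    #     -> disk_cache_path == '/tmp/pisa_cache/relative/dir/mycache.sqlite'
    #   disk_cache='/home/myuser/mycache.sqlite'
    #     -> disk_cache_path == '/home/myuser/mycache.sqlite'
    #   disk_cache=True
    #     -> disk_cache_path == '/tmp/pisa_cache/aeff/hist.sqlite'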

    def _derive_outputs_hash(self):
        """Derive a hash value that unique identifies the outputs that will be
        generated based upon the current state of the stage.

        This implementation hashes together:
        * Input and output binning objects' hash values (if either input or
          output binning is not None)
        * Current params' values hash
        * Hashes from any input objects with names in `self.input_names`

        If any of the above objects is specified but returns None for its hash
        value, the entire output hash is invalidated, and None is returned.

        """
        id_objects = []

        # If stage uses inputs, grab hash from the inputs container object
        if self.outputs_cache is not None and len(self.input_names) > 0:
            inhash = self.inputs.hash
            logging.trace("inputs.hash = %s" % inhash)
            id_objects.append(inhash)

        # If stage uses transforms, get hash from the transforms
        transforms_hash = None
        if self.use_transforms:
            transforms_hash, nominal_transforms_hash = self._derive_transforms_hash()
            id_objects.append(transforms_hash)
            logging.trace("derived transforms hash = %s" % id_objects[-1])

        # Otherwise, generate sub-hash on binning and param values here
        else:
            transforms_hash, nominal_transforms_hash = None, None

            if self.outputs_cache is not None:
                id_subobjects = []
                # Include all parameter values
                id_subobjects.append(self.params.values_hash)

                # Include additional attributes of this object
                for attr in sorted(self._attrs_to_hash):
                    val = getattr(self, attr)
                    if hasattr(val, "hash"):
                        attr_hash = val.hash
                    elif self.full_hash:
                        norm_val = normQuant(val)
                        attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
                    else:
                        attr_hash = hash_obj(val, full_hash=self.full_hash)
                    id_subobjects.append(attr_hash)

                # Generate the "sub-hash"
                if any([(h is None) for h in id_subobjects]):
                    sub_hash = None
                else:
                    sub_hash = hash_obj(id_subobjects, full_hash=self.full_hash)
                id_objects.append(sub_hash)

        # If any hashes are missing (i.e., None), invalidate the entire hash
        if self.outputs_cache is None or any([(h is None) for h in id_objects]):
            outputs_hash = None
        else:
            outputs_hash = hash_obj(id_objects, full_hash=self.full_hash)

        return outputs_hash, transforms_hash, nominal_transforms_hash

    def _derive_transforms_hash(self, nominal_transforms_hash=None):
        """Compute a hash that uniquely identifies the transforms that will be
        produced from the current configuration. Note that this hash needs only
        to be valid for this run (i.e., it is a volatile hash).

        This implementation returns a hash from the current parameters' values.

        """
        id_objects = []
        h = self.params.values_hash
        logging.trace("self.params.values_hash = %s" % h)
        id_objects.append(h)

        # Grab any provided nominal transforms hash, or derive it again
        if nominal_transforms_hash is None:
            nominal_transforms_hash = self._derive_nominal_transforms_hash()
        # If a valid hash has been gotten, include it
        if nominal_transforms_hash is not None:
            id_objects.append(nominal_transforms_hash)

        for attr in sorted(self._attrs_to_hash):
            val = getattr(self, attr)
            if hasattr(val, "hash"):
                attr_hash = val.hash
            elif self.full_hash:
                norm_val = normQuant(val)
                attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
            else:
                attr_hash = hash_obj(val, full_hash=self.full_hash)
            id_objects.append(attr_hash)

        # If any hashes are missing (i.e., None), invalidate the entire hash
        if any([(h is None) for h in id_objects]):
            transforms_hash = None
        else:
            transforms_hash = hash_obj(id_objects, full_hash=self.full_hash)

        return transforms_hash, nominal_transforms_hash

    def _derive_nominal_transforms_hash(self):
        """Derive a hash to uniquely identify the nominal transform. This
        should be unique across processes and invocations because the nominal
        transforms can be non-volatile (cached to disk) and must still be
        valid given their hash value upon loading from disk in the future.

        This implementation uses the nominal parameter values' hash
        combined with the source code hash to generate the final nominal
        transforms hash.

        Notes
        -----
        The hashing scheme implemented here might be sufficiently unique for
        many cases, but override this method in services according to the
        following guidelines:

        * Stages that use a nominal transform should override this method if
          the hash is more accurately computed differently from here.

        * Stages that use transforms but do not use nominal transforms can
          override this method with a simpler version that simply returns None
          to save computation time (if this method is found to be a significant
          performance hit). (This method is called each time an output
          is computed if `self.use_transforms == True`.)

        * Stages that use no transforms (i.e., `self.use_transforms == False`)
          will not call any built-in methods related to transforms, so
          overriding this method is irrelevant to such stages.

        If this method *is* overridden (and not just to return None), since the
        nominal transform may be stored to a disk cache, make sure that
        `self.source_code_hash` is included in the objects used to compute the
        final hash value. Even if all parameters are the same, a nominal
        transform stored to disk is ***invalid if the source code changes***,
        and `_derive_nominal_transforms_hash` must reflect this.

        """
        id_objects = []
        id_objects.append(self.params.nominal_values_hash)
        for attr in sorted(self._attrs_to_hash):
            val = getattr(self, attr)
            if hasattr(val, "hash"):
                attr_hash = val.hash
            elif self.full_hash:
                norm_val = normQuant(val)
                attr_hash = hash_obj(norm_val, full_hash=self.full_hash)
            else:
                attr_hash = hash_obj(val, full_hash=self.full_hash)
            id_objects.append(attr_hash)
        id_objects.append(self.source_code_hash)

        # If any hashes are missing (i.e., None), invalidate the entire hash
        if any([(h is None) for h in id_objects]):
            nominal_transforms_hash = None
        else:
            nominal_transforms_hash = hash_obj(id_objects, full_hash=self.full_hash)
        return nominal_transforms_hash

    def _derive_nominal_outputs_hash(self):
        return self._derive_nominal_transforms_hash()

    def _compute_nominal_transforms(self):  # pylint: disable=no-self-use
        """Stages that start with a nominal transform and use systematic
        parameters to modify the nominal transform in order to obtain the final
        transforms should override this method for deriving the nominal
        transform."""
        return None

    def _compute_transforms(self):  # pylint: disable=no-self-use
        """Stages that apply transforms to inputs should override this method
        for deriving the transform. No-input stages should leave this as-is."""
        return TransformSet([])

    def _compute_nominal_outputs(self):  # pylint: disable=no-self-use
        return None

    @profile
    def _compute_outputs(self, inputs):
        """Override this method for no-input stages which do not use transforms.
        Input stages that compute a TransformSet needn't override this, as the
        work for computing outputs is done by the TransfromSet below."""
        return self.transforms.apply(inputs)

    def validate_binning(self):  # pylint: disable=no-self-use
        """Override this method to test if the input and output binning
        (e.g., dimensionality, domains, separately or in combination)
        conform to the transform applied by the stage."""
        return
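
The override points listed in the class docstring can be exercised with a minimal sketch like the following. This is a hypothetical no-input service (`DummyFluxStage` and `flux_norm` are illustrative names, and the `Map`, `MapSet`, and `numpy` imports used elsewhere in this module are assumed), not an actual PISA service:

    class DummyFluxStage(Stage):
        # Hypothetical no-input service: use_transforms=False, so only
        # _compute_outputs is overridden; get_outputs() still handles all
        # hashing and caching before delegating here
        def __init__(self, params, output_binning, **kwargs):
            super(DummyFluxStage, self).__init__(
                use_transforms=False,
                params=params,
                expected_params=('flux_norm',),
                input_names=None,
                output_names=('nue', 'numu'),
                output_binning=output_binning,
                **kwargs
            )

        def _compute_outputs(self, inputs=None):
            # Produce one flat map per output name, scaled by flux_norm
            norm = self.params.flux_norm.m_as('dimensionless')
            return MapSet([
                Map(name=name,
                    hist=norm * np.ones(self.output_binning.shape),
                    binning=self.output_binning)
                for name in self.output_names
            ])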
Example 6
    def _compute_nominal_transforms(self):
        """Compute new PID transforms."""
        logging.debug('Updating pid.hist PID histograms...')

        # TODO(shivesh): As of now, events do not have units as far as PISA
        # is concerned

        self.load_events(self.params.pid_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # TODO: in future, the events file will not have these combined
        # already, and it should be done here (or in a nominal transform,
        # etc.). See below about taking this step when we move to directly
        # using the I3-HDF5 files.
        #events_file_combined_flavints = tuple([
        #    NuFlavIntGroup(s)
        #    for s in self.events.metadata['flavints_joined']
        #])

        # TODO: take events object as an input instead of as a param that
        # specifies a file? Or handle both cases?

        pid_spec = OrderedDict(eval(self.params.pid_spec.value))
        if set(pid_spec.keys()) != set(self.output_channels):
            msg = 'PID criteria from `pid_spec` {0} do not match output channels {1}'
            raise ValueError(msg.format(pid_spec.keys(), self.output_channels))

        # TODO: add importance weights, error computation

        logging.debug("Separating events by PID...")
        separated_events = OrderedDict()
        for sig in self.output_channels:
            this_sig_events = self.events.applyCut(pid_spec[sig])
            separated_events[sig] = this_sig_events

        # Derive transforms by combining flavints that behave similarly, but
        # apply the derived transforms to the input flavints separately
        # (leaving combining these together to later)
        transforms = []
        for flavint_group in self.transform_groups:
            logging.debug("Working on %s PID", flavint_group)

            # TODO(shivesh): errors
            # TODO(shivesh): total histo check?
            sig_histograms = {}
            total_histo = np.zeros(self.output_binning.shape)
            for repr_flavint in flavint_group:
                histo = self.events.histogram(
                    kinds=repr_flavint,
                    binning=self.output_binning,
                    weights_col=self.params.pid_weights_name.value,
                    errors=None).hist
                total_histo += histo

            for sig in self.output_channels:
                sig_histograms[sig] = np.zeros(self.output_binning.shape)
                for repr_flavint in flavint_group:
                    this_sig_histo = separated_events[sig].histogram(
                        kinds=repr_flavint,
                        binning=self.output_binning,
                        weights_col=self.params.pid_weights_name.value,
                        errors=None).hist
                    sig_histograms[sig] += this_sig_histo

            for sig in self.output_channels:
                with np.errstate(divide='ignore', invalid='ignore'):
                    xform_array = sig_histograms[sig] / total_histo

                num_invalid = np.sum(~np.isfinite(xform_array))
                if num_invalid > 0:
                    logging.warning(
                        'Group "%s", PID signature "%s" has %d bins with no'
                        ' events (and hence the ability to separate events'
                        ' by PID cannot be ascertained). These are being'
                        ' masked off from any further computations.',
                        flavint_group, sig, num_invalid)
                    # TODO: this caused buggy event propagation for some
                    # reason; check and re-introduce the masked array idea
                    # when this is fixed. For now, replicating the behavior
                    # from PISA 2.
                    #xform_array = np.ma.masked_invalid(xform_array)

                # Double check that no NaN remain
                #assert not np.any(np.isnan(xform_array))

                # Copy this transform to use for each input in the group
                for input_name in self.input_names:
                    if input_name not in flavint_group:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=input_name,
                        output_name=self.suffix_channel(input_name, sig),
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=xform_array)
                    transforms.append(xform)

        return TransformSet(transforms=transforms)
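
At its core, each transform above is a per-bin ratio: the fraction of a flavor group's events that falls into each PID signature. A standalone sketch with hypothetical counts:

    import numpy as np

    # Hypothetical per-bin event counts for one flavor group
    total_histo = np.array([[10., 0.], [4., 8.]])
    sig_histograms = {
        'trck': np.array([[7., 0.], [1., 6.]]),
        'cscd': np.array([[3., 0.], [3., 2.]]),
    }

    with np.errstate(divide='ignore', invalid='ignore'):
        xforms = {sig: h / total_histo for sig, h in sig_histograms.items()}
        # Empty truth bins yield NaN (logged and left as-is by the service);
        # fractions over all signatures sum to 1 in every populated bin
        check = sum(sig_histograms.values()) / total_histo
    print(xforms['trck'])  # [[0.7  nan] [0.25 0.75]]
    print(check)           # [[1.   nan] [1.   1.  ]]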
Example 7
    def _compute_nominal_transforms(self):
        self.load_events(self.params.aeff_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # Units must be the following for correctly converting a sum-of-
        # OneWeights-in-bin to an average effective area across the bin.
        comp_units = dict(true_energy='GeV',
                          true_coszen=None,
                          true_azimuth='rad')

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.input_binning
        }
        #out_units = {dim: unit for dim, unit in comp_units.items()
        #             if dim in self.output_binning}

        # These will be in the computational units
        input_binning = self.input_binning.to(**in_units)

        # Account for "missing" dimension(s) (dimensions OneWeight expects for
        # computation of bin volume), and accommodate with a factor equal to
        # the full range. See IceCube wiki/documentation for OneWeight for
        # more info.
        missing_dims_vol = 1
        # TODO: currently, azimuth required to *not* be part of input binning
        if 'true_azimuth' not in input_binning:
            missing_dims_vol *= 2 * np.pi
        # TODO: Following is currently never the case, handle?
        if 'true_coszen' not in input_binning:
            missing_dims_vol *= 2

        nominal_transforms = []

        for xform_flavints in self.transform_groups:
            logging.info("Working on %s effective areas xform", xform_flavints)

            raw_hist = self.events.histogram(kinds=xform_flavints,
                                             binning=input_binning,
                                             weights_col='weighted_aeff',
                                             errors=True)
            raw_transform = unp.nominal_values(raw_hist.hist)
            raw_errors = unp.std_devs(raw_hist.hist)

            # Divide histogram by
            #   (energy bin width x coszen bin width x azimuth bin width)
            # volumes to convert from sums-of-OneWeights-in-bins to
            # effective areas. Note that volume correction factor for
            # missing dimensions is applied here.
            bin_volumes = input_binning.bin_volumes(attach_units=False)
            raw_transform /= (bin_volumes * missing_dims_vol)
            raw_errors /= (bin_volumes * missing_dims_vol)

            e_idx = input_binning.index('true_energy')
            if e_idx == 1:
                # transpose
                raw_transform = raw_transform.T
                raw_errors = raw_errors.T

            # Do the smoothing
            smooth_transform = self.smooth(raw_transform, raw_errors,
                                           input_binning['true_energy'],
                                           input_binning['true_coszen'])

            if e_idx == 1:
                # transpose back
                smooth_transform = smooth_transform.T

            nominal_transforms.extend(
                populate_transforms(service=self,
                                    xform_flavints=xform_flavints,
                                    xform_array=smooth_transform))

        return TransformSet(transforms=nominal_transforms)
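
A sketch of the volume normalization above (hypothetical binning): the per-bin volume is the outer product of the per-dimension bin widths, and the missing azimuth dimension contributes a constant factor of 2*pi.

    import numpy as np

    # Hypothetical 2D input binning: 3 true_energy bins x 2 true_coszen bins
    e_edges = np.array([1., 10., 100., 1000.])  # GeV
    cz_edges = np.array([-1., 0., 1.])

    bin_volumes = np.outer(np.diff(e_edges), np.diff(cz_edges))  # shape (3, 2)
    missing_dims_vol = 2 * np.pi  # true_azimuth not binned -> full-range factor

    # Hypothetical sums of weighted_aeff (OneWeight-derived) per bin
    raw_transform = np.full((3, 2), 6.0)
    aeff = raw_transform / (bin_volumes * missing_dims_vol)
    print(aeff)  # average effective area per bin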
Example 8
    def _compute_nominal_transforms(self):
        self.load_events(self.params.aeff_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # Units must be the following for correctly converting a sum-of-
        # OneWeights-in-bin to an average effective area across the bin.
        comp_units = dict(true_energy='GeV', true_coszen=None,
                          true_azimuth='rad')

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {dim: unit for dim, unit in comp_units.items()
                    if dim in self.input_binning}

        # TODO: use out_units for some kind of conversion?
        #out_units = {dim: unit for dim, unit in comp_units.items()
        #             if dim in self.output_binning}

        # These will be in the computational units
        input_binning = self.input_binning.to(**in_units)

        # Account for "missing" dimension(s) (dimensions OneWeight expects for
        # computation of bin volume), and accommodate with a factor equal to
        # the full range. See IceCube wiki/documentation for OneWeight for
        # more info.
        missing_dims_vol = 1
        if 'true_azimuth' not in input_binning:
            missing_dims_vol *= 2*np.pi
        if 'true_coszen' not in input_binning:
            missing_dims_vol *= 2

        if bool(self.debug_mode):
            outdir = os.path.join(find_resource('debug'),
                                  self.stage_name,
                                  self.service_name)
            mkdir(outdir)
            #hex_hash = hash2hex(kde_hash)

        bin_volumes = input_binning.bin_volumes(attach_units=False)
        norm_volumes = bin_volumes * missing_dims_vol

        nominal_transforms = []
        for xform_flavints in self.transform_groups:
            logging.debug('Working on %s effective areas xform',
                          xform_flavints)

            aeff_transform = self.events.histogram(
                kinds=xform_flavints,
                binning=input_binning,
                weights_col='weighted_aeff',
                errors=(self.error_method not in [None, False])
            )
            aeff_transform = aeff_transform.hist

            # Divide histogram by
            #   (energy bin width x coszen bin width x azimuth bin width)
            # volumes to convert from sums-of-OneWeights-in-bins to
            # effective areas. Note that volume correction factor for
            # missing dimensions is applied here.
            aeff_transform /= norm_volumes

            if self.debug_mode:
                outfile = os.path.join(
                    outdir, 'aeff_' + str(xform_flavints) + '.pkl'
                )
                to_file(aeff_transform, outfile)

            nominal_transforms.extend(
                populate_transforms(
                    service=self,
                    xform_flavints=xform_flavints,
                    xform_array=aeff_transform
                )
            )

        return TransformSet(transforms=nominal_transforms)
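
When `error_method` is set, the histogram carries per-bin uncertainties, and dividing by the (exact) volume factors propagates them automatically. A sketch using the uncertainties package, which backs the `unp` calls in the previous example:

    import numpy as np
    from uncertainties import unumpy as unp

    counts = unp.uarray([4., 9.], [2., 3.])  # nominal values, std devs
    norm_volumes = np.array([2., 3.])

    aeff = counts / norm_volumes  # std devs scale along with nominal values
    print(unp.nominal_values(aeff))  # [2. 3.]
    print(unp.std_devs(aeff))        # [1. 1.]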
Example 9
def compute_transforms(service):
    """Compute effective area transforms, taking aeff systematics into account.

    Systematics are: `aeff_scale`, `livetime`, and `nutau_cc_norm`

    """
    aeff_scale = service.params.aeff_scale.m_as('dimensionless')
    livetime_s = service.params.livetime.m_as('sec')
    base_scale = aeff_scale * livetime_s

    logging.trace('livetime = %s --> %s sec',
                  service.params.livetime.value, livetime_s)

    if service.particles == 'neutrinos':
        if not hasattr(service, 'nutau_cc_norm_must_be_one'):
            service.nutau_cc_norm_must_be_one = False
            """If any flav/ints besides nutau_cc and nutaubar_cc are grouped
            with one or both of those for transforms, then a
            `nutau_cc_norm` != 1 cannot be applied."""

            nutaucc_and_nutaubarcc = set(NuFlavIntGroup('nutau_cc+nutaubar_cc'))
            for group in service.transform_groups:
                # If nutau_cc, nutaubar_cc, or both are in the group and
                # other flav/ints are present, nutau_cc_norm must be one!
                group_set = set(group)
                if group_set.intersection(nutaucc_and_nutaubarcc) and \
                        group_set.difference(nutaucc_and_nutaubarcc):
                    service.nutau_cc_norm_must_be_one = True

        nutau_cc_norm = service.params.nutau_cc_norm.m_as('dimensionless')
        if nutau_cc_norm != 1 and service.nutau_cc_norm_must_be_one:
            raise ValueError(
                '`nutau_cc_norm` = %e but can only be != 1 if nutau CC and'
                ' nutaubar CC are separated from other flav/ints.'
                ' Transform groups are: %s'
                % (nutau_cc_norm, service.transform_groups)
            )

    if hasattr(service, 'sum_grouped_flavints'):
        sum_grouped_flavints = service.sum_grouped_flavints
    else:
        sum_grouped_flavints = False

    new_transforms = []
    for transform in service.nominal_transforms:
        this_scale = base_scale
        if service.particles == 'neutrinos':
            out_nfig = NuFlavIntGroup(transform.output_name)
            if 'nutau_cc' in out_nfig or 'nutaubar_cc' in out_nfig:
                this_scale *= nutau_cc_norm

        if this_scale != 1:
            aeff_transform = transform.xform_array * this_scale
        else:
            aeff_transform = transform.xform_array

        new_xform = BinnedTensorTransform(
            input_names=transform.input_names,
            output_name=transform.output_name,
            input_binning=transform.input_binning,
            output_binning=transform.output_binning,
            xform_array=aeff_transform,
            sum_inputs=sum_grouped_flavints
        )
        new_transforms.append(new_xform)

    return TransformSet(new_transforms)
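
The applicability check for `nutau_cc_norm` reduces to set logic over the transform groups; a standalone sketch with plain string sets standing in for `NuFlavIntGroup`:

    nutaucc_and_nutaubarcc = {'nutau_cc', 'nutaubar_cc'}

    def norm_must_be_one(transform_groups):
        # True if any group mixes (anti)nutau CC with other flav/ints, in
        # which case a norm != 1 would leak onto the other group members
        for group_set in transform_groups:
            if (group_set & nutaucc_and_nutaubarcc
                    and group_set - nutaucc_and_nutaubarcc):
                return True
        return False

    # Separated groups: scaling nutau CC alone is possible
    print(norm_must_be_one([{'nutau_cc', 'nutaubar_cc'}, {'numu_cc'}]))  # False
    # Mixed group: the scale could not be applied to nutau CC alone
    print(norm_must_be_one([{'nutau_cc', 'numu_cc'}]))                   # True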
Example 10
    def _compute_nominal_transforms(self):
        """Compute cross-section transforms."""
        logging.info('Updating xsec.genie cross-section histograms...')

        self.load_xsec_splines()
        livetime = self._ev_param(self.params['livetime'].value)
        ice_p = self._ev_param(self.params['ice_p'].value)
        fid_vol = self._ev_param(self.params['fid_vol'].value)
        mr_h20 = self._ev_param(self.params['mr_h20'].value)
        x_energy_scale = self.params['x_energy_scale'].value

        input_binning = self.input_binning

        ebins = input_binning.true_energy
        e_idx = None
        for idx, name in enumerate(input_binning.names):
            if 'true_energy' in name:
                e_idx = idx
        assert e_idx is not None, \
            'input binning must contain a true_energy dimension'

        xsec_transforms = {}
        for flav in self.input_names:
            for int_ in ALL_NUINT_TYPES:
                flavint = flav + '_' + str(int_)
                logging.debug('Obtaining cross-sections for %s', flavint)
                xsec_map = self.xsec.get_map(flavint,
                                             MultiDimBinning([ebins]),
                                             x_energy_scale=x_energy_scale)

                def func(idx):
                    if idx == e_idx:
                        return xsec_map.hist
                    return tuple(range(input_binning.shape[idx]))

                num_dims = input_binning.num_dims
                xsec_trns = np.meshgrid(*map(func, range(num_dims)),
                                        indexing='ij')[e_idx]
                xsec_trns *= (livetime * fid_vol * (ice_p / mr_h20) *
                              (6.022140857e+23 / ureg.mol))
                xsec_transforms[NuFlavInt(flavint)] = xsec_trns

        nominal_transforms = []
        for flavint_group in self.transform_groups:
            flav_names = [str(flav) for flav in flavint_group.flavs]
            for input_name in self.input_names:
                if input_name not in flav_names:
                    continue

                xform_array = []
                for flavint in flavint_group.flavints:
                    if flavint in xsec_transforms:
                        xform_array.append(xsec_transforms[flavint])
                xform_array = reduce(add, xform_array)

                xform = BinnedTensorTransform(
                    input_names=input_name,
                    output_name=str(flavint_group),
                    input_binning=input_binning,
                    output_binning=self.output_binning,
                    xform_array=xform_array)
                nominal_transforms.append(xform)

        return TransformSet(transforms=nominal_transforms)
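The `np.meshgrid` construction above is a compact way to broadcast a cross-section defined along the energy axis into an array spanning all input dimensions. A minimal sketch of the same trick under an assumed 2D (true_energy, true_coszen) binning:

import numpy as np

xsec_vs_e = np.array([0.5, 1.0, 2.0])  # hypothetical xsec at 3 energy bins
shape = (3, 4)                         # (true_energy, true_coszen) bin counts
e_idx = 0                              # position of the energy dimension

def func(idx):
    # 1D payload along the energy axis, dummy indices everywhere else
    if idx == e_idx:
        return xsec_vs_e
    return tuple(range(shape[idx]))

# meshgrid with indexing='ij' tiles xsec_vs_e across the coszen axis;
# element [e_idx] of the result is the full (3, 4) transform array
xsec_trns = np.meshgrid(*map(func, range(len(shape))), indexing='ij')[e_idx]
assert xsec_trns.shape == shape
assert np.all(xsec_trns[:, 0] == xsec_vs_e)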
Example no. 12
    def _compute_transforms(self):
        """Generate reconstruction "smearing kernels" by histogramming true and
        reconstructed variables from a Monte Carlo events file.

        The resulting transform is a 2N-dimensional histogram, where N is the
        dimensionality of the input binning. The transform maps the truth bin
        counts to the reconstructed bin counts.

        I.e., for the case of 1D input binning, the ith element of the
        reconstruction kernel will be a map showing the distribution of events
        over all the reco space from truth bin i. This will be normalised to
        the total number of events in truth bin i.

        Notes
        -----
        In the current implementation these histograms are made
        **UN**weighted. This is probably quite wrong...

        """
        e_res_scale = self.params.e_res_scale.value.m_as('dimensionless')
        cz_res_scale = self.params.cz_res_scale.value.m_as('dimensionless')
        e_reco_bias = self.params.e_reco_bias.value.m_as('GeV')
        cz_reco_bias = self.params.cz_reco_bias.value.m_as('dimensionless')
        res_scale_ref = self.params.res_scale_ref.value.strip().lower()
        assert res_scale_ref in ['zero']  # TODO: , 'mean', 'median']

        self.load_events(self.params.reco_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # Computational units must be the following for compatibility with
        # events file
        comp_units = dict(true_energy='GeV',
                          true_coszen=None,
                          true_azimuth='rad',
                          reco_energy='GeV',
                          reco_coszen=None,
                          reco_azimuth='rad',
                          pid=None)

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.input_binning
        }
        out_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.output_binning
        }

        # These binnings will be in the computational units defined above
        input_binning = self.input_binning.to(**in_units)
        output_binning = self.output_binning.to(**out_units)

        xforms = []
        for xform_flavints in self.transform_groups:
            logging.debug("Working on %s reco kernels" % xform_flavints)

            repr_flavint = xform_flavints[0]

            true_energy = self.events[repr_flavint]['true_energy']
            true_coszen = self.events[repr_flavint]['true_coszen']
            reco_energy = self.events[repr_flavint]['reco_energy']
            reco_coszen = self.events[repr_flavint]['reco_coszen']
            e_reco_err = reco_energy - true_energy
            cz_reco_err = reco_coszen - true_coszen

            if res_scale_ref == 'zero':
                self.events[repr_flavint]['reco_energy'] = (
                    true_energy + e_reco_err * e_res_scale + e_reco_bias)
                self.events[repr_flavint]['reco_coszen'] = (
                    true_coszen + cz_reco_err * cz_res_scale + cz_reco_bias)

            # True (input) + reco {+ PID} (output)-dimensional histogram
            # is the basis for the transformation
            reco_kernel = self.events.histogram(
                kinds=xform_flavints,
                binning=input_binning * output_binning,
                weights_col=self.params.reco_weights_name.value,
                errors=(self.error_method not in [None, False]))
            # Extract just the numpy array to work with
            reco_kernel = reco_kernel.hist

            # This takes into account the correct kernel normalization:
            # the reco map must be normalised to the number of events in
            # the truth bin.
            #
            # I.e., we have N events from the truth bin which then become
            # spread out over the whole map due to reconstruction; the
            # normalisation divides this map by N.
            #
            # Previously this was hard-coded for 2 dimensions, but I have
            # tried to generalise it to arbitrary dimensionality.

            # Truth-only (N-dimensional) histogram will be used for
            # normalization (so transform is in terms of fraction-of-events in
            # input--i.e. truth--bin). Sum over the input dimensions.
            true_event_counts = self.events.histogram(
                kinds=xform_flavints,
                binning=input_binning,
                weights_col=self.params.reco_weights_name.value,
                errors=(self.error_method not in [None, False]))
            # Extract just the numpy array to work with
            true_event_counts = true_event_counts.hist

            # If there weren't any events in the input (true_*) bin, make this
            # bin have no effect -- i.e., populate all output bins
            # corresponding to the input bin with zeros via `nan_to_num`.
            with np.errstate(divide='ignore', invalid='ignore'):
                true_event_counts[true_event_counts == 0] = np.nan
                norm_factors = 1.0 / true_event_counts
                norm_factors = np.nan_to_num(norm_factors)

            # Numpy broadcasts lower-dimensional things to higher dimensions
            # from last dimension to first; if we simply multiplied the
            # reco_kernel by norm_factors, the normalization would be applied
            # to the __output__ dimensions rather than the input dimensions.
            # Add "dummy" dimensions to norm_factors where we want the "extra
            # dimensions": at the end.
            for _ in self.output_binning:
                norm_factors = np.expand_dims(norm_factors, axis=-1)

            # Apply the normalization to the kernels
            reco_kernel *= norm_factors

            assert np.all(reco_kernel >= 0), \
                    'number of elements less than 0 = %d' \
                    % np.sum(reco_kernel < 0)
            sum_over_axes = tuple(range(-len(self.output_binning), 0))
            totals = np.sum(reco_kernel, axis=sum_over_axes)
            assert np.all(
                totals <= 1 + 1e-14), 'max = ' + str(np.max(totals) - 1)

            # Now populate this transform to each input for which it applies.

            if self.sum_grouped_flavints:
                xform_input_names = []
                for input_name in self.input_names:
                    input_flavs = NuFlavIntGroup(input_name)
                    if len(set(xform_flavints).intersection(input_flavs)) > 0:
                        xform_input_names.append(input_name)

                for output_name in self.output_names:
                    if output_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=xform_input_names,
                        output_name=output_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                        sum_inputs=self.sum_grouped_flavints)
                    xforms.append(xform)
            else:
                # NOTES:
                # * Output name is same as input name
                # * Use `self.input_binning` and `self.output_binning` so maps
                #   are returned in user-defined units (rather than
                #   computational units, which are attached to the non-`self`
                #   versions of these binnings).
                for input_name in self.input_names:
                    if input_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=input_name,
                        output_name=input_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                    )
                    xforms.append(xform)

        return TransformSet(transforms=xforms)
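The empty-bin handling and trailing `expand_dims` used for the normalisation above are easiest to see in isolation. A minimal sketch, assuming a 1D truth binning and a 1D reco binning so the kernel is 2D:

import numpy as np

# Hypothetical kernel of raw counts: 3 truth bins x 4 reco bins
reco_kernel = np.array([[2., 1., 1., 0.],
                        [0., 0., 0., 0.],   # empty truth bin
                        [1., 1., 1., 1.]])
true_event_counts = reco_kernel.sum(axis=-1)

# Empty truth bins must contribute nothing: route 1/0 through NaN to 0
with np.errstate(divide='ignore', invalid='ignore'):
    true_event_counts[true_event_counts == 0] = np.nan
    norm_factors = np.nan_to_num(1.0 / true_event_counts)

# One trailing dummy axis per output dimension so broadcasting applies
# the factors along the input (truth) axis, not the reco axis
norm_factors = np.expand_dims(norm_factors, axis=-1)

reco_kernel *= norm_factors
assert np.allclose(reco_kernel.sum(axis=-1), [1., 0., 1.])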
Example no. 13
    def _compute_transforms(self):
        """
        Generate reconstruction "smearing kernels" by reading in a set of
        parameterisation functions from a json file. This should have the same
        dimensionality as the input binning i.e. if you have energy and
        coszenith input binning then the kernels provided should have both
        energy and coszenith resolution functions.

        Any superposition of distributions from scipy.stats is supported.
        """
        res_scale_ref = self.params.res_scale_ref.value.strip().lower()
        assert res_scale_ref in ['zero']  # TODO: , 'mean', 'median']

        reco_param_source = self.params.reco_paramfile.value

        if reco_param_source is None:
            raise ValueError(
                'A non-None reco parameterization must be provided via'
                ' `params.reco_paramfile`.')

        reco_param_hash = hash_obj(reco_param_source)

        if (self._reco_param_hash is None
                or reco_param_hash != self._reco_param_hash):
            reco_param = load_reco_param(reco_param_source)

            # Transform groups are implicitly defined by the contents of the
            # reco paramfile's keys
            implicit_transform_groups = reco_param.keys()

            # Make sure these match transform groups specified for the stage
            if set(implicit_transform_groups) != set(self.transform_groups):
                raise ValueError(
                    'Transform groups (%s) defined implicitly by'
                    ' %s reco parameterizations do not match those'
                    ' defined as the stage\'s `transform_groups` (%s).' %
                    (implicit_transform_groups, reco_param_source,
                     self.transform_groups))

            self.param_dict = reco_param
            self._reco_param_hash = reco_param_hash

            self.eval_dict = self.evaluate_reco_param()
            self.reco_scales_and_biases_applicable()

        # everything seems to be fine, so rescale and shift distributions
        eval_dict = self.scale_and_shift_reco_dists()

        # Computational units must be the following for compatibility with
        # events file
        comp_units = dict(true_energy='GeV',
                          true_coszen=None,
                          true_azimuth='rad',
                          reco_energy='GeV',
                          reco_coszen=None,
                          reco_azimuth='rad',
                          pid=None)

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.input_binning
        }
        out_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.output_binning
        }

        # These binnings will be in the computational units defined above
        input_binning = self.input_binning.to(**in_units)
        output_binning = self.output_binning.to(**out_units)
        # Use the computational-unit binnings defined above for all bin
        # centers and edges
        en_centers_in = input_binning['true_energy'].weighted_centers.magnitude
        en_edges_in = input_binning['true_energy'].bin_edges.magnitude
        cz_centers_in = input_binning['true_coszen'].weighted_centers.magnitude
        cz_edges_in = input_binning['true_coszen'].bin_edges.magnitude
        en_edges_out = output_binning['reco_energy'].bin_edges.magnitude
        cz_edges_out = output_binning['reco_coszen'].bin_edges.magnitude

        n_e_in = len(en_centers_in)
        n_cz_in = len(cz_centers_in)
        n_e_out = len(en_edges_out) - 1
        n_cz_out = len(cz_edges_out) - 1

        if self.coszen_flipback:
            cz_edges_out, flipback_mask, keep = \
                self.extend_binning_for_coszen(ext_low=-3., ext_high=+3.)

        xforms = []
        for xform_flavints in self.transform_groups:
            logging.debug("Working on %s reco kernel..." % xform_flavints)

            this_params = eval_dict[xform_flavints]
            reco_kernel = np.zeros((n_e_in, n_cz_in, n_e_out, n_cz_out))

            for (i, j) in itertools.product(range(n_e_in), range(n_cz_in)):
                e_kern_cdf = self.make_cdf(bin_edges=en_edges_out,
                                           enval=en_centers_in[i],
                                           enindex=i,
                                           czval=None,
                                           czindex=j,
                                           dist_params=this_params['energy'])
                cz_kern_cdf = self.make_cdf(bin_edges=cz_edges_out,
                                            enval=en_centers_in[i],
                                            enindex=i,
                                            czval=cz_centers_in[j],
                                            czindex=j,
                                            dist_params=this_params['coszen'])

                if self.coszen_flipback:
                    cz_kern_cdf = perform_coszen_flipback(
                        cz_kern_cdf, flipback_mask, keep)

                reco_kernel[i, j] = np.outer(e_kern_cdf, cz_kern_cdf)

            # Sanity check of reco kernels - intolerable negative values?
            logging.trace(" Ensuring reco kernel sanity...")
            kern_neg_invalid = reco_kernel < -EQUALITY_PREC
            if np.any(kern_neg_invalid):
                raise ValueError("Detected intolerable negative entries in"
                                 " reco kernel! Min.: %.15e" %
                                 np.min(reco_kernel))

            # Set values numerically compatible with zero to zero
            reco_kernel = np.where(np.abs(reco_kernel) < EQUALITY_PREC, 0.,
                                   reco_kernel)
            sum_over_axes = tuple(range(-len(self.output_binning), 0))
            totals = np.sum(reco_kernel, axis=sum_over_axes)
            totals_large = totals > (1 + EQUALITY_PREC)
            if np.any(totals_large):
                raise ValueError("Detected overflow in reco kernel! Max.:"
                                 " %0.15e" % (np.max(totals)))

            if self.input_binning.basenames[0] == "coszen":
                # The reconstruction kernel has been set up with energy as its
                # first dimension, so swap axes if it is applied to an input
                # binning where 'coszen' is the first
                logging.trace(" Swapping kernel dimensions since 'coszen' has"
                              " been requested as the first.")
                reco_kernel = np.swapaxes(reco_kernel, 0, 1)
                reco_kernel = np.swapaxes(reco_kernel, 2, 3)

            if self.sum_grouped_flavints:
                xform_input_names = []
                for input_name in self.input_names:
                    if set(NuFlavIntGroup(input_name)).isdisjoint(
                            xform_flavints):
                        continue
                    xform_input_names.append(input_name)

                for output_name in self.output_names:
                    if output_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=xform_input_names,
                        output_name=output_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                        sum_inputs=self.sum_grouped_flavints)
                    xforms.append(xform)
            # If *not* combining grouped flavints:
            # Copy the transform for each input flavor, regardless of whether
            # the transform was computed from a combination of flavors.
            else:
                for input_name in self.input_names:
                    if set(NuFlavIntGroup(input_name)).isdisjoint(
                            xform_flavints):
                        continue
                    for output_name in self.output_names:
                        if (output_name not in NuFlavIntGroup(input_name)
                                or output_name not in xform_flavints):
                            continue
                        logging.trace('  input: %s, output: %s, xform: %s',
                                      input_name, output_name, xform_flavints)

                        xform = BinnedTensorTransform(
                            input_names=input_name,
                            output_name=output_name,
                            input_binning=self.input_binning,
                            output_binning=self.output_binning,
                            xform_array=reco_kernel,
                            sum_inputs=self.sum_grouped_flavints)
                        xforms.append(xform)

        return TransformSet(transforms=xforms)
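Each kernel cell above is an outer product of 1D CDF differences, one per reco dimension. A minimal sketch of a single cell, assuming plain Gaussian resolution functions from scipy.stats (the real parameterisations may superpose several distributions):

import numpy as np
from scipy.stats import norm

en_edges_out = np.array([1., 2., 4., 8.])  # hypothetical reco-energy edges
cz_edges_out = np.array([-1., 0., 1.])     # hypothetical reco-coszen edges

true_e, true_cz = 3.0, -0.5                # one truth-bin center

# Fraction reconstructed into each reco bin = CDF difference across edges
e_kern_cdf = np.diff(norm.cdf(en_edges_out, loc=true_e, scale=1.0))
cz_kern_cdf = np.diff(norm.cdf(cz_edges_out, loc=true_cz, scale=0.2))

# The 2D kernel for this truth bin factorises into an outer product
kernel_cell = np.outer(e_kern_cdf, cz_kern_cdf)
assert kernel_cell.shape == (3, 2)
assert kernel_cell.sum() <= 1.0 + 1e-12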
Example no. 14
    def _compute_nominal_transforms(self):
        """Compute new PID transforms."""
        logging.debug('Updating pid.param PID histograms...')

        self.load_pid_energy_param(self.params.pid_energy_paramfile.value)

        nominal_transforms = []
        for xform_flavints in self.transform_groups:
            logging.debug('Working on %s PID', xform_flavints)

            xform_array = np.empty(self.transform_output_binning.shape)

            subdict = self.pid_energy_param_dict[xform_flavints]
            for signature, sig_param_func in subdict.items():
                # Get the PID probabilities vs. energy at the energy bins'
                # (weighted) centers
                pid1d = sig_param_func(self.ebin_centers)

                # Broadcast this 1d array across the reco_coszen dimension
                # since it's independent of reco_coszen
                broadcasted_pid = self.transform_output_binning.broadcast(
                    pid1d, from_dim='reco_energy', to_dims='reco_coszen')

                pid_indexer = self.transform_output_binning.indexer(
                    pid=signature)

                # Assign the broadcasted array to the correct PID bin
                xform_array[pid_indexer] = broadcasted_pid

            if self.sum_grouped_flavints:
                xform_input_names = []
                for input_name in self.input_names:
                    input_flavs = NuFlavIntGroup(input_name)
                    if set(xform_flavints).intersection(input_flavs):
                        xform_input_names.append(input_name)

                for output_name in self.output_names:
                    if output_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=xform_input_names,
                        output_name=output_name,
                        input_binning=self.input_binning,
                        output_binning=self.transform_output_binning,
                        xform_array=xform_array,
                        sum_inputs=self.sum_grouped_flavints)
                    nominal_transforms.append(xform)

            else:
                for input_name in self.input_names:
                    if input_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=input_name,
                        output_name=input_name,
                        input_binning=self.input_binning,
                        output_binning=self.transform_output_binning,
                        xform_array=xform_array,
                    )
                    nominal_transforms.append(xform)

        return TransformSet(transforms=nominal_transforms)
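Without the `MultiDimBinning.broadcast` and `indexer` helpers, the same fill can be sketched in plain NumPy. Assumed here: a (reco_energy, reco_coszen, pid) output with two complementary PID signatures and an invented probability curve:

import numpy as np

n_e, n_cz, n_pid = 5, 4, 2
xform_array = np.empty((n_e, n_cz, n_pid))

# Hypothetical PID probability vs. energy for the 'track' signature
pid1d_track = np.linspace(0.2, 0.9, n_e)

# Broadcast across the coszen axis (PID assumed coszen-independent)
broadcasted_pid = np.broadcast_to(pid1d_track[:, None], (n_e, n_cz))

# Assign each signature to its slice of the pid dimension; with two
# complementary signatures every event lands in exactly one PID bin
xform_array[..., 0] = broadcasted_pid        # 'track'
xform_array[..., 1] = 1.0 - broadcasted_pid  # 'cascade'
assert np.allclose(xform_array.sum(axis=-1), 1.0)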
Example no. 15
    def _compute_nominal_transforms(self):
        """Compute parameterised effective area transforms"""
        energy_param_source = self.params.aeff_energy_paramfile.value
        coszen_param_source = self.params.aeff_coszen_paramfile.value

        energy_param_hash = hash_obj(energy_param_source)
        coszen_param_hash = hash_obj(coszen_param_source)

        load_energy = False
        load_coszen = False
        if (self._param_hashes['energy'] is None
                or energy_param_hash != self._param_hashes['energy']):
            load_energy = True

        if (self.has_cz
                and (self._param_hashes['coszen'] is None
                     or coszen_param_hash != self._param_hashes['coszen'])):
            load_coszen = True

        if energy_param_source is None:
            raise ValueError(
                'A non-None energy parameterization must be provided via'
                ' `params.aeff_energy_paramfile`.'
            )
        if not self.has_cz and coszen_param_source is not None:
            raise ValueError(
                'true_coszen dimension was not found in the binning but a'
                ' coszen parameterisation file has been provided by'
                ' `params.aeff_coszen_paramfile`.'
            )

        if not (load_energy or load_coszen):
            return

        dims = ['energy', 'coszen']
        loads = [load_energy, load_coszen]
        sources = [energy_param_source, coszen_param_source]
        hashes = [energy_param_hash, coszen_param_hash]

        for dim, load, source, hash_ in zip(dims, loads, sources, hashes):
            if not load:
                continue
            self._param_hashes[dim] = None
            self.aeff_params[dim] = None
            params = load_aeff_param(source)

            # Transform groups are implicitly defined by the contents of the
            # aeff paramfile's keys
            implicit_transform_groups = params.keys()

            # Make sure these match transform groups specified for the stage
            if set(implicit_transform_groups) != set(self.transform_groups):
                raise ValueError(
                    'Transform groups (%s) defined implicitly by'
                    ' %s aeff parameterizations "%s" do not match those'
                    ' defined as the stage\'s `transform_groups` (%s).'
                    % (implicit_transform_groups, dim, source,
                       self.transform_groups)
                )

            self.aeff_params[dim] = params
            self._param_hashes[dim] = hash_

        nominal_transforms = []
        for xform_flavints in self.transform_groups:
            logging.debug('Working on %s effective areas xform',
                          xform_flavints)

            energy_param_func = self.aeff_params['energy'][xform_flavints]
            coszen_param_func = None
            if self.aeff_params['coszen'] is not None:
                coszen_param_func = self.aeff_params['coszen'][xform_flavints]

            # Now calculate the 1D aeff along energy
            aeff_vs_e = energy_param_func(self.ecen)

            # NOTE/TODO: Below is taken from the PISA 2 implementation of this.
            # Almost certainly comes from the fact that the highest knot there
            # was 79.5 GeV with the upper energy bin edge being 80 GeV. There's
            # probably something better that could be done here...

            # Correct for final energy bin, since interpolation does not
            # extend to JUST right outside the final bin
            if aeff_vs_e[-1] == 0:
                aeff_vs_e[-1] = aeff_vs_e[-2]

            if self.has_cz:
                aeff_vs_e = self.input_binning.broadcast(
                    aeff_vs_e, from_dim='true_energy', to_dims='true_coszen'
                )

                if coszen_param_func is not None:
                    aeff_vs_cz = coszen_param_func(self.czcen)
                    # Normalize the coszen shape to unit mean so the
                    # overall rate is unchanged
                    aeff_vs_cz *= len(aeff_vs_cz) / np.sum(aeff_vs_cz)
                else:
                    aeff_vs_cz = np.ones(shape=len(self.czcen))

                cz_broadcasted = self.input_binning.broadcast(
                    aeff_vs_cz, from_dim='true_coszen', to_dims='true_energy'
                )
                aeff_transform = aeff_vs_e * cz_broadcasted
            else:
                aeff_transform = aeff_vs_e

            nominal_transforms.extend(
                populate_transforms(
                    service=self,
                    xform_flavints=xform_flavints,
                    xform_array=aeff_transform
                )
            )

        return TransformSet(transforms=nominal_transforms)
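The coszen factor above is normalised to unit mean so it reshapes the effective area without changing the energy-integrated rate. A minimal sketch of the factorisation, with invented stand-ins for the parameterisation functions:

import numpy as np

ecen = np.logspace(0, 2, 10)        # hypothetical true-energy bin centers
czcen = np.linspace(-0.9, 0.9, 8)   # hypothetical true-coszen bin centers

aeff_vs_e = 1e-4 * ecen**0.7        # stand-in energy parameterisation
aeff_vs_cz = 1.0 + 0.3 * czcen      # stand-in coszen shape

# Normalize the coszen shape: after this, sum == len, i.e. mean == 1
aeff_vs_cz *= len(aeff_vs_cz) / np.sum(aeff_vs_cz)
assert np.isclose(aeff_vs_cz.mean(), 1.0)

# The full 2D transform is the outer product of the two 1D factors
aeff_transform = np.outer(aeff_vs_e, aeff_vs_cz)
assert aeff_transform.shape == (len(ecen), len(czcen))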