Beispiel #1
0
    def _compute_transforms(self):
        """Evaluate the fit function for the current parameter values and
        write the resulting scaling into one transform per input.

        For every name in `self.input_names`, `fit_fun` is evaluated bin by
        bin using the current magnitudes of the parameters listed in
        `self.sys_list` and the fit coefficients stored in
        `self.fit_results[input_name]`.

        Returns
        -------
        TransformSet
            One `BinnedTensorTransform` per input name; each maps the input
            onto an output of the same name.

        """
        # TODO: use iterators to collapse nested loops

        # The systematic values are the same for every input, so they are read
        # once up front instead of once per input.
        sys_values = [
            self.params[sys_name].magnitude for sys_name in self.sys_list
        ]

        transforms = []
        for input_name in self.input_names:
            fit_params = self.fit_results[input_name]
            # The last axis of `fit_params` is unpacked into `fit_fun` below;
            # the leading axes give the shape of the transform array.
            shape = fit_params.shape[:-1]
            transform = np.ones(shape)
            for idx in np.ndindex(*shape):
                # At every point evaluate the function
                transform[idx] *= fit_fun(sys_values, *fit_params[idx])

            xform = BinnedTensorTransform(
                # A bare name is passed, exactly as before: the original
                # `(input_name)` was a parenthesised string, not a tuple.
                input_names=input_name,
                output_name=input_name,
                input_binning=self.input_binning,
                output_binning=self.output_binning,
                xform_array=transform,
                error_method=self.error_method,
            )
            transforms.append(xform)
        return TransformSet(transforms)
Beispiel #2
0
    def _compute_transforms(self):
        """Build one transform per combine group.

        Each transform takes the inputs listed for a group in
        `self.combine_groups` and produces an output named after the group.
        The transform array starts as all ones, with one leading slice per
        input; slices belonging to inputs whose name contains ``'_nc'`` are
        multiplied by the `nu_nc_norm` param.

        Returns
        -------
        TransformSet

        """
        dims = self.input_binning.names
        # Bin counts depend only on the input binning, not on the group, so
        # they are computed once.
        bins_per_dim = [self.input_binning[d].num_bins for d in dims]

        transforms = []
        for group, in_names in self.combine_groups.items():
            # One slice of the transform array per input in the group
            xform = np.ones([len(in_names)] + bins_per_dim)
            for i, name in enumerate(in_names):
                scale = 1.
                if '_nc' in name:
                    scale *= self.params.nu_nc_norm.value.m_as('dimensionless')
                # NOTE: the following normalisations are currently disabled
                #if 'nutau' in name:
                #    scale *= self.params.nutau_norm.value.m_as('dimensionless')
                #if name in ['nutau_cc','nutaubar_cc']:
                #    scale *= self.params.nutau_cc_norm.value.m_as('dimensionless')
                if scale != 1:
                    xform[i] *= scale

            transforms.append(
                BinnedTensorTransform(input_names=in_names,
                                      output_name=group,
                                      input_binning=self.input_binning,
                                      output_binning=self.output_binning,
                                      xform_array=xform))

        return TransformSet(transforms=transforms)
Beispiel #3
0
    def _compute_transforms(self):
        """Produce a new set of (dummy) oscillation transforms.

        Returns
        -------
        TransformSet
            One `BinnedTensorTransform` per entry in `self.output_names`.

        """
        # Seed numpy's global RNG from the parameter values so that different
        # sets of parameters produce different sets of transforms
        params_hash = hash_obj(self.params.values, hash_to='int')
        np.random.seed(params_hash % (2**32 - 1))

        # Read parameters in the units used for computation, e.g.
        theta23 = self.params.theta23.m_as('rad')

        transforms = []
        for out_idx, output_name in enumerate(self.output_names):
            if out_idx >= 3:
                # anti-neutrinos (-> input names are anti-neutrinos)
                input_names = self.input_names[2:4]
            else:
                # neutrinos (-> input names are neutrinos)
                input_names = self.input_names[0:2]

            # the "oscillation probabilities" for this output
            osc_probs = self.create_dummy_osc_probs()

            # Wrap the array in a `BinnedTensorTransform` carrying the set of
            # input names that belongs to this output name. Input and output
            # binnings have already been verified to be identical.
            this_xform = BinnedTensorTransform(
                input_names=input_names,
                output_name=output_name,
                input_binning=self.input_binning,
                output_binning=self.output_binning,
                xform_array=osc_probs,
            )
            transforms.append(this_xform)

        return TransformSet(transforms=transforms)
Beispiel #4
0
 def _compute_transforms(self):  # pylint: disable=no-self-use
     """Return an empty `TransformSet` as the default set of transforms.

     Stages that apply transforms to their inputs should override this
     method to derive those transforms; stages that take no inputs should
     leave it as-is.
     """
     no_transforms = []
     return TransformSet(no_transforms)
Beispiel #5
0
    def __init__(
        self,
        use_transforms,
        params=None,
        expected_params=None,
        input_names=None,
        output_names=None,
        error_method=None,
        disk_cache=None,
        memcache_deepcopy=True,
        transforms_cache_depth=10,
        outputs_cache_depth=0,
        input_binning=None,
        output_binning=None,
        debug_mode=None,
    ):
        """Set up binning, caches and bookkeeping state for the stage.

        Parameters
        ----------
        use_transforms
            Whether or not the stage uses transforms.
        params, expected_params, input_names, output_names, error_method,
        debug_mode
            Passed on unchanged to the base-class initialiser.
        disk_cache
            Stored as `self.disk_cache` before `instantiate_disk_cache` is
            called at the end of initialisation.
        memcache_deepcopy
            Passed as `deepcopy` to every `MemoryCache` created here.
        transforms_cache_depth
            Converted to `int`; maximum depth of both the transforms cache and
            the nominal-transforms cache.
        outputs_cache_depth
            Converted to `int`; an outputs cache is only created if this is
            greater than zero.
        input_binning, output_binning
            Stored on the instance, after which `validate_binning` is called.

        """
        logging.warning('This is a cake-style PISA stage, which is DEPRECATED!')

        self.use_transforms = use_transforms
        """Whether or not stage uses transforms"""

        self._events_hash = None

        # Binning must be in place before `validate_binning` runs, and this
        # happens before the base class is initialised.
        self.input_binning = input_binning
        self.output_binning = output_binning
        self.validate_binning()

        # init base class!
        super(Stage, self).__init__(
            params=params,
            expected_params=expected_params,
            input_names=input_names,
            output_names=output_names,
            debug_mode=debug_mode,
            error_method=error_method,
        )

        # Storage of latest transforms and outputs; default to empty
        # TransformSet and None, respectively.
        self.transforms = TransformSet([])
        """A stage that takes to-be-transformed inputs and has had these
        transforms computed stores them here. Before computation, `transforms`
        is an empty TransformSet; a stage that does not make use of these (such
        as a no-input stage) has an empty TransformSet."""

        self.memcache_deepcopy = memcache_deepcopy

        self.transforms_cache_depth = int(transforms_cache_depth)

        self.full_hash = True
        """Whether to do full hashing if true, otherwise do fast hashing"""

        self.transforms_cache = MemoryCache(
            max_depth=self.transforms_cache_depth,
            is_lru=True,
            deepcopy=self.memcache_deepcopy,
        )
        """Memory cache object for storing transforms"""

        self.nominal_transforms_cache = MemoryCache(
            max_depth=self.transforms_cache_depth,
            is_lru=True,
            deepcopy=self.memcache_deepcopy,
        )
        """Memory cache object for storing nominal transforms"""

        self.outputs_cache_depth = int(outputs_cache_depth)

        self.outputs_cache = None
        """Memory cache object for storing outputs (excludes sideband
        objects)."""

        # The outputs cache stays `None` unless a positive depth is requested
        if self.outputs_cache_depth > 0:
            self.outputs_cache = MemoryCache(
                max_depth=self.outputs_cache_depth,
                is_lru=True,
                deepcopy=self.memcache_deepcopy,
            )

        self.disk_cache = disk_cache
        """Disk cache object"""

        self.disk_cache_path = None
        """Path to disk cache file for this stage/service (or None)."""

        # Include each attribute here for hashing if it is defined and its
        # value is not None
        default_attrs_to_hash = [
            "input_names",
            "output_names",
            "input_binning",
            "output_binning",
        ]
        self._attrs_to_hash = set()
        for attr in default_attrs_to_hash:
            if getattr(self, attr, None) is None:
                continue
            try:
                self.include_attrs_for_hashes(attr)
            except ValueError:
                # Deliberately best-effort. This used to read
                # `except ValueError():`, which raises a TypeError instead of
                # catching the ValueError when one actually occurs.
                # NOTE(review): confirm when `include_attrs_for_hashes` raises
                # ValueError and that ignoring it is still wanted.
                pass

        self.events = None
        self.nominal_transforms = None

        # Define useful flags and values for debugging behavior after running

        self.nominal_transforms_loaded_from_cache = None
        """Records which cache nominal transforms were loaded from, or None."""

        self.nominal_transforms_computed = False
        """Records whether nominal transforms were (re)computed."""

        self.transforms_loaded_from_cache = None
        """Records which cache transforms were loaded from, or None."""

        self.transforms_computed = False
        """Records whether transforms were (re)computed."""

        self.nominal_outputs_computed = False
        """Records whether nominal outputs were (re)computed."""

        self.outputs_loaded_from_cache = None
        """Records which cache outputs were loaded from, or None."""

        self.outputs_computed = False
        """Records whether outputs were (re)computed."""

        self.nominal_transforms_hash = None
        self.transforms_hash = None
        self.nominal_outputs_hash = None
        self.outputs_hash = None
        self.instantiate_disk_cache()
Beispiel #6
0
    def _compute_nominal_transforms(self):
        """Compute new PID transforms.

        Events are loaded and cut, then split according to the criteria in the
        `pid_spec` param. For every group in `self.transform_groups` and every
        PID signature in `self.output_channels`, the transform array is the
        bin-by-bin ratio of the histogram of the group's events that pass the
        signature's criteria to the histogram of all of the group's events.
        The same array is attached to every input name found in the group.

        Returns
        -------
        TransformSet

        Raises
        ------
        ValueError
            If the keys of `pid_spec` differ from `self.output_channels`.

        """
        logging.debug('Updating pid.hist PID histograms...')

        # TODO(shivesh): As of now, events do not have units as far as PISA
        # is concerned

        self.load_events(self.params.pid_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # TODO: in future, the events file will not have these combined
        # already, and it should be done here (or in a nominal transform,
        # etc.). See below about taking this step when we move to directly
        # using the I3-HDF5 files.
        #events_file_combined_flavints = tuple([
        #    NuFlavIntGroup(s)
        #    for s in self.events.metadata['flavints_joined']
        #])

        # TODO: take events object as an input instead of as a param that
        # specifies a file? Or handle both cases?

        # SECURITY: the param value is executed as Python code by `eval`; it
        # must only ever come from trusted configuration.
        pid_spec = OrderedDict(eval(self.params.pid_spec.value))
        if set(pid_spec.keys()) != set(self.output_channels):
            msg = 'PID criteria from `pid_spec` {0} does not match {1}'
            raise ValueError(msg.format(pid_spec.keys(), self.output_channels))

        # TODO: add importance weights, error computation

        logging.debug("Separating events by PID...")
        separated_events = OrderedDict()
        for sig in self.output_channels:
            separated_events[sig] = self.events.applyCut(pid_spec[sig])

        # The same weights column is used for every histogram below
        weights_col = self.params.pid_weights_name.value

        # Derive transforms by combining flavints that behave similarly, but
        # apply the derived transforms to the input flavints separately
        # (leaving combining these together to later)
        transforms = []
        for flavint_group in self.transform_groups:
            logging.debug("Working on %s PID", flavint_group)

            # TODO(shivesh): errors
            # TODO(shivesh): total histo check?

            # Denominator: all events of the group, regardless of PID
            total_histo = np.zeros(self.output_binning.shape)
            for flavint in flavint_group:
                total_histo += self.events.histogram(
                    kinds=flavint,
                    binning=self.output_binning,
                    weights_col=weights_col,
                    errors=None).hist

            # Numerators: events of the group passing each PID signature
            sig_histograms = {}
            for sig in self.output_channels:
                sig_histo = np.zeros(self.output_binning.shape)
                for flavint in flavint_group:
                    sig_histo += separated_events[sig].histogram(
                        kinds=flavint,
                        binning=self.output_binning,
                        weights_col=weights_col,
                        errors=None).hist
                sig_histograms[sig] = sig_histo

            for sig in self.output_channels:
                # Empty bins give 0/0 or x/0; silence numpy and count the
                # non-finite results afterwards instead.
                with np.errstate(divide='ignore', invalid='ignore'):
                    xform_array = sig_histograms[sig] / total_histo

                num_invalid = np.sum(~np.isfinite(xform_array))
                if num_invalid > 0:
                    # `logging.warn` is a deprecated alias of `warning`
                    logging.warning(
                        'Group "%s", PID signature "%s" has %d bins with no'
                        ' events (and hence the ability to separate events'
                        ' by PID cannot be ascertained). These are being'
                        ' masked off from any further computations.',
                        flavint_group, sig, num_invalid)
                    # NOTE: despite the message, no masking is applied here
                    # while the line below stays disabled.
                    # TODO: this caused buggy event propagation for some
                    # reason; check and re-introduced the masked array idea
                    # when this is fixed. For now, replicating the behavior
                    # from PISA 2.
                    #xform_array = np.ma.masked_invalid(xform_array)

                # Double check that no NaN remain
                #assert not np.any(np.isnan(xform_array))

                # Copy this transform to use for each input in the group
                for input_name in self.input_names:
                    if input_name not in flavint_group:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=input_name,
                        output_name=self.suffix_channel(input_name, sig),
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=xform_array)
                    transforms.append(xform)

        return TransformSet(transforms=transforms)
Beispiel #7
0
    def _compute_nominal_transforms(self):
        """Compute smoothed nominal effective-area transforms.

        For each group in `self.transform_groups`, the `weighted_aeff` column
        of the (cut) events is histogrammed in the input binning, divided by
        the bin volumes (including a factor for dimensions missing from the
        binning), smoothed with `self.smooth` and handed to
        `populate_transforms`.

        Returns
        -------
        TransformSet

        """
        self.load_events(self.params.aeff_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # Units must be the following for correctly converting a sum-of-
        # OneWeights-in-bin to an average effective area across the bin.
        comp_units = dict(true_energy='GeV',
                          true_coszen=None,
                          true_azimuth='rad')

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.input_binning
        }
        #out_units = {dim: unit for dim, unit in comp_units.items()
        #             if dim in self.output_binning}

        # These will be in the computational units
        input_binning = self.input_binning.to(**in_units)

        # Account for "missing" dimension(s) (dimensions OneWeight expects for
        # computation of bin volume), and accommodate with a factor equal to
        # the full range. See IceCube wiki/documentation for OneWeight for
        # more info.
        missing_dims_vol = 1
        # TODO: currently, azimuth required to *not* be part of input binning
        if 'true_azimuth' not in input_binning:
            missing_dims_vol *= 2 * np.pi
        # TODO: Following is currently never the case, handle?
        if 'true_coszen' not in input_binning:
            missing_dims_vol *= 2

        # The normalisation and the axis order depend only on the binning, so
        # they are worked out once rather than once per transform group.
        bin_volumes = input_binning.bin_volumes(attach_units=False)
        norm_volumes = bin_volumes * missing_dims_vol
        # The arrays are transposed around the smoothing call when energy is
        # the second dimension of the binning.
        energy_is_second_dim = input_binning.index('true_energy') == 1

        nominal_transforms = []

        for xform_flavints in self.transform_groups:
            logging.info("Working on %s effective areas xform", xform_flavints)

            raw_hist = self.events.histogram(kinds=xform_flavints,
                                             binning=input_binning,
                                             weights_col='weighted_aeff',
                                             errors=True)
            raw_transform = unp.nominal_values(raw_hist.hist)
            raw_errors = unp.std_devs(raw_hist.hist)

            # Divide histogram by
            #   (energy bin width x coszen bin width x azimuth bin width)
            # volumes to convert from sums-of-OneWeights-in-bins to
            # effective areas. Note that volume correction factor for
            # missing dimensions is applied here.
            raw_transform /= norm_volumes
            raw_errors /= norm_volumes

            if energy_is_second_dim:
                # transpose
                raw_transform = raw_transform.T
                raw_errors = raw_errors.T

            # Do the smoothing
            smooth_transform = self.smooth(raw_transform, raw_errors,
                                           input_binning['true_energy'],
                                           input_binning['true_coszen'])

            if energy_is_second_dim:
                # transpose back
                smooth_transform = smooth_transform.T

            nominal_transforms.extend(
                populate_transforms(service=self,
                                    xform_flavints=xform_flavints,
                                    xform_array=smooth_transform))

        return TransformSet(transforms=nominal_transforms)
Beispiel #8
0
    def _compute_nominal_transforms(self):
        """Compute nominal effective-area transforms by histogramming.

        The `weighted_aeff` column of the (cut) events is histogrammed per
        transform group in the input binning and divided by the bin volumes,
        including a factor for dimensions missing from the binning. In debug
        mode each resulting array is also written to a file.

        Returns
        -------
        TransformSet

        """
        self.load_events(self.params.aeff_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # A sum-of-OneWeights-in-bin only converts correctly to an average
        # effective area across the bin when these units are used.
        comp_units = {
            'true_energy': 'GeV',
            'true_coszen': None,
            'true_azimuth': 'rad',
        }

        # Only dimensions actually present in the input binning may be passed
        # to the unit conversion
        in_units = {}
        for dim, unit in comp_units.items():
            if dim in self.input_binning:
                in_units[dim] = unit

        # TODO: use out_units for some kind of conversion?
        #out_units = {dim: unit for dim, unit in comp_units.items()
        #             if dim in self.output_binning}

        # Input binning expressed in the computational units
        input_binning = self.input_binning.to(**in_units)

        # OneWeight expects certain dimensions when computing the bin volume;
        # each one that is absent from the binning contributes a factor equal
        # to its full range. See IceCube wiki/documentation for OneWeight for
        # more info.
        missing_dims_vol = 1
        for dim, full_range in (('true_azimuth', 2*np.pi), ('true_coszen', 2)):
            if dim not in input_binning:
                missing_dims_vol *= full_range

        if self.debug_mode:
            outdir = os.path.join(
                find_resource('debug'), self.stage_name, self.service_name
            )
            mkdir(outdir)
            #hex_hash = hash2hex(kde_hash)

        norm_volumes = (
            input_binning.bin_volumes(attach_units=False) * missing_dims_vol
        )

        nominal_transforms = []
        for xform_flavints in self.transform_groups:
            logging.debug('Working on %s effective areas xform',
                          xform_flavints)

            with_errors = self.error_method not in [None, False]
            aeff_hist = self.events.histogram(
                kinds=xform_flavints,
                binning=input_binning,
                weights_col='weighted_aeff',
                errors=with_errors,
            )
            aeff_transform = aeff_hist.hist

            # Dividing by the
            #   (energy bin width x coszen bin width x azimuth bin width)
            # volumes turns sums-of-OneWeights-in-bins into effective areas;
            # the correction for missing dimensions is part of the divisor.
            aeff_transform /= norm_volumes

            if self.debug_mode:
                fname = ''.join(['aeff_', str(xform_flavints), '.pkl'])
                to_file(aeff_transform, os.path.join(outdir, fname))

            group_transforms = populate_transforms(
                service=self,
                xform_flavints=xform_flavints,
                xform_array=aeff_transform,
            )
            nominal_transforms.extend(group_transforms)

        return TransformSet(transforms=nominal_transforms)
Beispiel #9
0
def compute_transforms(service):
    """Scale a service's nominal effective-area transforms by its systematics.

    Systematics are: `aeff_scale`, `livetime`, and (when
    `service.particles` is ``'neutrinos'``) `nutau_cc_norm`.

    Returns
    -------
    TransformSet
        One new `BinnedTensorTransform` per nominal transform.

    Raises
    ------
    ValueError
        If `nutau_cc_norm` != 1 while nutau CC / nutaubar CC are grouped
        together with other flav/ints in `service.transform_groups`.

    """
    aeff_scale = service.params.aeff_scale.m_as('dimensionless')
    livetime_s = service.params.livetime.m_as('sec')
    base_scale = aeff_scale * livetime_s

    logging.trace('livetime = %s --> %s sec',
                  service.params.livetime.value, livetime_s)

    is_neutrinos = service.particles == 'neutrinos'

    if is_neutrinos:
        if not hasattr(service, 'nutau_cc_norm_must_be_one'):
            # If any flav/ints besides nutau_cc and nutaubar_cc are grouped
            # with one or both of those for transforms, then a
            # `nutau_cc_norm` != 1 cannot be applied. The result is stored on
            # the service so that it is only worked out once.
            tau_cc = set(NuFlavIntGroup('nutau_cc+nutaubar_cc'))
            mixed_group_found = False
            for group in service.transform_groups:
                members = set(group)
                if (members & tau_cc) and (members - tau_cc):
                    mixed_group_found = True
                    break
            service.nutau_cc_norm_must_be_one = mixed_group_found

        nutau_cc_norm = service.params.nutau_cc_norm.m_as('dimensionless')
        if service.nutau_cc_norm_must_be_one and nutau_cc_norm != 1:
            raise ValueError(
                '`nutau_cc_norm` = %e but can only be != 1 if nutau CC and'
                ' nutaubar CC are separated from other flav/ints.'
                ' Transform groups are: %s'
                % (nutau_cc_norm, service.transform_groups)
            )

    sum_grouped_flavints = getattr(service, 'sum_grouped_flavints', False)

    new_transforms = []
    for nominal in service.nominal_transforms:
        scale = base_scale
        if is_neutrinos:
            out_nfig = NuFlavIntGroup(nominal.output_name)
            if 'nutau_cc' in out_nfig or 'nutaubar_cc' in out_nfig:
                scale *= nutau_cc_norm

        # A unit scale re-uses the nominal array as-is
        if scale == 1:
            scaled_array = nominal.xform_array
        else:
            scaled_array = nominal.xform_array * scale

        new_transforms.append(
            BinnedTensorTransform(
                input_names=nominal.input_names,
                output_name=nominal.output_name,
                input_binning=nominal.input_binning,
                output_binning=nominal.output_binning,
                xform_array=scaled_array,
                sum_inputs=sum_grouped_flavints,
            )
        )

    return TransformSet(new_transforms)
Beispiel #10
0
    def _compute_nominal_transforms(self):
        """Compute cross-section transforms.

        For every input flavour and every interaction type in
        `ALL_NUINT_TYPES`, a cross-section map over the true-energy bins is
        obtained from `self.xsec`, expanded to the full shape of the input
        binning and scaled by a common factor built from the `livetime`,
        `fid_vol`, `ice_p` and `mr_h20` params. The per-flavint arrays are then
        summed within each group of `self.transform_groups`.

        Returns
        -------
        TransformSet
            One `BinnedTensorTransform` per (transform group, input name) pair
            for which the input name is one of the group's flavours.

        """
        logging.info('Updating xsec.genie cross-section histograms...')

        self.load_xsec_splines()
        # NOTE(review): `_ev_param` is defined elsewhere; what it does to the
        # raw param value cannot be determined from this method.
        livetime = self._ev_param(self.params['livetime'].value)
        ice_p = self._ev_param(self.params['ice_p'].value)
        fid_vol = self._ev_param(self.params['fid_vol'].value)
        mr_h20 = self._ev_param(self.params['mr_h20'].value)
        x_energy_scale = self.params['x_energy_scale'].value

        input_binning = self.input_binning

        ebins = input_binning.true_energy
        # Find the position of the energy dimension within the binning.
        # NOTE(review): `e_idx` stays unbound if no dimension name contains
        # 'true_energy'; the closure below would then raise NameError.
        for idx, name in enumerate(input_binning.names):
            if 'true_energy' in name:
                e_idx = idx

        # Maps NuFlavInt -> scaled cross-section array in the input binning
        xsec_transforms = {}
        for flav in self.input_names:
            for int_ in ALL_NUINT_TYPES:
                flavint = flav + '_' + str(int_)
                logging.debug('Obtaining cross-sections for %s', flavint)
                xsec_map = self.xsec.get_map(flavint,
                                             MultiDimBinning([ebins]),
                                             x_energy_scale=x_energy_scale)

                # Per-dimension coordinate vector for `np.meshgrid`: the
                # cross-section values along the energy dimension, plain bin
                # indices along every other dimension.
                def func(idx):
                    if idx == e_idx:
                        return xsec_map.hist
                    return tuple(range(input_binning.shape[idx]))

                num_dims = input_binning.num_dims
                # Only the grid belonging to the energy dimension is kept; it
                # holds the energy-dependent values repeated along all other
                # dimensions.
                xsec_trns = np.meshgrid(*map(func, range(num_dims)),
                                        indexing='ij')[e_idx]
                # 6.022140857e+23 / mol is the Avogadro constant.
                # NOTE(review): the in-place multiply involves unit-carrying
                # quantities; confirm the resulting units are as intended.
                xsec_trns *= (livetime * fid_vol * (ice_p / mr_h20) *
                              (6.022140857e+23 / ureg.mol))
                xsec_transforms[NuFlavInt(flavint)] = xsec_trns

        nominal_transforms = []
        for flavint_group in self.transform_groups:
            flav_names = [str(flav) for flav in flavint_group.flavs]
            for input_name in self.input_names:
                # Only inputs that are one of the group's flavours contribute
                if input_name not in flav_names:
                    continue

                # Sum the arrays of all of the group's flavints that were
                # computed above.
                # NOTE(review): if none of the group's flavints is present,
                # `reduce` is called on an empty list and would raise.
                xform_array = []
                for flavint in flavint_group.flavints:
                    if flavint in xsec_transforms:
                        xform_array.append(xsec_transforms[flavint])
                xform_array = reduce(add, xform_array)

                xform = BinnedTensorTransform(
                    input_names=input_name,
                    output_name=str(flavint_group),
                    input_binning=input_binning,
                    output_binning=self.output_binning,
                    xform_array=xform_array)
                nominal_transforms.append(xform)

        return TransformSet(transforms=nominal_transforms)
Beispiel #11
0
    def _compute_transforms(self):
        """Generate reconstruction "smearing kernels" by histogramming true and
        reconstructed variables from a Monte Carlo events file.

        The resulting transform is a 2N-dimensional histogram, where N is the
        dimensionality of the input binning. The transform maps the truth bin
        counts to the reconstructed bin counts.

        I.e., for the case of 1D input binning, the ith element of the
        reconstruction kernel will be a map showing the distribution of events
        over all the reco space from truth bin i. This will be normalised to
        the total number of events in truth bin i.

        Notes
        -----
        In the current implementation these histograms are made
        **UN**weighted. This is probably quite wrong...

        """
        e_res_scale = self.params.e_res_scale.value.m_as('dimensionless')
        cz_res_scale = self.params.cz_res_scale.value.m_as('dimensionless')
        e_reco_bias = self.params.e_reco_bias.value.m_as('GeV')
        cz_reco_bias = self.params.cz_reco_bias.value.m_as('dimensionless')
        res_scale_ref = self.params.res_scale_ref.value.strip().lower()
        assert res_scale_ref in ['zero']  # TODO: , 'mean', 'median']

        self.load_events(self.params.reco_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # Computational units must be the following for compatibility with
        # events file
        comp_units = dict(true_energy='GeV',
                          true_coszen=None,
                          true_azimuth='rad',
                          reco_energy='GeV',
                          reco_coszen=None,
                          reco_azimuth='rad',
                          pid=None)

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.input_binning
        }
        out_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.output_binning
        }

        # These binnings will be in the computational units defined above
        input_binning = self.input_binning.to(**in_units)
        output_binning = self.output_binning.to(**out_units)

        xforms = []
        for xform_flavints in self.transform_groups:
            logging.debug("Working on %s reco kernels" % xform_flavints)

            repr_flavint = xform_flavints[0]

            true_energy = self.events[repr_flavint]['true_energy']
            true_coszen = self.events[repr_flavint]['true_coszen']
            reco_energy = self.events[repr_flavint]['reco_energy']
            reco_coszen = self.events[repr_flavint]['reco_coszen']
            e_reco_err = reco_energy - true_energy
            cz_reco_err = reco_coszen - true_coszen

            if self.params.res_scale_ref.value.strip().lower() == 'zero':
                self.events[repr_flavint]['reco_energy'] = (
                    true_energy + e_reco_err * e_res_scale + e_reco_bias)
                self.events[repr_flavint]['reco_coszen'] = (
                    true_coszen + cz_reco_err * cz_res_scale + cz_reco_bias)

            # True (input) + reco {+ PID} (output)-dimensional histogram
            # is the basis for the transformation
            reco_kernel = self.events.histogram(
                kinds=xform_flavints,
                binning=input_binning * output_binning,
                weights_col=self.params.reco_weights_name.value,
                errors=(self.error_method not in [None, False]))
            # Extract just the numpy array to work with
            reco_kernel = reco_kernel.hist

            # This takes into account the correct kernel normalization:
            # What this means is that we have to normalise the reco map
            # to the number of events in the truth bin.
            #
            # I.e., we have N events from the truth bin which then become
            # spread out over the whole map due to reconstruction.
            # The normalisation is dividing this map by N.
            #
            # Previously this was hard-coded for 2 dimensions, but I have tried
            # to generalise it to arbitrary dimensionality.

            # Truth-only (N-dimensional) histogram will be used for
            # normalization (so transform is in terms of fraction-of-events in
            # input--i.e. truth--bin). Sum over the input dimensions.
            true_event_counts = self.events.histogram(
                kinds=xform_flavints,
                binning=input_binning,
                weights_col=self.params.reco_weights_name.value,
                errors=(self.error_method not in [None, False]))
            # Extract just the numpy array to work with
            true_event_counts = true_event_counts.hist

            # If there weren't any events in the input (true_*) bin, make this
            # bin have no effect -- i.e., populate all output bins
            # corresponding to the input bin with zeros via `nan_to_num`.
            with np.errstate(divide='ignore', invalid='ignore'):
                true_event_counts[true_event_counts == 0] = np.nan
                norm_factors = 1.0 / true_event_counts
                norm_factors = np.nan_to_num(norm_factors)

            # Numpy broadcasts lower-dimensional things to higher dimensions
            # from last dimension to first; if we simply mult the reco_kernel
            # by norm_factors, this will apply the normalization to the
            # __output__ dimensions rather than the input dimensions. Add
            # "dummy" dimensions to norm_factors where we want the "extra
            # dimensions": at the end.
            for dim in self.output_binning:
                norm_factors = np.expand_dims(norm_factors, axis=-1)

            # Apply the normalization to the kernels
            reco_kernel *= norm_factors

            assert np.all(reco_kernel >= 0), \
                    'number of elements less than 0 = %d' \
                    % np.sum(reco_kernel < 0)
            sum_over_axes = tuple(range(-len(self.output_binning), 0))
            totals = np.sum(reco_kernel, axis=sum_over_axes)
            assert np.all(
                totals <= 1 + 1e-14), 'max = ' + str(np.max(totals) - 1)

            # Now populate this transform to each input for which it applies.

            if self.sum_grouped_flavints:
                xform_input_names = []
                for input_name in self.input_names:
                    input_flavs = NuFlavIntGroup(input_name)
                    if len(set(xform_flavints).intersection(input_flavs)) > 0:
                        xform_input_names.append(input_name)

                for output_name in self.output_names:
                    if output_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=xform_input_names,
                        output_name=output_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                        sum_inputs=self.sum_grouped_flavints)
                    xforms.append(xform)
            else:
                # NOTES:
                # * Output name is same as input name
                # * Use `self.input_binning` and `self.output_binning` so maps
                #   are returned in user-defined units (rather than
                #   computational units, which are attached to the non-`self`
                #   versions of these binnings).
                for input_name in self.input_names:
                    if input_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=input_name,
                        output_name=input_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                    )
                    xforms.append(xform)

        return TransformSet(transforms=xforms)
Beispiel #12
0
    def _compute_transforms(self):
        """
        Generate reconstruction "smearing kernels" by reading in a set of
        parameterisation functions from a json file. This should have the same
        dimensionality as the input binning i.e. if you have energy and
        coszenith input binning then the kernels provided should have both
        energy and coszenith resolution functions.

        Any superposition of distributions from scipy.stats is supported.

        The parameterisations are (re)loaded only when the hash of
        `params.reco_paramfile` differs from the one cached on the instance.
        For every transform group a 4D kernel of shape
        (n_e_in, n_cz_in, n_e_out, n_cz_out) is assembled, sanity-checked,
        and wrapped in one `BinnedTensorTransform` per applicable
        input/output name.

        Returns
        -------
        TransformSet
            All transforms built for the stage's `transform_groups`.

        Raises
        ------
        ValueError
            If `params.reco_paramfile` is None, if the groups found in the
            parameterisation do not match `self.transform_groups`, if a
            kernel has entries below `-EQUALITY_PREC`, or if a kernel sums to
            more than `1 + EQUALITY_PREC` over the output dimensions.
        """
        res_scale_ref = self.params.res_scale_ref.value.strip().lower()
        assert res_scale_ref in ['zero']  # TODO: , 'mean', 'median']

        reco_param_source = self.params.reco_paramfile.value

        if reco_param_source is None:
            raise ValueError(
                'non-None reco parameterization params.reco_paramfile'
                ' must be provided')

        reco_param_hash = hash_obj(reco_param_source)

        # Reload and re-evaluate the parameterisations only if the source has
        # changed since the last call (or nothing has been loaded yet).
        if (self._reco_param_hash is None
                or reco_param_hash != self._reco_param_hash):
            reco_param = load_reco_param(reco_param_source)

            # Transform groups are implicitly defined by the contents of the
            # reco paramfile's keys
            implicit_transform_groups = reco_param.keys()

            # Make sure these match transform groups specified for the stage
            if set(implicit_transform_groups) != set(self.transform_groups):
                raise ValueError(
                    'Transform groups (%s) defined implicitly by'
                    ' %s reco parameterizations do not match those'
                    ' defined as the stage\'s `transform_groups` (%s).' %
                    (implicit_transform_groups, reco_param_source,
                     self.transform_groups))

            self.param_dict = reco_param
            self._reco_param_hash = reco_param_hash

            self.eval_dict = self.evaluate_reco_param()
            self.reco_scales_and_biases_applicable()

        # everything seems to be fine, so rescale and shift distributions
        eval_dict = self.scale_and_shift_reco_dists()

        # Computational units must be the following for compatibility with
        # events file
        comp_units = dict(true_energy='GeV',
                          true_coszen=None,
                          true_azimuth='rad',
                          reco_energy='GeV',
                          reco_coszen=None,
                          reco_azimuth='rad',
                          pid=None)

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.input_binning
        }
        out_units = {
            dim: unit
            for dim, unit in comp_units.items() if dim in self.output_binning
        }

        # These binnings will be in the computational units defined above
        # NOTE(review): the converted binnings are not used below; bin edges
        # and centers are taken from `self.input_binning` and
        # `self.output_binning` (user-defined units). The conversions are kept
        # as-is in case they are relied upon to reject incompatible units --
        # confirm whether the converted binnings were meant to be used.
        input_binning = self.input_binning.to(**in_units)
        output_binning = self.output_binning.to(**out_units)
        en_centers_in = self.input_binning[
            'true_energy'].weighted_centers.magnitude
        en_edges_in = self.input_binning['true_energy'].bin_edges.magnitude
        cz_centers_in = self.input_binning[
            'true_coszen'].weighted_centers.magnitude
        cz_edges_in = self.input_binning['true_coszen'].bin_edges.magnitude
        en_edges_out = self.output_binning['reco_energy'].bin_edges.magnitude
        cz_edges_out = self.output_binning['reco_coszen'].bin_edges.magnitude

        # Kernel dimensions; the output sizes are taken from the *original*
        # output edges, before any extension for the coszen flipback.
        n_e_in = len(en_centers_in)
        n_cz_in = len(cz_centers_in)
        n_e_out = len(en_edges_out) - 1
        n_cz_out = len(cz_edges_out) - 1

        if self.coszen_flipback:
            cz_edges_out, flipback_mask, keep = \
                self.extend_binning_for_coszen(ext_low=-3., ext_high=+3.)

        xforms = []
        for xform_flavints in self.transform_groups:
            logging.debug("Working on %s reco kernel...", xform_flavints)

            this_params = eval_dict[xform_flavints]
            reco_kernel = np.zeros((n_e_in, n_cz_in, n_e_out, n_cz_out))

            # Fill the kernel one input (energy, coszen) bin at a time as the
            # outer product of the energy and coszen output distributions.
            for (i, j) in itertools.product(range(n_e_in), range(n_cz_in)):
                e_kern_cdf = self.make_cdf(bin_edges=en_edges_out,
                                           enval=en_centers_in[i],
                                           enindex=i,
                                           czval=None,
                                           czindex=j,
                                           dist_params=this_params['energy'])
                cz_kern_cdf = self.make_cdf(bin_edges=cz_edges_out,
                                            enval=en_centers_in[i],
                                            enindex=i,
                                            czval=cz_centers_in[j],
                                            czindex=j,
                                            dist_params=this_params['coszen'])

                if self.coszen_flipback:
                    cz_kern_cdf = perform_coszen_flipback(
                        cz_kern_cdf, flipback_mask, keep)

                reco_kernel[i, j] = np.outer(e_kern_cdf, cz_kern_cdf)

            # Sanity check of reco kernels - intolerable negative values?
            logging.trace(" Ensuring reco kernel sanity...")
            kern_neg_invalid = reco_kernel < -EQUALITY_PREC
            if np.any(kern_neg_invalid):
                raise ValueError("Detected intolerable negative entries in"
                                 " reco kernel! Min.: %.15e" %
                                 np.min(reco_kernel))

            # Set values numerically compatible with zero to zero. This must
            # modify the kernel in place: a bare `np.where(...)` call returns
            # a new array and leaves `reco_kernel` untouched.
            reco_kernel[np.abs(reco_kernel) < EQUALITY_PREC] = 0

            # Each input bin may distribute at most all of its content over
            # the output bins.
            sum_over_axes = tuple(range(-len(self.output_binning), 0))
            totals = np.sum(reco_kernel, axis=sum_over_axes)
            totals_large = totals > (1 + EQUALITY_PREC)
            if np.any(totals_large):
                raise ValueError("Detected overflow in reco kernel! Max.:"
                                 " %0.15e" % (np.max(totals)))

            if self.input_binning.basenames[0] == "coszen":
                # The reconstruction kernel has been set up with energy as its
                # first dimension, so swap axes if it is applied to an input
                # binning where 'coszen' is the first
                logging.trace(" Swapping kernel dimensions since 'coszen' has"
                              " been requested as the first.")
                reco_kernel = np.swapaxes(reco_kernel, 0, 1)
                reco_kernel = np.swapaxes(reco_kernel, 2, 3)

            if self.sum_grouped_flavints:
                # Collect every input that shares at least one flavint with
                # this transform group; these are summed by the transform.
                xform_input_names = []
                for input_name in self.input_names:
                    if set(NuFlavIntGroup(input_name)).isdisjoint(
                            xform_flavints):
                        continue
                    xform_input_names.append(input_name)

                for output_name in self.output_names:
                    if output_name not in xform_flavints:
                        continue
                    xform = BinnedTensorTransform(
                        input_names=xform_input_names,
                        output_name=output_name,
                        input_binning=self.input_binning,
                        output_binning=self.output_binning,
                        xform_array=reco_kernel,
                        sum_inputs=self.sum_grouped_flavints)
                    xforms.append(xform)
            # If *not* combining grouped flavints:
            # Copy the transform for each input flavor, regardless if the
            # transform is computed from a combination of flavors.
            else:
                for input_name in self.input_names:
                    if set(NuFlavIntGroup(input_name)).isdisjoint(
                            xform_flavints):
                        continue
                    for output_name in self.output_names:
                        if (output_name not in NuFlavIntGroup(input_name)
                                or output_name not in xform_flavints):
                            continue
                        logging.trace('  input: %s, output: %s, xform: %s',
                                      input_name, output_name, xform_flavints)

                        xform = BinnedTensorTransform(
                            input_names=input_name,
                            output_name=output_name,
                            input_binning=self.input_binning,
                            output_binning=self.output_binning,
                            xform_array=reco_kernel,
                            sum_inputs=self.sum_grouped_flavints)
                        xforms.append(xform)

        return TransformSet(transforms=xforms)
Beispiel #13
0
    def _compute_nominal_transforms(self):
        """Build the nominal PID transforms for every transform group.

        The PID parameterisations are loaded from
        `params.pid_energy_paramfile`. For each transform group, every
        signature's parameterisation is evaluated at `self.ebin_centers`,
        broadcast across the reco_coszen dimension, and written into that
        signature's slice of the transform array.

        Returns
        -------
        TransformSet
            The transforms created for all groups in `self.transform_groups`.
        """
        logging.debug('Updating pid.param PID histograms...')

        self.load_pid_energy_param(self.params.pid_energy_paramfile.value)

        collected = []
        for xform_flavints in self.transform_groups:
            logging.debug('Working on %s PID', xform_flavints)

            # Uninitialised buffer; each signature fills its own PID slice.
            pid_array = np.empty(self.transform_output_binning.shape)

            group_params = self.pid_energy_param_dict[xform_flavints]
            for signature, param_func in group_params.items():
                # PID probabilities vs. energy, evaluated at the energy bins'
                # (weighted) centers
                prob_vs_energy = param_func(self.ebin_centers)

                # The parameterisation does not depend on reco_coszen, so
                # replicate the 1d result along that dimension
                prob_nd = self.transform_output_binning.broadcast(
                    prob_vs_energy,
                    from_dim='reco_energy',
                    to_dims='reco_coszen')

                # Write the result into the slice belonging to this signature
                target = self.transform_output_binning.indexer(pid=signature)
                pid_array[target] = prob_nd

            if not self.sum_grouped_flavints:
                # One transform per input that belongs to this group; the
                # output carries the same name as the input.
                for input_name in self.input_names:
                    if input_name in xform_flavints:
                        collected.append(
                            BinnedTensorTransform(
                                input_names=input_name,
                                output_name=input_name,
                                input_binning=self.input_binning,
                                output_binning=self.transform_output_binning,
                                xform_array=pid_array,
                            )
                        )
                continue

            # Summing grouped flavints: gather all inputs sharing at least one
            # flavint with this group.
            group_members = set(xform_flavints)
            grouped_inputs = [
                name for name in self.input_names
                if group_members.intersection(NuFlavIntGroup(name))
            ]

            for output_name in self.output_names:
                if output_name in xform_flavints:
                    collected.append(
                        BinnedTensorTransform(
                            input_names=grouped_inputs,
                            output_name=str(xform_flavints),
                            input_binning=self.input_binning,
                            output_binning=self.transform_output_binning,
                            xform_array=pid_array,
                            sum_inputs=self.sum_grouped_flavints)
                    )

        return TransformSet(transforms=collected)
Beispiel #14
0
    def _compute_nominal_transforms(self):
        """Compute parameterised effective area transforms.

        The energy parameterisation (`params.aeff_energy_paramfile`) and,
        when the binning has a coszen dimension (`self.has_cz`), the coszen
        parameterisation (`params.aeff_coszen_paramfile`) are (re)loaded only
        if the hash of their source differs from the hash cached in
        `self._param_hashes`.

        Returns
        -------
        TransformSet or None
            The transforms for all groups in `self.transform_groups`, or
            None if neither parameterisation needed to be (re)loaded.

        Raises
        ------
        ValueError
            If no energy parameterisation is given, if a coszen
            parameterisation is given although the binning has no coszen
            dimension, or if the groups found in a parameterisation do not
            match `self.transform_groups`.
        """
        energy_param_source = self.params.aeff_energy_paramfile.value
        coszen_param_source = self.params.aeff_coszen_paramfile.value

        energy_param_hash = hash_obj(energy_param_source)
        coszen_param_hash = hash_obj(coszen_param_source)

        load_energy = False
        load_coszen = False
        if (self._param_hashes['energy'] is None
                or energy_param_hash != self._param_hashes['energy']):
            load_energy = True

        # Compare the coszen hash against the cached *coszen* hash. Comparing
        # the energy hash with the whole `_param_hashes` dict is always
        # unequal and would force a reload on every call.
        if (self.has_cz
                and (self._param_hashes['coszen'] is None
                     or coszen_param_hash != self._param_hashes['coszen'])):
            load_coszen = True

        if energy_param_source is None:
            raise ValueError(
                'non-None energy parameterization params.aeff_energy_paramfile'
                ' must be provided'
            )
        if not self.has_cz and coszen_param_source is not None:
            raise ValueError(
                'true_coszen dimension was not found in the binning but a'
                ' coszen parameterisation file has been provided by'
                ' `params.aeff_coszen_paramfile`.'
            )

        # NOTE(review): nothing changed, so nothing is recomputed and None is
        # returned -- confirm the caller treats None as "keep the existing
        # nominal transforms".
        if not (load_energy or load_coszen):
            return

        dims = ['energy', 'coszen']
        loads = [load_energy, load_coszen]
        sources = [energy_param_source, coszen_param_source]
        hashes = [energy_param_hash, coszen_param_hash]

        for dim, load, source, hash_ in zip(dims, loads, sources, hashes):
            if not load:
                continue
            # Invalidate the cached state first so that a failure while
            # loading does not leave a stale hash/params pair behind.
            self._param_hashes[dim] = None
            self.aeff_params[dim] = None
            params = load_aeff_param(source)

            # Transform groups are implicitly defined by the keys of the
            # loaded aeff parameterisation
            implicit_transform_groups = params.keys()

            # Make sure these match transform groups specified for the stage
            if set(implicit_transform_groups) != set(self.transform_groups):
                raise ValueError(
                    'Transform groups (%s) defined implicitly by'
                    ' %s aeff parameterizations "%s"  do not match those'
                    ' defined as the stage\'s `transform_groups` (%s).'
                    % (implicit_transform_groups, dim, source,
                       self.transform_groups)
                )

            self.aeff_params[dim] = params
            self._param_hashes[dim] = hash_

        nominal_transforms = []
        for xform_flavints in self.transform_groups:
            logging.debug('Working on %s effective areas xform',
                          xform_flavints)

            energy_param_func = self.aeff_params['energy'][xform_flavints]
            coszen_param_func = None
            if self.aeff_params['coszen'] is not None:
                coszen_param_func = self.aeff_params['coszen'][xform_flavints]

            # Now calculate the 1D aeff along energy
            aeff_vs_e = energy_param_func(self.ecen)

            # NOTE/TODO: Below is taken from the PISA 2 implementation of this.
            # Almost certainly comes from the fact that the highest knot there
            # was 79.5 GeV with the upper energy bin edge being 80 GeV. There's
            # probably something better that could be done here...

            # Correct for final energy bin, since interpolation does not
            # extend to JUST right outside the final bin
            if aeff_vs_e[-1] == 0:
                aeff_vs_e[-1] = aeff_vs_e[-2]

            if self.has_cz:
                aeff_vs_e = self.input_binning.broadcast(
                    aeff_vs_e, from_dim='true_energy', to_dims='true_coszen'
                )

                if coszen_param_func is not None:
                    aeff_vs_cz = coszen_param_func(self.czcen)
                    # Normalize so that the coszen factors average to 1
                    aeff_vs_cz *= len(aeff_vs_cz) / np.sum(aeff_vs_cz)
                else:
                    # No coszen dependence: unit factor in every coszen bin
                    aeff_vs_cz = np.ones(shape=len(self.czcen))

                cz_broadcasted = self.input_binning.broadcast(
                    aeff_vs_cz, from_dim='true_coszen', to_dims='true_energy'
                )
                aeff_transform = aeff_vs_e * cz_broadcasted
            else:
                aeff_transform = aeff_vs_e

            nominal_transforms.extend(
                populate_transforms(
                    service=self,
                    xform_flavints=xform_flavints,
                    xform_array=aeff_transform
                )
            )

        return TransformSet(transforms=nominal_transforms)