Esempio n. 1
0
    def apply_function(self):
        """Histogram each container's weights into the `apply_mode` binning.

        Two calculation modes are handled:

        * `calc_mode` is a `MultiDimBinning`: the weights read in that
          representation are matrix-multiplied with the container's stored
          ``hist_transform``.
        * `calc_mode` equals ``'events'``: the arrays named by
          ``apply_mode.names`` are histogrammed with the event weights
          (``averaged=False``).

        The result is stored as ``weights`` in the `apply_mode`
        representation. If `error_method` is ``'sumw2'``, the squared weights
        are projected the same way and their square root is stored as
        ``errors``. For any other `calc_mode` nothing is done.

        NOTE: an older ``'binned'`` calc mode used to live here as
        commented-out code, marked "Needs some care, broken in pisa4"; it is
        not implemented.
        """
        binned_input = isinstance(self.calc_mode, MultiDimBinning)
        if not binned_input and not (self.calc_mode == 'events'):
            return

        for container in self.data:
            with_errors = self.error_method == 'sumw2'
            squared_counts = None

            # Read all inputs while the container is in the calc representation
            container.representation = self.calc_mode
            if binned_input:
                in_weights = container['weights']
                matrix = container['hist_transform']

                counts = in_weights @ matrix
                if with_errors:
                    squared_counts = np.square(in_weights) @ matrix
            else:
                coords = [container[name] for name in self.apply_mode.names]
                in_weights = container['weights']

                counts = histogram(coords,
                                   in_weights,
                                   self.apply_mode,
                                   averaged=False)
                if with_errors:
                    squared_counts = histogram(coords,
                                               np.square(in_weights),
                                               self.apply_mode,
                                               averaged=False)

            # Write the outputs in the apply representation
            container.representation = self.apply_mode
            container['weights'] = counts
            if with_errors:
                container['errors'] = np.sqrt(squared_counts)
Esempio n. 2
0
    def apply_function(self):
        """Histogram each container's weights into the `apply_mode` binning.

        Two calculation modes are handled:

        * `calc_mode` is a `MultiDimBinning`: the weights read in that
          representation are matrix-multiplied with the container's stored
          ``hist_transform``.
        * `calc_mode` equals ``'events'``: one sample array per dimension of
          ``self.regularized_apply_mode`` is collected and histogrammed with
          the event weights (``averaged=False``). Dimensions of `apply_mode`
          that are logarithmic and not irregular are read in the
          ``"log_events"`` representation, all others in ``"events"``.

        The result is stored as ``weights`` in the `apply_mode`
        representation. If `error_method` is ``'sumw2'``, the squared weights
        are projected the same way and their square root is stored as
        ``errors``. For any other `calc_mode` nothing is done.
        """
        if isinstance(self.calc_mode, MultiDimBinning):

            for container in self.data:

                container.representation = self.calc_mode
                weights = container['weights']
                transform = container['hist_transform']

                hist = weights @ transform
                if self.error_method == 'sumw2':
                    sumw2 = np.square(weights) @ transform

                container.representation = self.apply_mode
                container['weights'] = hist

                if self.error_method == 'sumw2':
                    container['errors'] = np.sqrt(sumw2)

        elif self.calc_mode == 'events':
            # Whether a dimension is read as "log_events" depends only on the
            # original binning, so decide it once instead of per container.
            read_as_log = [
                d.is_log and not d.is_irregular for d in self.apply_mode
            ]

            for container in self.data:
                sample = []
                for dim, use_log in zip(self.regularized_apply_mode,
                                        read_as_log):
                    container.representation = (
                        "log_events" if use_log else "events"
                    )
                    sample.append(container[dim.name])

                # The loop above leaves the container in the representation of
                # the *last* dimension. Switch back explicitly so the weights
                # are never read in "log_events".
                # NOTE(review): presumably "log_events" returns the logarithm
                # of whatever variable is requested - confirm against the
                # Container implementation.
                container.representation = self.calc_mode
                weights = container['weights']

                # The hist is now computed using a binning that is completely
                # linear and regular
                hist = histogram(sample,
                                 weights,
                                 self.regularized_apply_mode,
                                 averaged=False)

                if self.error_method == 'sumw2':
                    sumw2 = histogram(sample,
                                      np.square(weights),
                                      self.regularized_apply_mode,
                                      averaged=False)

                container.representation = self.apply_mode

                container['weights'] = hist

                if self.error_method == 'sumw2':
                    container['errors'] = np.sqrt(sumw2)
Esempio n. 3
0
    def setup_function(self):
        """Validate the stage modes and precompute the histogram transforms.

        `apply_mode` must be a `MultiDimBinning`. If `calc_mode` is a
        `MultiDimBinning` as well, the two binnings must not share any
        dimension name; for every container the events are then histogrammed
        (unweighted, ``averaged=False``) in the combined binning
        ``calc_mode + apply_mode`` and the result, reshaped to
        ``calc_mode.shape + (-1,)``, is stored as ``hist_transform`` in the
        `calc_mode` representation. For any other `calc_mode` nothing else
        is done.

        Raises
        ------
        AssertionError
            If `apply_mode` is not a `MultiDimBinning`, or if the two
            binnings share a dimension name.
        """
        assert isinstance(
            self.apply_mode, MultiDimBinning
        ), "Hist stage needs a binning as `apply_mode`, but is %s" % self.apply_mode

        if not isinstance(self.calc_mode, MultiDimBinning):
            return

        # The two binnings must not have any dimension in common
        shared_names = set(self.calc_mode.names) & set(self.apply_mode.names)
        assert len(shared_names) == 0

        combined_binning = self.calc_mode + self.apply_mode

        for container in self.data:
            # The transform is built from the event-wise arrays
            self.data.representation = "events"
            coords = [container[name] for name in combined_binning.names]
            counts = histogram(coords,
                               None,
                               combined_binning,
                               averaged=False)
            matrix = counts.reshape(self.calc_mode.shape + (-1, ))

            # ... and stored alongside the binned (calc_mode) data
            self.data.representation = self.calc_mode
            container['hist_transform'] = matrix
Esempio n. 4
0
    def array_to_binned(self, key, binning, averaged=True):
        """Histogram the array stored under `key` and register it as binned data.

        Parameters
        ----------
        key : str
            Name of the entry in ``self.array_data`` used as weights; the
            result is added under the same name.

        binning : MultiDimBinning
            Target binning. The arrays named by ``binning.names`` are taken
            from ``self.array_data`` as the sample coordinates.

        averaged : bool
            If True, the histogram entries are averages of the numbers that
            end up in a given bin. This must be used, for example, when
            oscillation probabilities are translated; otherwise the result
            is probability*count per bin.

        Notes
        -----
        Right now CPU only.

        TODO: make work for n-dim
        """
        logging.debug('Transforming %s array to binned data' % (key))

        values = self.array_data[key]
        coords = [self.array_data[dim_name] for dim_name in binning.names]

        binned = histogram(coords, values, binning, averaged)
        self.add_binned_data(key, (binning, binned))
Esempio n. 5
0
    def array_to_binned(self, key, src_representation, dest_representation):
        """Histogram the array `key` into `dest_representation` (bin averages).

        Parameters
        ----------
        key : str
            Name of the variable to histogram; it is read in
            `src_representation` and used as the histogram weights.
        src_representation : str
            Must be one of ``self.array_representations``.
        dest_representation : MultiDimBinning
            Target binning.

        Returns
        -------
        hist
            Result of ``histogram(..., averaged=True)``.

        Notes
        -----
        If the target binning is not irregular, each logarithmic dimension is
        sampled in the ``"log_events"`` representation and binned with a new
        `OneDimBinning` spanning the log of its domain with the same number
        of bins; the other dimensions are sampled in ``"events"`` and keep
        their binning. For an irregular target binning the samples are read
        in `src_representation` and the binning is used unchanged.

        The container's representation is left at `src_representation`.

        Right now, CPU-only.
        """
        # TODO: make work for n-dim
        logging.trace('Transforming %s array to binned data' % (key))

        assert src_representation in self.array_representations
        assert isinstance(dest_representation, MultiDimBinning)

        if dest_representation.is_irregular:
            self.representation = src_representation
            coords = [self[name] for name in dest_representation.names]
            target_binning = dest_representation
        else:
            coords = []
            axes = []
            for axis in dest_representation:
                log_axis = axis.is_log
                self.representation = "log_events" if log_axis else "events"
                coords.append(self[axis.name])
                if log_axis:
                    # Bin linearly in the logarithm of the variable
                    axis = OneDimBinning(axis.name,
                                         domain=np.log(axis.domain.m),
                                         num_bins=axis.num_bins)
                axes.append(axis)
            target_binning = MultiDimBinning(axes)

        # The values to be averaged are always read in the source representation
        self.representation = src_representation
        values = self[key]

        return histogram(coords, values, target_binning, averaged=True)
Esempio n. 6
0
    def setup_function(self):
        """Validate the stage modes and prepare what `apply_function` needs.

        `apply_mode` must be a `MultiDimBinning`.

        * If `calc_mode` is a `MultiDimBinning`, the two binnings must not
          share any dimension name. For every container the events are
          histogrammed (unweighted, ``averaged=False``) in the combined
          binning ``calc_mode + apply_mode``; the result, reshaped to
          ``calc_mode.shape + (-1,)``, is stored as ``hist_transform`` in
          the `calc_mode` representation.
        * If `calc_mode` equals ``"events"``, a regular, linear stand-in for
          `apply_mode` is built and stored as
          ``self.regularized_apply_mode``:

          - irregular dimensions are replaced by a binning over the bin
            index, and the index of every sample is precomputed and stored
            in each container under ``<dim>__<binning name>_idx``;
          - logarithmic dimensions are replaced by a binning over the log
            of their domain (the log of the variable itself is not computed
            here);
          - all other dimensions are used unchanged.

        Raises
        ------
        AssertionError
            If `apply_mode` is not a `MultiDimBinning`, or if the two
            binnings share a dimension name.
        ValueError
            If `calc_mode` is neither a `MultiDimBinning` nor ``"events"``.
        """
        assert isinstance(
            self.apply_mode, MultiDimBinning
        ), "Hist stage needs a binning as `apply_mode`, but is %s" % self.apply_mode

        if isinstance(self.calc_mode, MultiDimBinning):
            # The two binnings must not have any dimension in common
            shared_names = set(self.calc_mode.names) & set(self.apply_mode.names)
            assert len(shared_names) == 0

            combined_binning = self.calc_mode + self.apply_mode

            for container in self.data:
                # The transform is built from the event-wise arrays
                self.data.representation = "events"
                coords = [container[name] for name in combined_binning.names]
                counts = histogram(coords,
                                   None,
                                   combined_binning,
                                   averaged=False)
                matrix = counts.reshape(self.calc_mode.shape + (-1, ))
                self.data.representation = self.calc_mode
                container['hist_transform'] = matrix
            return

        if self.calc_mode != "events":
            raise ValueError(f"unknown calc mode: {self.calc_mode}")

        regular_dims = []
        for dim in self.apply_mode:
            if dim.is_irregular:
                # Bin regularly in the precomputed bin index instead of the
                # variable itself.
                idx_name = dim.name + "__" + self.apply_mode.name + "_idx"
                regular_dims.append(
                    OneDimBinning(idx_name,
                                  domain=[0, dim.num_bins],
                                  num_bins=dim.num_bins))
                for container in self.data:
                    container.representation = "events"
                    values = container[dim.name] * dim.units
                    # Index of the bin each sample falls into; the shift by
                    # -1 assigns samples below the binning range the index -1.
                    bin_idx = np.searchsorted(
                        dim.bin_edges, values, side="right") - 1
                    # As in numpy histogramming, values exactly on the
                    # uppermost edge belong to the highest bin rather than
                    # being outliers, so move them down by one index.
                    at_upper_edge = (values == dim.bin_edges[-1])
                    bin_idx[at_upper_edge] -= 1
                    container[idx_name] = bin_idx
                continue

            if dim.is_log:
                # Only the binning is converted here; the log of the variable
                # is taken later in `apply_function` through the
                # representation mechanism.
                regular_dims.append(
                    OneDimBinning(dim.name,
                                  domain=np.log(dim.domain.m),
                                  num_bins=dim.num_bins))
                continue

            regular_dims.append(dim)

        self.regularized_apply_mode = MultiDimBinning(regular_dims)
        message = ("Using regularized binning:\n" +
                   str(self.regularized_apply_mode))
        logging.debug(message)
Esempio n. 7
0
File: hist.py Progetto: icecube/pisa
    def apply_function(self):
        """Histogram each container's weights into the `apply_mode` binning.

        The weights are ``weights`` plus ``astro_weights`` when the container
        has such a key, or an array of ones of the same shape when
        ``self.unweighted`` is set.

        Two calculation modes are handled:

        * `calc_mode` is a `MultiDimBinning`: the weights read in that
          representation are matrix-multiplied with the container's stored
          ``hist_transform``.
        * `calc_mode` equals ``"events"``: one sample array per dimension of
          ``self.regularized_apply_mode`` is collected and histogrammed with
          the weights (``averaged=False``). Dimensions of `apply_mode` that
          are logarithmic and not irregular are read in the ``"log_events"``
          representation, all others in ``"events"``.

        The result is stored as ``weights`` in the `apply_mode`
        representation. If `error_method` is ``"sumw2"``, the squared weights
        are projected the same way and their square root is stored as
        ``errors``. For any other `calc_mode` nothing is done.

        Raises
        ------
        NotImplementedError
            If ``self.unweighted`` is set while `calc_mode` is a
            `MultiDimBinning`.
        """
        if isinstance(self.calc_mode, MultiDimBinning):

            if self.unweighted:
                raise NotImplementedError(
                    "Unweighted hist only implemented in event-wise calculation"
                )
            for container in self.data:

                container.representation = self.calc_mode
                weights = container["weights"]
                if "astro_weights" in container.keys:
                    weights = weights + container["astro_weights"]
                transform = container["hist_transform"]

                hist = weights @ transform
                if self.error_method == "sumw2":
                    sumw2 = np.square(weights) @ transform

                container.representation = self.apply_mode
                container["weights"] = hist

                if self.error_method == "sumw2":
                    container["errors"] = np.sqrt(sumw2)

        elif self.calc_mode == "events":
            # Whether a dimension is read as "log_events" depends only on the
            # original binning, so decide it once instead of per container.
            read_as_log = [
                d.is_log and not d.is_irregular for d in self.apply_mode
            ]

            for container in self.data:
                sample = []
                for dim, use_log in zip(self.regularized_apply_mode,
                                        read_as_log):
                    container.representation = (
                        "log_events" if use_log else "events"
                    )
                    sample.append(container[dim.name])

                # The loop above leaves the container in the representation of
                # the *last* dimension. Switch back explicitly so the weights
                # are never read in "log_events".
                # NOTE(review): presumably "log_events" returns the logarithm
                # of whatever variable is requested - confirm against the
                # Container implementation.
                container.representation = self.calc_mode

                weights = container["weights"]
                if "astro_weights" in container.keys:
                    weights = weights + container["astro_weights"]
                if self.unweighted:
                    # Plain event counts: same shape and dtype, all ones
                    weights = np.ones_like(weights)

                # The hist is now computed using a binning that is completely
                # linear and regular
                hist = histogram(sample,
                                 weights,
                                 self.regularized_apply_mode,
                                 averaged=False)

                if self.error_method == "sumw2":
                    sumw2 = histogram(
                        sample,
                        np.square(weights),
                        self.regularized_apply_mode,
                        averaged=False,
                    )

                container.representation = self.apply_mode

                container["weights"] = hist

                if self.error_method == "sumw2":
                    container["errors"] = np.sqrt(sumw2)