Example 1
 def get_map(self, binning, **kwargs):
     """Return a map of the spline evaluated at the centers of the
     given binning.
     """
     if not isinstance(binning, MultiDimBinning):
         if isinstance(binning, Sequence):
             binning = MultiDimBinning(dimensions=binning)
         elif isinstance(binning, Mapping):
             binning = MultiDimBinning(**binning)
         else:
             raise ValueError('Do not know what to do with `binning`=%s of'
                              ' type %s' % (binning, type(binning)))
     if self._validate_spl is not None:
         self._validate_spl(binning)
     return self._eval_spl(self.spline, binning, name=self.name, **kwargs)
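
For reference, a minimal sketch of the three `binning` forms this method normalizes (assuming a working PISA installation; the `OneDimBinning` arguments mirror Example 6 below):

from pisa.core.binning import OneDimBinning, MultiDimBinning

# A single dimension, built as in the examples below
e_dim = OneDimBinning(name='true_energy', num_bins=10, is_lin=True,
                      domain=[0, 100])

# All three accepted forms yield the same MultiDimBinning
as_obj = MultiDimBinning([e_dim])                    # passed through as-is
as_seq = MultiDimBinning(dimensions=[e_dim])         # the Sequence branch
as_map = MultiDimBinning(**{'dimensions': [e_dim]})  # the Mapping branch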
Example 2
    def keep_inbounds(self, binning):
        """Cut out any events that fall outside `binning`. Note that events
        that fall exactly on an outer edge are kept.

        Parameters
        ----------
        binning : OneDimBinning or MultiDimBinning

        Returns
        -------
        cut_data : EventsPi

        """
        # Get the binning instance
        try:
            binning = OneDimBinning(binning)
        except:  # pylint: disable=bare-except
            pass
        if isinstance(binning, OneDimBinning):
            binning = [binning]
        binning = MultiDimBinning(binning)

        # Define a cut to remove events outside of the binned region
        bin_edge_cuts = [dim.inbounds_criteria for dim in binning]
        bin_edge_cuts = " & ".join([str(x) for x in bin_edge_cuts])

        # Apply the cut
        return self.apply_cut(bin_edge_cuts)
Example 3
    def compute_binning_constants(self):
        """Compute some constants related to the binning.
        Just for illustrating a few properties of the
        binning one might want to evaluate."""
        # Get the energy/coszen (ONLY) weighted centers here, since these
        # are actually used in the oscillations computation. All other
        # dimensions are ignored. Since these won't change so long as the
        # binning doesn't change, attach these to self.
        self.ecz_binning = MultiDimBinning([
            self.input_binning.true_energy.to('GeV'),
            self.input_binning.true_coszen.to('dimensionless')
        ])
        e_centers, cz_centers = self.ecz_binning.weighted_centers
        self.e_centers = e_centers.magnitude
        self.cz_centers = cz_centers.magnitude

        self.num_czbins = self.input_binning.true_coszen.num_bins
        self.num_ebins = self.input_binning.true_energy.num_bins

        self.e_dim_num = self.input_binning.names.index('true_energy')
        self.cz_dim_num = self.input_binning.names.index('true_coszen')

        # Illustrate how to find input binning dimensions which the transforms
        # created by this service will not depend on.
        self.extra_dim_nums = list(range(self.input_binning.num_dims))
        for d in (self.e_dim_num, self.cz_dim_num):
            self.extra_dim_nums.remove(d)
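
A standalone sketch of the binning properties used above (`weighted_centers`, `num_bins`, `names`); this assumes PISA's unit registry is importable as `pisa.ureg`, matching the `ureg.GeV` usage in Examples 16 and 25:

from pisa import ureg
from pisa.core.binning import OneDimBinning, MultiDimBinning

ecz_binning = MultiDimBinning([
    OneDimBinning(name='true_energy', num_bins=10, is_log=True,
                  domain=[1, 80] * ureg.GeV),
    OneDimBinning(name='true_coszen', num_bins=5, is_lin=True,
                  domain=[-1, 0]),
])
e_centers, cz_centers = ecz_binning.weighted_centers
print(ecz_binning.names.index('true_energy'))  # -> 0
print(e_centers.magnitude)                     # plain ndarray of centers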
Example 4
    def _flatten_to_2d(in_map):
        assert isinstance(in_map, Map)
        shape = in_map.shape
        names = in_map.binning.names
        dims = len(shape)
        assert dims % 2 == 0

        nbins_a = np.prod(shape[:dims // 2])
        nbins_b = np.prod(shape[dims // 2:])
        names_a = ' '.join(names[:dims // 2])
        names_b = ' '.join(names[dims // 2:])

        binning = []
        binning.append(
            OneDimBinning(name=names_a,
                          num_bins=nbins_a,
                          is_lin=True,
                          domain=[0, nbins_a]))
        binning.append(
            OneDimBinning(name=names_b,
                          num_bins=nbins_b,
                          is_lin=True,
                          domain=[0, nbins_b]))
        binning = MultiDimBinning(binning)

        hist = in_map.hist.reshape(nbins_a, nbins_b)
        return Map(name=in_map.name, hist=hist, binning=binning)
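
The reshape at the heart of `_flatten_to_2d` can be checked with plain NumPy; this sketch only mirrors the index arithmetic, not the PISA objects:

import numpy as np

# A 4-D histogram (two "input" dims x two "output" dims)
hist4d = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5)
dims = hist4d.ndim
nbins_a = np.prod(hist4d.shape[:dims // 2])  # 2 * 3 = 6
nbins_b = np.prod(hist4d.shape[dims // 2:])  # 4 * 5 = 20
flat = hist4d.reshape(nbins_a, nbins_b)      # shape (6, 20)
assert flat[1, 2] == hist4d[0, 1, 0, 2]      # row-major flattening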
Example 5
    def binned_to_array(self, key, src_representation, dest_representation):
        """Augment binned data to array data."""

        logging.trace('Transforming %s binned to array data' % (key))

        self.representation = src_representation
        weights = self[key]

        if not src_representation.is_irregular:
            logging.trace(
                f"Container `{self.name}`: regularized lookup for {key}")
            sample = []
            dimensions = []
            for d in src_representation:
                if d.is_log:
                    self.representation = "log_events"
                    sample.append(self[d.name])
                    dimensions.append(
                        OneDimBinning(d.name,
                                      domain=np.log(d.domain.m),
                                      num_bins=d.num_bins))
                else:
                    self.representation = "events"
                    sample.append(self[d.name])
                    dimensions.append(d)
            hist_binning = MultiDimBinning(dimensions)
        else:
            logging.trace(
                f"Container `{self.name}`: irregular lookup for {key}")
            self.representation = dest_representation
            sample = [self[name] for name in src_representation.names]
            hist_binning = src_representation

        return lookup(sample, weights, hist_binning)
Example 6
def test_container():
    n_evts = 10000
    x = np.arange(n_evts, dtype=FTYPE)
    y = np.arange(n_evts, dtype=FTYPE)
    w = np.ones(n_evts, dtype=FTYPE)
    w *= np.random.rand(n_evts)

    container = Container('test')
    container.add_array_data('x', x)
    container.add_array_data('y', y)
    container.add_array_data('w', w)


    binning_x = OneDimBinning(name='x', num_bins=10, is_lin=True, domain=[0, 100])
    binning_y = OneDimBinning(name='y', num_bins=10, is_lin=True, domain=[0, 100])
    binning = MultiDimBinning([binning_x, binning_y])
    # print(binning.names)
    print(container.get_binned_data('x', binning).get('host'))
    print(Container.unroll_binning('x', binning).get('host'))

    # array
    print('original array')
    print(container.get_array_data('w').get('host'))
    container.array_to_binned('w', binning)
    # binned
    print('binned')
    print(container.get_binned_data('w').get('host'))
    print(container.get_hist('w'))

    print('augmented again')
    # augment
    container.binned_to_array('w')
    print(container.get_array_data('w').get('host'))
Example 7
File: kde.py Project: icecube/pisa
    def setup_function(self):

        assert isinstance(
            self.apply_mode, MultiDimBinning
        ), f"KDE stage needs a binning as `apply_mode`, but is {self.apply_mode}"

        # For dimensions that are logarithmic, we add a linear binning in
        # the logarithm.
        dimensions = []
        for dim in self.apply_mode:
            if dim.is_lin:
                new_dim = deepcopy(dim)
            # We don't compute the log of the variable just yet, this
            # will be done later during `apply_function` using the
            # representation mechanism.
            # We replace the logarithmic binning with a linear binning in log-space
            elif dim.is_irregular:
                new_dim = OneDimBinning(
                    dim.name,
                    bin_edges=np.log(dim.bin_edges.m),
                )
            else:
                new_dim = OneDimBinning(dim.name,
                                        domain=np.log(dim.domain.m),
                                        num_bins=dim.num_bins)
            dimensions.append(new_dim)

        self.regularized_apply_mode = MultiDimBinning(dimensions)
        logging.debug("Using regularized binning:\n" +
                      repr(self.regularized_apply_mode))
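
The regularization trick itself is easy to verify with plain NumPy: taking the log of logarithmically spaced bin edges yields evenly spaced (i.e., linear) edges, which is what the KDE stage relies on:

import numpy as np

log_edges = np.geomspace(1.0, 80.0, 11)  # log-spaced energy edges
lin_in_log = np.log(log_edges)           # evenly spaced in log-space
steps = np.diff(lin_in_log)
assert np.allclose(steps, steps[0])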
Example 8
def test_histogram():
    """Unit tests for `histogram` function.

    Correctness is defined as matching the histogram produced by
    numpy.histogramdd.
    """
    all_num_bins = [2, 3, 4]
    n_evts = 10000
    rand = np.random.RandomState(seed=0)

    weights = rand.rand(n_evts).astype(FTYPE)
    binning = []
    sample = []
    for num_dims, num_bins in enumerate(all_num_bins, start=1):
        binning.append(
            OneDimBinning(
                name=f'dim{num_dims - 1}',
                num_bins=num_bins,
                is_lin=True,
                domain=[0, num_bins],
            ))

        s = rand.rand(n_evts).astype(FTYPE) * num_bins
        sample.append(s)

        bin_edges = [b.edge_magnitudes for b in binning]
        test = histogram(sample,
                         weights,
                         MultiDimBinning(binning),
                         averaged=False)
        ref, _ = np.histogramdd(sample=sample, bins=bin_edges, weights=weights)
        ref = ref.astype(FTYPE).ravel()
        assert recursiveEquality(test, ref), f'\ntest:\n{test}\n\nref:\n{ref}'

        test_avg = histogram(sample,
                             weights,
                             MultiDimBinning(binning),
                             averaged=True)
        ref_counts, _ = np.histogramdd(sample=sample,
                                       bins=bin_edges,
                                       weights=None)
        ref_counts = ref_counts.astype(FTYPE).ravel()
        ref_avg = (ref / ref_counts).astype(FTYPE)
        assert recursiveEquality(test_avg, ref_avg), \
                f'\ntest_avg:\n{test_avg}\n\nref_avg:\n{ref_avg}'

    logging.info('<< PASS : test_histogram >>')
Example 9
def test_container():
    """Unit tests for Container class."""

    # NOTE: Right now the numbers are tuned so that the weights are identical
    # per bin. If you change the binning, that is likely no longer the case,
    # and you inevitably end up with values averaged over bins, which then no
    # longer equal the individual weights when those are not identical per
    # bin.

    n_evts = 10000
    x = np.linspace(0, 100, n_evts, dtype=FTYPE)
    y = np.linspace(0, 100, n_evts, dtype=FTYPE)
    w = np.tile(np.arange(100, dtype=FTYPE) + 0.5, (100, 1)).T.ravel()

    container = Container('test', 'events')
    container['x'] = x
    container['y'] = y
    container['w'] = w

    binning_x = OneDimBinning(name='x',
                              num_bins=100,
                              is_lin=True,
                              domain=[0, 100])
    binning_y = OneDimBinning(name='y',
                              num_bins=100,
                              is_lin=True,
                              domain=[0, 100])
    binning = MultiDimBinning([binning_x, binning_y])

    logging.trace('Testing container and translation methods')

    container.representation = binning
    bx = container['x']
    m = np.meshgrid(binning.midpoints[0].m, binning.midpoints[1].m)[1].ravel()
    assert np.allclose(bx, m, **ALLCLOSE_KW), f'test:\n{bx}\n!= ref:\n{m}'

    # array repr
    container.representation = 'events'
    array_weights = container['w']
    assert np.allclose(array_weights, w,
                       **ALLCLOSE_KW), f'test:\n{array_weights}\n!= ref:\n{w}'

    # binned repr
    container.representation = binning
    diag = np.diag(np.arange(100) + 0.5)
    bd = container['w']
    h = container.get_hist('w')

    assert np.allclose(bd, diag.ravel(),
                       **ALLCLOSE_KW), f'test:\n{bd}\n!= ref:\n{diag.ravel()}'
    assert np.allclose(h[0], diag,
                       **ALLCLOSE_KW), f'test:\n{h[0]}\n!= ref:\n{diag}'
    assert h[1] == binning, f'test:\n{h[1]}\n!= ref:\n{binning}'

    # augment to array repr again
    container.representation = 'events'
    a = container['w']

    assert np.allclose(a, w, **ALLCLOSE_KW), f'test:\n{a}\n!= ref:\n{w}'
Example 10
    def __init__(self,
                 input_names,
                 output_name,
                 input_binning=None,
                 output_binning=None,
                 tex=None,
                 hash=None,
                 error_method=None):  # pylint: disable=redefined-builtin
        # Convert a single string into a list containing that string, so that
        # the interface is uniform
        if isinstance(input_names, basestring):
            input_names = [input_names]
        else:
            input_names = [name for name in input_names]
        self._input_names = input_names

        assert isinstance(output_name, basestring)
        self._output_name = output_name

        if input_binning is not None:
            if not isinstance(input_binning, MultiDimBinning):
                if isinstance(input_binning, Sequence):
                    input_binning = MultiDimBinning(input_binning)
                else:
                    input_binning = MultiDimBinning(**input_binning)
            self._input_binning = input_binning
        else:
            self._input_binning = None

        if output_binning is not None:
            if not isinstance(output_binning, MultiDimBinning):
                if isinstance(output_binning, Sequence):
                    output_binning = MultiDimBinning(output_binning)
                else:
                    output_binning = MultiDimBinning(**output_binning)
            self._output_binning = output_binning
        else:
            self._output_binning = None

        self._tex = tex
        self._hash = hash
        if bool(error_method):
            self._error_method = error_method
        else:
            self._error_method = None
Example 11
def validate_calc_grid(calc_grid):
    """Check whether a multi-dimensional binning is suitable for use as
    the grid on which oscillations are calculated for event-by-event
    reweighting."""
    calc_grid = MultiDimBinning(calc_grid)
    dim_names = set(calc_grid.names)
    if dim_names != {'true_energy', 'true_coszen'}:
        raise ValueError('Oscillation grid must contain "true_energy" and'
                         ' "true_coszen" dimensions, and no more! Got "%s".'
                         % dim_names)
Example 12
def pipeline_cfg_from_states(state_dict):
    """Recover a pipeline cfg containing PISA objects from a raw state.

    When a pipeline configuration is stored to JSON, the PISA objects turn into
    their serialized states. This function looks through the dictionary returned by
    `from_json` and recovers the PISA objects such as `ParamSet` and `MultiDimBinning`.

    Reading and writing PISA objects nested inside dictionaries/lists to JSON,
    and recovering them afterwards, should really become part of PISA's file
    I/O functionality...
    """

    # TODO: Make this a core functionality of PISA

    # This is just a mess... some objects have a `from_state` method, some take the
    # unpacked state dict as input, some take the state...

    pipeline_cfg = collections.OrderedDict()
    for stage_key in state_dict.keys():
        # need to check all of this manually... no automatic way to do it :(
        if stage_key == "pipeline":
            pipeline_cfg[stage_key] = copy.deepcopy(state_dict[stage_key])
            pipeline_cfg[stage_key]["output_key"] = tuple(
                pipeline_cfg[stage_key]["output_key"])
            binning_state = pipeline_cfg[stage_key]["output_binning"]
            pipeline_cfg[stage_key]["output_binning"] = MultiDimBinning(
                **binning_state)
            continue
        # undo what we did in `serialize_pipeline_cfg` by splitting the keys into tuples
        tuple_key = tuple(stage_key.split("__"))
        pipeline_cfg[tuple_key] = copy.deepcopy(state_dict[stage_key])
        for k in ["calc_mode", "apply_mode", "node_mode"]:
            if k in pipeline_cfg[tuple_key]:
                if isinstance(pipeline_cfg[tuple_key][k],
                              collections.abc.Mapping):
                    pipeline_cfg[tuple_key][k] = MultiDimBinning(
                        **pipeline_cfg[tuple_key][k])
        if "params" in pipeline_cfg[tuple_key].keys():
            pipeline_cfg[tuple_key]["params"] = ParamSet(
                pipeline_cfg[tuple_key]["params"])
    # if any stage takes any other arguments that we didn't think of here, they
    # won't work
    return pipeline_cfg
Example 13
    def _flatten_to_1d(in_map):
        assert isinstance(in_map, Map)

        bin_name = reduce(add, in_map.binning.names)
        num_bins = np.prod(in_map.shape)
        binning = MultiDimBinning([
            OneDimBinning(name=bin_name,
                          num_bins=num_bins,
                          is_lin=True,
                          domain=[0, num_bins])
        ])
        hist = in_map.hist.flatten()

        return Map(name=in_map.name, hist=hist, binning=binning)
Example 14
    def array_to_binned(self, key, src_representation, dest_representation):
        """Histogram data array into binned data.

        Parameters
        ----------
        key : str
        src_representation : str
        dest_representation : MultiDimBinning
        #averaged : bool
        #    if True, the histogram entries are averages of the numbers that
        #    end up in a given bin. This for example must be used when
        #    oscillation probabilities are translated... otherwise we end up
        #    with probability*count per bin

        Notes
        -----
        Right now, CPU-only.
        """
        # TODO: make work for n-dim
        logging.trace('Transforming %s array to binned data' % (key))

        assert src_representation in self.array_representations
        assert isinstance(dest_representation, MultiDimBinning)

        if not dest_representation.is_irregular:
            sample = []
            dimensions = []
            for d in dest_representation:
                if d.is_log:
                    self.representation = "log_events"
                    sample.append(self[d.name])
                    dimensions.append(
                        OneDimBinning(d.name,
                                      domain=np.log(d.domain.m),
                                      num_bins=d.num_bins))
                else:
                    self.representation = "events"
                    sample.append(self[d.name])
                    dimensions.append(d)
            hist_binning = MultiDimBinning(dimensions)
        else:
            self.representation = src_representation
            sample = [self[name] for name in dest_representation.names]
            hist_binning = dest_representation

        self.representation = src_representation
        weights = self[key]

        hist = histogram(sample, weights, hist_binning, averaged=True)

        return hist
Example 15
    def setup_function(self):
        scale_file = find_resource(self.scale_file)
        logging.info("Loading scaling factors from: %s", scale_file)

        scaling_dict = from_json(scale_file)
        scale_binning = MultiDimBinning(
            **scaling_dict[self.variable]["binning"])

        scale_factors = np.array(scaling_dict[self.variable]["scales"],
                                 dtype=FTYPE)
        logging.info(f"Binning for ad-hoc systematic: \n {str(scale_binning)}")
        logging.info(
            f"scaling factors of ad-hoc systematic:\n {str(scale_factors)}")
        self.data.representation = scale_binning
        for container in self.data:
            container["adhoc_scale_factors"] = scale_factors
Example 16
def test_standard_plots(xsec_file, outdir='./'):
    from pisa.utils.plotter import Plotter
    xsec = genie.get_combined_xsec(xsec_file)

    e_bins = MultiDimBinning(
        [OneDimBinning(name='true_energy', tex=r'E_\nu', num_bins=150,
                       domain=(1E-1, 1E3)*ureg.GeV, is_log=True)]
    )
    xsec.compute_maps(e_bins)

    logging.info('Making plots for genie xsec_maps')
    plot_obj = Plotter(outdir=outdir, stamp='Cross-Section', fmt='png',
                       log=True, size=(12, 8),
                       label=r'Cross-Section ($m^{2}$)')
    maps = xsec.return_mapset()
    plot_obj.plot_xsec(maps, ylim=(1E-43, 1E-37))
Example 17
def test_per_e_plot(xsec_file, outdir='./'):
    from pisa.utils.plotter import Plotter
    xsec = genie.get_combined_xsec(xsec_file)

    e_bins = MultiDimBinning(
        [OneDimBinning(name='true_energy', tex=r'E_\nu', num_bins=200,
                       domain=(1E-1, 1E3)*ureg.GeV, is_log=True)]
    )
    xsec.compute_maps(e_bins)
    xsec.scale_maps(1/e_bins.true_energy.bin_widths.magnitude)

    logging.info('Making plots for genie xsec_maps per energy')
    plot_obj = Plotter(outdir=outdir, stamp='Cross-Section / Energy',
                       fmt='png', log=False, size=(12, 8),
                       label=r'Cross-Section / Energy ($m^{2} GeV^{-1}$)')
    maps = xsec.return_mapset()
    plot_obj.plot_xsec(maps, ylim=(3.5E-41, 3E-40))
Example 18
    def digitize(self, kinds, binning, binning_cols=None):
        """Wrapper for numpy's digitize function."""
        if isinstance(kinds, basestring):
            kinds = [kinds]
        if 'muons' not in kinds and 'noise' not in kinds:
            kinds = self._parse_flavint_groups(kinds)
        kinds = kinds[0]

        if isinstance(binning_cols, basestring):
            binning_cols = [binning_cols]

        # TODO: units of columns, and convert bin edges if necessary
        if isinstance(binning, OneDimBinning):
            binning = MultiDimBinning([binning])
        elif isinstance(binning, MultiDimBinning):
            pass
        elif (isinstance(binning, Iterable)
              and not isinstance(binning, Sequence)):
            binning = list(binning)
        elif isinstance(binning, Sequence):
            pass
        else:
            raise TypeError('Unhandled type %s for `binning`.' % type(binning))

        if isinstance(binning, Sequence):
            raise NotImplementedError(
                'Simple sequences not handled at this time. Please specify a'
                ' OneDimBinning or MultiDimBinning object for `binning`.')
            # assert len(binning_cols) == len(binning)
            # bin_edges = binning

        # TODO: units support for Data will mean we can do `m_as(...)` here!
        bin_edges = [edges.magnitude for edges in binning.bin_edges]
        if binning_cols is None:
            binning_cols = binning.names
        else:
            assert set(binning_cols).issubset(set(binning.names))

        hist_idxs = []
        for colname in binning_cols:
            sample = self[kinds][colname]
            hist_idxs.append(np.digitize(sample, binning[colname].bin_edges.m))
        hist_idxs = np.vstack(hist_idxs).T

        return hist_idxs
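
The core of `digitize` is one `np.digitize` call per binning column; a NumPy-only sketch of the index layout it returns:

import numpy as np

edges = np.linspace(0.0, 100.0, 11)  # 10 bins on [0, 100]
x = np.array([5.0, 37.0, 99.0])
y = np.array([0.0, 50.0, 42.0])

hist_idxs = np.vstack([np.digitize(x, edges),
                       np.digitize(y, edges)]).T
print(hist_idxs)  # shape (n_events, n_dims); np.digitize indices are
                  # 1-based for values inside the first bin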
Example 19
def histogram_np(sample, weights, binning, apply_weights=True):  # pylint: disable=missing-docstring
    """Helper function for numpy histograms."""
    binning = MultiDimBinning(binning)

    bin_edges = [edges.magnitude for edges in binning.bin_edges]
    if weights is not None and weights.ndim == 2:
        # that means it's 1-dim data instead of scalars
        hists = []
        for i in range(weights.shape[1]):
            w = weights[:, i] if apply_weights else None
            hist, _ = np.histogramdd(sample=sample, weights=w, bins=bin_edges)
            hists.append(hist.ravel())
        flat_hist = np.stack(hists, axis=1)
    else:
        w = weights if apply_weights else None
        hist, _ = np.histogramdd(sample=sample, weights=w, bins=bin_edges)
        flat_hist = hist.ravel()
    return flat_hist.astype(FTYPE)
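
The 2-D-weights branch above (one histogram per weight column) can be reproduced with `np.histogramdd` alone; this sketch assumes nothing beyond NumPy:

import numpy as np

rng = np.random.RandomState(0)
sample = [rng.rand(1000), rng.rand(1000)]  # one array per dimension
weights = rng.rand(1000, 3)                # three weight columns
bin_edges = [np.linspace(0, 1, 11)] * 2

hists = []
for i in range(weights.shape[1]):
    hist, _ = np.histogramdd(sample=sample, weights=weights[:, i],
                             bins=bin_edges)
    hists.append(hist.ravel())
flat_hist = np.stack(hists, axis=1)        # shape (100, 3)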
Example 20
    def keepInbounds(self, binning):
        """Cut out any events that fall outside `binning`. Note that events
        that fall exactly on an outer edge are kept.

        Parameters
        ----------
        binning : OneDimBinning or MultiDimBinning

        Returns
        -------
        remaining_events : Events

        """
        try:
            binning = OneDimBinning(binning)
        except Exception:
            pass
        if isinstance(binning, OneDimBinning):
            binning = [binning]
        binning = MultiDimBinning(binning)

        current_cuts = self.metadata['cuts']
        new_cuts = [dim.inbounds_criteria for dim in binning]
        unapplied_cuts = [c for c in new_cuts if c not in current_cuts]
        if not unapplied_cuts:
            logging.debug(
                "All inbounds criteria '%s' have already been"
                " applied. Returning events unmodified.", new_cuts)
            return self

        all_cuts = deepcopy(current_cuts) + unapplied_cuts

        # Create a single cut from all unapplied cuts
        keep_criteria = ' & '.join(['(%s)' % c for c in unapplied_cuts])

        # Do the cutting
        remaining_events = self.applyCut(keep_criteria=keep_criteria)

        # Replace the combined 'cuts' string with individual cut strings
        remaining_events.metadata['cuts'] = all_cuts

        return remaining_events
Example 21
def pisa2_map_to_pisa3_map(pisa2_map,
                           ebins_name='ebins',
                           czbins_name='czbins',
                           is_log=True,
                           is_lin=True):
    expected_keys = ['map', 'ebins', 'czbins']
    if sorted(pisa2_map.keys()) != sorted(expected_keys):
        raise ValueError(
            'PISA 2 map should be a dict containining entries: %s' %
            expected_keys)
    ebins = OneDimBinning(name=ebins_name,
                          bin_edges=pisa2_map['ebins'] * ureg.GeV,
                          is_log=is_log,
                          tex=r'E_{\nu}')
    czbins = OneDimBinning(name=czbins_name,
                           bin_edges=pisa2_map['czbins'],
                           is_lin=is_lin,
                           tex=r'\cos\theta_Z')
    bins = MultiDimBinning([ebins, czbins])
    return Map(name='pisa2equivalent', hist=pisa2_map['map'], binning=bins)
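
A hypothetical invocation, assuming the function above is in scope and making up a small PISA 2-style dict (the values are illustrative only):

import numpy as np

pisa2_map = {
    'ebins': np.geomspace(1.0, 80.0, 11),  # 11 edges -> 10 energy bins
    'czbins': np.linspace(-1.0, 0.0, 6),   # 6 edges -> 5 coszen bins
    'map': np.random.rand(10, 5),
}
pisa3_map = pisa2_map_to_pisa3_map(pisa2_map)
print(pisa3_map.binning.names)  # ['ebins', 'czbins']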
Example 22
def test_histogram():
    n_evts = 100
    x = np.arange(n_evts, dtype=FTYPE)
    y = np.arange(n_evts, dtype=FTYPE)
    w = np.ones(n_evts, dtype=FTYPE)
    #w *= np.random.rand(n_evts)

    x = SmartArray(x)
    y = SmartArray(y)
    w = SmartArray(w)


    binning_x = OneDimBinning(name='x', num_bins=10, is_lin=True, domain=[0, 100])
    binning_y = OneDimBinning(name='y', num_bins=10, is_lin=True, domain=[0, 100])
    binning = MultiDimBinning([binning_x, binning_y])

    sample = [x, y]
    weights = w
    averaged = True

    histo = histogram(sample, weights, binning, averaged)

    assert np.array_equal(histo.reshape(10, 10), np.zeros(shape=(10, 10)))
Example 23
def create_pseudo_data(toymc_params, seed=None):
    '''
    Create pseudo data consisting of a Gaussian peak
    on top of a uniform background
    '''
    if seed is not None:
        np.random.seed(seed)

    binning = toymc_params.binning
    #
    # Gaussian signal peak
    #
    signal = np.random.normal(loc=toymc_params.mu,
                              scale=toymc_params.sigma,
                              size=toymc_params.nsig)

    #
    # Uniform background
    #
    background = np.random.uniform(high=toymc_params.nbackground_high,
                                   low=toymc_params.nbackground_low,
                                   size=toymc_params.nbkg)
    total_data = np.concatenate([signal, background])
    counts_data, _ = np.histogram(total_data, bins=binning.bin_edges.magnitude)

    # Convert data histogram into a pisa map
    data_map = Map(name='total',
                   binning=MultiDimBinning([binning]),
                   hist=counts_data)

    # Set the errors as the sqrt of the counts
    data_map.set_errors(error_hist=np.sqrt(counts_data))

    data_as_mapset = MapSet([data_map])

    return data_as_mapset
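
A hypothetical `toymc_params` sufficient to drive the function above; the attribute names are read off the function body, and `SimpleNamespace` stands in for whatever parameter container the toy-MC script actually uses:

from types import SimpleNamespace
from pisa.core.binning import OneDimBinning

toymc_params = SimpleNamespace(
    binning=OneDimBinning(name='x', num_bins=20, is_lin=True,
                          domain=[0, 40]),
    mu=20.0, sigma=3.0, nsig=1000,                          # Gaussian peak
    nbackground_low=0.0, nbackground_high=40.0, nbkg=2000,  # uniform bkg
)
data_mapset = create_pseudo_data(toymc_params, seed=0)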
Example 24
def parse_pipeline_config(config):
    """Parse pipeline config.

    Parameters
    ----------
    config : string or ConfigParser

    Returns
    -------
    stage_dicts : OrderedDict
        Keys are (stage_name, service_name) tuples and values are OrderedDicts
        with keys the argnames and values the arguments' values. Some known arg
        values are parsed out fully into Python objects, while the rest remain
        as strings that must be used or parsed elsewhere.

    """
    # Note: imports placed here to avoid circular imports
    from pisa.core.binning import MultiDimBinning, OneDimBinning
    from pisa.core.param import ParamSelector

    if isinstance(config, basestring):
        config = from_file(config)
    elif isinstance(config, PISAConfigParser):
        pass
    else:
        raise TypeError(
            '`config` must either be a string or PISAConfigParser. Got %s '
            'instead.' % type(config))

    if not config.has_section('binning'):
        raise NoSectionError(
            "Could not find 'binning'. Only found sections: %s" %
            config.sections())

    # Create binning objects
    binning_dict = {}
    for name, value in config['binning'].items():
        if name.endswith('.order'):
            order = split(config.get('binning', name))
            binning, _ = split(name, sep='.')
            bins = []
            for bin_name in order:
                try:
                    def_raw = config.get('binning', binning + '.' + bin_name)
                except:
                    dims_defined = [
                        split(dim, sep='.')[1]
                        for dim in config['binning'].keys()
                        if dim.startswith(binning +
                                          '.') and not dim.endswith('.order')
                    ]
                    logging.error(
                        "Failed to find definition of '%s' dimension of '%s'"
                        " binning entry. Only found definition(s) of: %s",
                        bin_name, binning, dims_defined)
                    del dims_defined
                    raise
                try:
                    kwargs = eval(def_raw)  # pylint: disable=eval-used
                except:
                    logging.error(
                        "Failed to evaluate definition of '%s' dimension of"
                        " '%s' binning entry:\n'%s'", bin_name, binning,
                        def_raw)
                    raise
                try:
                    bins.append(OneDimBinning(bin_name, **kwargs))
                except:
                    logging.error(
                        "Failed to instantiate new `OneDimBinning` from '%s'"
                        " dimension of '%s' binning entry with definition:\n"
                        "'%s'\n", bin_name, binning, kwargs)
                    raise
            binning_dict[binning] = MultiDimBinning(bins)

    # Pipeline section
    section = 'pipeline'

    # Get and parse the order of the stages (and which services implement them)
    order = [split(x, STAGE_SEP) for x in split(config.get(section, 'order'))]

    param_selections = []
    if config.has_option(section, 'param_selections'):
        param_selections = split(config.get(section, 'param_selections'))

    detector_name = None
    if config.has_option(section, 'detector_name'):
        detector_name = config.get(section, 'detector_name')

    # Parse [stage.<stage_name>] sections and store to stage_dicts
    stage_dicts = OrderedDict()
    for stage, service in order:
        old_section_header = 'stage%s%s' % (STAGE_SEP, stage)
        new_section_header = '%s%s%s' % (stage, STAGE_SEP, service)
        if config.has_section(old_section_header):
            logging.warning(
                '"%s" is an old-style section header, in the future use "%s"' %
                (old_section_header, new_section_header))
            section = old_section_header
        elif config.has_section(new_section_header):
            section = new_section_header
        else:
            raise IOError(
                'missing section in cfg for stage "%s" service "%s"' %
                (stage, service))

        # Instantiate dict to store args to pass to this stage
        service_kwargs = OrderedDict()

        param_selector = ParamSelector(selections=param_selections)
        service_kwargs['params'] = param_selector

        n_params = 0
        for fullname in config.options(section):
            try:
                value = config.get(section, fullname)
            except:
                logging.error(
                    'Unable to obtain value of option "%s" in section "%s".' %
                    (fullname, section))
                raise
            # See if this matches a param specification
            param_match = PARAM_RE.match(fullname)
            if param_match is not None:
                n_params += 1

                param_match_dict = param_match.groupdict()
                param_subfields = param_match_dict['subfields'].split('.')

                # Figure out what the dotted fields represent...
                infodict = interpret_param_subfields(subfields=param_subfields)

                # If field is an attr, skip since these are located manually
                if infodict['attr'] is not None:
                    continue

                # Check if this param already exists in a previous stage; if
                # so, make sure there are no specs for this param, but just a
                # link to previous the param object that is already
                # instantiated.
                for kw in stage_dicts.values():
                    # Stage did not get a `params` argument from config
                    if 'params' not in kw:
                        continue

                    # Retrieve the param from the ParamSelector
                    try:
                        param = kw['params'].get(name=infodict['pname'],
                                                 selector=infodict['selector'])
                    except KeyError:
                        continue

                    # Make sure there are no other specs (in this section) for
                    # the param defined in the previous section
                    for a in PARAM_ATTRS:
                        if config.has_option(section, '%s.%s' % (fullname, a)):
                            raise ValueError("Parameter spec. '%s' of '%s' "
                                             "found in section '%s', but "
                                             "parameter exists in previous "
                                             "stage!" % (a, fullname, section))

                    break

                # Param *not* found in a previous stage (i.e., no explicit
                # `break` encountered in `for` loop above); therefore must
                # instantiate it.
                else:
                    param = parse_param(config=config,
                                        section=section,
                                        selector=infodict['selector'],
                                        fullname=fullname,
                                        pname=infodict['pname'],
                                        value=value)

                param_selector.update(param, selector=infodict['selector'])

            # If it's not a param spec but contains 'binning', assume it's a
            # binning spec for CAKE stages
            elif 'binning' in fullname:
                service_kwargs[fullname] = binning_dict[value]

            # it's gonna be a PI stage
            elif '_specs' in fullname:
                value = parse_string_literal(value)
                # is it None?
                if value is None:
                    service_kwargs[fullname] = value
                # is it evts?
                elif value in ['evnts', 'events']:
                    service_kwargs[fullname] = 'events'
                # so it gotta be a binning
                else:
                    service_kwargs[fullname] = binning_dict[value]

            # it's a list of in/output names
            elif fullname.endswith('_names'):
                value = split(value)
                service_kwargs[fullname] = value
            # Otherwise it's some other stage instantiation argument; identify
            # this by its full name and try to interpret and instantiate a
            # Python object using the string
            else:
                try:
                    value = parse_quantity(value)
                    value = value.nominal_value * value.units
                except ValueError:
                    value = parse_string_literal(value)
                service_kwargs[fullname] = value

        # If no params actually specified in config, remove 'params' from the
        # service's keyword args
        if n_params == 0:
            service_kwargs.pop('params')

        # Store the service's kwargs to the stage_dicts
        stage_dicts[(stage, service)] = service_kwargs

    stage_dicts['detector_name'] = detector_name
    return stage_dicts
Example 25
def test_BinnedTensorTransform():
    """Unit tests for BinnedTensorTransform class"""
    binning = MultiDimBinning([
        dict(name='energy',
             is_log=True,
             domain=(1, 80) * ureg.GeV,
             num_bins=10),
        dict(name='coszen', is_lin=True, domain=(-1, 0), num_bins=5)
    ])

    nue_map = Map(name='nue',
                  binning=binning,
                  hist=np.random.random(binning.shape))
    nue_map.set_poisson_errors()
    numu_map = Map(name='numu',
                   binning=binning,
                   hist=np.random.random(binning.shape))
    numu_map.set_poisson_errors()
    inputs = MapSet(
        name='inputs',
        maps=[nue_map, numu_map],
    )

    xform0 = BinnedTensorTransform(input_names='nue',
                                   output_name='nue',
                                   input_binning=binning,
                                   output_binning=binning,
                                   xform_array=2 * np.ones(binning.shape))

    xform1 = BinnedTensorTransform(input_names=['numu'],
                                   output_name='numu',
                                   input_binning=binning,
                                   output_binning=binning,
                                   xform_array=3 * np.ones(binning.shape))

    xform2 = BinnedTensorTransform(
        input_names=['nue', 'numu'],
        output_name='nue_numu',
        input_binning=binning,
        output_binning=binning,
        xform_array=np.stack(
            [2 * np.ones(binning.shape), 3 * np.ones(binning.shape)], axis=0))
    assert np.all((xform2 + 2).xform_array - xform2.xform_array == 2)

    testdir = tempfile.mkdtemp()
    try:
        for i, t in enumerate([xform0, xform1, xform2]):
            t_file = os.path.join(testdir, str(i) + '.json')
            t.to_json(t_file)
            t_ = BinnedTensorTransform.from_json(t_file)
            assert t_ == t, 't=\n%s\nt_=\n%s' % (t, t_)
    finally:
        shutil.rmtree(testdir, ignore_errors=True)

    logging.info('<< PASS : test_BinnedTensorTransform >>')

    xforms = TransformSet(name='scaling',
                          transforms=[xform0, xform1, xform2],
                          hash=9)

    assert xforms.hash == 9
    xforms.hash = -20
    assert xforms.hash == -20

    _ = xforms.apply(inputs)

    # TODO: get this working above, then test here!
    #xforms2 = xforms * 2

    testdir = tempfile.mkdtemp()
    try:
        for i, t in enumerate([xforms]):
            t_filename = os.path.join(testdir, str(i) + '.json')
            t.to_json(t_filename)
            t_ = TransformSet.from_json(t_filename)
            assert t_ == t, 't=\n%s\nt_=\n%s' % (t.transforms, t_.transforms)
    finally:
        shutil.rmtree(testdir, ignore_errors=True)

    logging.info('<< PASS : test_TransformSet >>')
Example 26
 def binning(self):
     binning = self._reference_state["binning"]
     if not is_binning(binning):
         binning = MultiDimBinning(**binning)
     return binning
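
A sketch of the state round-trip this property guards against, assuming (as in Examples 12 and 25) that a serialized `MultiDimBinning` state is a mapping whose `dimensions` entry lists per-dimension keyword arguments:

from pisa.core.binning import MultiDimBinning

state = {
    'dimensions': [
        dict(name='energy', is_log=True, domain=[1, 80], num_bins=10),
        dict(name='coszen', is_lin=True, domain=[-1, 0], num_bins=5),
    ]
}
binning = MultiDimBinning(**state)  # same pattern as in the property above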
Example 27
    def histogram(self,
                  kinds,
                  binning,
                  binning_cols=None,
                  weights_col=None,
                  errors=False,
                  name=None,
                  tex=None,
                  **kwargs):
        """Histogram the events of all `kinds` specified, with `binning` and
        optionally applying `weights`.

        Parameters
        ----------
        kinds : string, sequence of NuFlavInt, or NuFlavIntGroup
        binning : OneDimBinning, MultiDimBinning or sequence of arrays
            (one array per binning dimension)
        binning_cols : string or sequence of strings
            Bin only these dimensions, ignoring other dimensions in `binning`
        weights_col : None or string
            Column to use for weighting the events
        errors : bool
            Whether to attach errors to the resulting Map
        name : None or string
            Name to give to resulting Map. If None, a default is derived from
            `kinds` and `weights_col`.
        tex : None or string
            TeX label to give to the resulting Map. If None, default is
            derived from the `name` specified or the derived default.
        **kwargs : Keyword args passed to Map object

        Returns
        -------
        Map : numpy ndarray with as many dimensions as specified by `binning`
            argument

        """
        # TODO: make able to take integer for `binning` and--in combination
        # with units in the Data columns--generate an appropriate
        # MultiDimBinning object, attach this and return the package as a Map.

        if isinstance(kinds, basestring):
            kinds = [kinds]
        if 'muons' not in kinds and 'noise' not in kinds:
            kinds = self._parse_flavint_groups(kinds)
        kinds = kinds[0]

        if isinstance(binning_cols, basestring):
            binning_cols = [binning_cols]
        assert weights_col is None or isinstance(weights_col, basestring)

        # TODO: units of columns, and convert bin edges if necessary
        if isinstance(binning, OneDimBinning):
            binning = MultiDimBinning([binning])
        elif isinstance(binning, MultiDimBinning):
            pass
        elif (isinstance(binning, Iterable)
              and not isinstance(binning, Sequence)):
            binning = list(binning)
        elif isinstance(binning, Sequence):
            pass
        else:
            raise TypeError('Unhandled type %s for `binning`.' % type(binning))

        if isinstance(binning, Sequence):
            raise NotImplementedError(
                'Simple sequences not handled at this time. Please specify a'
                ' OneDimBinning or MultiDimBinning object for `binning`.')
            # assert len(binning_cols) == len(binning)
            # bin_edges = binning

        # TODO: units support for Data will mean we can do `m_as(...)` here!
        bin_edges = [edges.magnitude for edges in binning.bin_edges]
        if binning_cols is None:
            binning_cols = binning.names
        else:
            assert set(binning_cols).issubset(set(binning.names))

        # Extract the columns' data into a list of array(s) for histogramming
        sample = [self[kinds][colname] for colname in binning_cols]
        err_weights = None
        hist_weights = None
        if weights_col is not None:
            hist_weights = self[kinds][weights_col]
            if errors:
                err_weights = np.square(hist_weights)

        hist, edges = np.histogramdd(sample=sample,
                                     weights=hist_weights,
                                     bins=bin_edges)
        if errors:
            sumw2, edges = np.histogramdd(sample=sample,
                                          weights=err_weights,
                                          bins=bin_edges)
            hist = unp.uarray(hist, np.sqrt(sumw2))

        if name is None:
            if tex is None:
                try:
                    tex = kinds.tex
                # TODO: specify specific exception(s)
                except:
                    tex = r'{0}'.format(kinds)
                if weights_col is not None:
                    tex += r', \; {\rm weights} =' + text2tex(weights_col)

            name = str(kinds)
            if weights_col is not None:
                name += ', weights=' + weights_col

        if tex is None:
            tex = text2tex(name)

        return Map(name=name, hist=hist, binning=binning, tex=tex, **kwargs)
Example 28
def test_Events():
    """Unit tests for Events class"""
    from pisa.utils.flavInt import NuFlavInt
    # Instantiate empty object
    events = Events()

    # Instantiate from PISA events HDF5 file
    events = Events(
        'events/events__vlvnt__toy_1_to_80GeV_spidx1.0_cz-1_to_1_1e2evts_set0__unjoined__with_fluxes_honda-2015-spl-solmin-aa.hdf5'
    )

    # Apply a simple cut
    events = events.applyCut('(true_coszen <= 0.5) & (true_energy <= 70)')
    for fi in events.flavints:
        assert np.max(events[fi]['true_coszen']) <= 0.5
        assert np.max(events[fi]['true_energy']) <= 70

    # Apply an "inbounds" cut via a OneDimBinning
    true_e_binning = OneDimBinning(name='true_energy',
                                   num_bins=80,
                                   is_log=True,
                                   domain=[10, 60] * ureg.GeV)
    events = events.keepInbounds(true_e_binning)
    for fi in events.flavints:
        assert np.min(events[fi]['true_energy']) >= 10
        assert np.max(events[fi]['true_energy']) <= 60

    # Apply an "inbounds" cut via a MultiDimBinning
    true_e_binning = OneDimBinning(name='true_energy',
                                   num_bins=80,
                                   is_log=True,
                                   domain=[20, 50] * ureg.GeV)
    true_cz_binning = OneDimBinning(name='true_coszen',
                                    num_bins=40,
                                    is_lin=True,
                                    domain=[-0.8, 0])
    mdb = MultiDimBinning([true_e_binning, true_cz_binning])
    events = events.keepInbounds(mdb)
    for fi in events.flavints:
        assert np.min(events[fi]['true_energy']) >= 20
        assert np.max(events[fi]['true_energy']) <= 50
        assert np.min(events[fi]['true_coszen']) >= -0.8
        assert np.max(events[fi]['true_coszen']) <= 0

    # Now try to apply a cut that fails on one flav/int (since the field will
    # be missing) and make sure that the cut did not get applied anywhere in
    # the end (i.e., it is rolled back)
    sub_evts = events['nutaunc']
    sub_evts.pop('true_energy')
    events['nutaunc'] = sub_evts
    try:
        events = events.applyCut('(true_energy >= 30) & (true_energy <= 40)')
    except Exception:
        pass
    else:
        raise Exception('Should not have been able to apply the cut!')
    for fi in events.flavints:
        if fi == NuFlavInt('nutaunc'):
            continue
        assert np.min(events[fi]['true_energy']) < 30

    logging.info(
        '<< PASS : test_Events >> (note:'
        ' "[   ERROR] Events object is in an inconsistent state. Reverting cut'
        ' for all flavInts." message above **is expected**.)')
Example 29
def get_hist(
    sample,
    binning,
    weights=None,
    bw_method="scott",
    adaptive=True,
    alpha=0.3,
    use_cuda=False,
    coszen_reflection=0.25,
    coszen_name="coszen",
    oversample=1,
    bootstrap=False,
    bootstrap_niter=10,
):
    """Helper function for histograms from KDE

    For description of args see kde_histogramdd()

    Handles the reflections at the coszen edges.

    ToDo
    ----
    * Handle zenith like coszen? Or better: define a set of variables to
      perform reflection on, plus reflection parameters (e.g. `reflect_fract`
      or somesuch to stand in for `coszen_reflection`, and `reflect_dims` as
      stand-in for `coszen_name`; also need some way to specify whether to
      reflect about the lower and/or upper edge); each such parameter can
      either be a single value or a sequence with one value per variable.
    * Any good reason for 0.25 and 'scott' defaults? If not, don't define a
      default and force the user to explicitly set this when function is called.
    """

    if bootstrap and oversample > 1:
        # Because the errors within a bin are highly correlated, they cannot
        # simply be added in quadrature to create an oversampled histogram
        # with errors.

    # the KDE implementation expects an empty weights array instead of `None`
    if weights is None:
        weights = []

    # Get the overall normalization here, because the KDE will be normalized
    # to one and we'll need to rescale in the end
    if len(weights) == 0:
        norm = sample.shape[0]
    else:
        norm = np.sum(weights)

    binning = binning.oversample(oversample)

    # Flip around to satisfy the kde implementation
    x = sample.T

    # Must have same amount of dimensions as binning dimensions
    assert x.shape[0] == len(binning)

    # TODO: What if coszen isn't in binning? Does this fail?
    # Yes, coszen is expected
    cz_bin = binning.index(coszen_name)

    # Swap out cz bin to first place (index 0)
    if cz_bin != 0:
        # Also swap binning:
        new_binning = [binning[coszen_name]]
        for b in binning:
            if b.name != coszen_name:
                new_binning.append(b)
        binning = MultiDimBinning(new_binning)
        x[[0, cz_bin]] = x[[cz_bin, 0]]

    # Check if edge needs to be reflected
    reflect_lower = binning[coszen_name].bin_edges[0] == -1
    reflect_upper = binning[coszen_name].bin_edges[-1] == 1

    # Get the kernel weights

    kde_kwargs = dict(
        weights=weights,
        bw_method=bw_method,
        adaptive=adaptive,
        alpha=alpha,
        use_cuda=use_cuda,
    )
    if bootstrap:
        kernel_weights_adaptive = bootstrap_kde(x,
                                                niter=bootstrap_niter,
                                                **kde_kwargs)
    else:
        kernel_weights_adaptive = gaussian_kde(x, **kde_kwargs)

    # Get the bin centers, where we're going to evaluate the KDEs, and extend
    # the bin range for reflection
    bin_points = []
    for b in binning:
        c = unp.nominal_values(b.weighted_centers)
        if b.name == coszen_name:
            # how many bins to add for reflection
            l = int(len(c) * coszen_reflection)
            if reflect_lower:
                c0 = 2 * c[0] - c[1:l + 1][::-1]
            else:
                c0 = []
            if reflect_upper:
                c1 = 2 * c[-1] - c[-l - 1:-1][::-1]
            else:
                c1 = []
            c = np.concatenate([c0, c, c1])
        bin_points.append(c)

    # Shape including reflection edges
    megashape = (
        binning.shape[0] + (int(reflect_upper) + int(reflect_lower)) * l,
        binning.shape[1],
    )

    # Shape of the reflection edges alone
    minishape = (binning.shape[0] - l, binning.shape[1])

    # Create a set of points
    grid = np.meshgrid(*bin_points, indexing="ij")
    points = np.array([g.ravel() for g in grid])

    # Evaluate KDEs at given points
    if bootstrap:
        hist, errors = kernel_weights_adaptive(points)
        # Variances can simply be added together when we apply reflections;
        # we take the square root afterwards
        variances = errors**2
    else:
        hist = kernel_weights_adaptive(points)

    # Reshape 1d array into nd
    hist = hist.reshape(megashape)
    if bootstrap:
        variances = variances.reshape(megashape)

    def apply_reflection(hist_):
        # Cut off the reflection edges, mirror them, fill up remaining space with
        # zeros and add to histo
        if reflect_lower:
            hist0 = hist_[0:l, :]
            hist0_0 = np.zeros(minishape)
            hist0 = np.flipud(np.concatenate([hist0_0, hist0]))
            hist_ = hist_[l:, :]
        else:
            hist0 = 0

        if reflect_upper:
            hist1 = hist_[-l:, :]
            hist1_0 = np.zeros(minishape)
            hist1 = np.flipud(np.concatenate([hist1, hist1_0]))
            hist_ = hist_[:-l, :]
        else:
            hist1 = 0

        hist_ = hist_ + hist1 + hist0
        return hist_

    hist = apply_reflection(hist)
    if bootstrap:
        variances = apply_reflection(variances)
        errors = np.sqrt(variances)

    # Bin volumes
    volume = binning.bin_volumes(attach_units=False)
    hist = hist * volume
    if bootstrap:
        errors = errors * volume

    # Downsample, not applicable when bootstrapping
    if oversample != 1:
        for i, b in enumerate(binning):
            hist = np.add.reduceat(hist,
                                   np.arange(0,
                                             len(b.bin_edges) - 1, oversample),
                                   axis=i)

    # Swap back the axes
    if cz_bin != 0:
        hist = np.swapaxes(hist, 0, cz_bin)
        if bootstrap:
            errors = np.swapaxes(errors, 0, cz_bin)

    if bootstrap:
        return hist * norm, errors * norm
    else:
        return hist * norm
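
The mirroring of evaluation points about the outermost bin centers (used above for the coszen edges) can be checked in isolation with NumPy:

import numpy as np

c = np.linspace(-0.9, 0.9, 10)       # bin centers on [-1, 1]
l = int(len(c) * 0.25)               # number of reflected points (here 2)
c0 = 2 * c[0] - c[1:l + 1][::-1]     # mirrored about the first center
c1 = 2 * c[-1] - c[-l - 1:-1][::-1]  # mirrored about the last center
extended = np.concatenate([c0, c, c1])
assert np.allclose(extended[:l], 2 * c[0] - c[l:0:-1])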
Example 30
def kde_histogramdd(sample,
                    binning,
                    weights=None,
                    bw_method="scott",
                    adaptive=True,
                    alpha=0.3,
                    use_cuda=False,
                    coszen_reflection=0.25,
                    coszen_name="coszen",
                    oversample=1,
                    stack_pid=True,
                    bootstrap=False,
                    bootstrap_niter=10):
    """Run kernel density estimation (KDE) for an array of data points, and
    then evaluate them on a histogram-like grid to effectively produce a
    histogram-like output.
    Handles reflection at coszen edges, and expects coszen to be in the binning.

    Based on Sebastian Schoenen's KDE implementation:
    http://code.icecube.wisc.edu/svn/sandbox/schoenen/kde

    Parameters
    ----------
    sample : array
        Shape (N_evts, vars), with vars in the right order corresponding to the
        binning order.

    binning : MultiDimBinning
        A coszen dimension is expected

    weights : None or array
        Same shape as `sample`

    bw_method : string
        'scott' or 'silverman' (see kde module)

    adaptive : bool
        (see kde module)

    alpha : float
        A parameter for the KDEs (see kde module)

    use_cuda : bool
        Run on GPU (only works with <= 2d)

    coszen_reflection : float
        Part (number between 0 and 1) of binning that is reflected at the
        coszen -1 and 1 edges

    coszen_name : string
        Binning name to identify the coszen bin that needs to undergo special
        treatment for reflection

    oversample : int
        Evaluate KDE at more points per bin, takes longer, but is more accurate

    stack_pid : bool
        Treat each pid bin separately, not as another dimension of the KDEs
        Only supported for two additional dimensions, pid binning must be named `pid`

    bootstrap : bool
        Use the ``bootstrap_kde`` class to produce error estimates on the KDE histograms.
        Slow, not recommended during fits.

    bootstrap_niter : int
        Number of bootstrap iterations.

    Returns
    -------
    histogram : numpy.ndarray

    ToDo
    ----
    * Maybe return Map with binnings attached instead of nd-array?
    * Generalize to handle any dimensions with any reflection criteria

    """
    if weights is not None and len(weights) != sample.shape[0]:
        raise ValueError(
            "Length of sample (%s) and weights (%s) incompatible" %
            (sample.shape[0], len(weights)))

    if not stack_pid:
        return get_hist(sample=sample,
                        binning=binning,
                        weights=weights,
                        bw_method=bw_method,
                        adaptive=adaptive,
                        alpha=alpha,
                        use_cuda=use_cuda,
                        coszen_reflection=coszen_reflection,
                        coszen_name=coszen_name,
                        oversample=oversample,
                        bootstrap=bootstrap,
                        bootstrap_niter=bootstrap_niter)

    # Treat pid bins separately, assuming we're dealing with 2-D apart
    # from PID
    bin_names = copy.copy(binning.names)
    bin_edges = [b.bin_edges.m for b in binning]
    pid_bin = bin_names.index("pid")
    other_bins = [0, 1, 2]
    other_bins.pop(pid_bin)
    bin_names.pop(pid_bin)
    assert len(bin_names) == 2
    pid_bin_edges = bin_edges.pop(pid_bin)
    d2d_binning = []
    for b in binning:
        if b.name != "pid":
            d2d_binning.append(b)
    d2d_binning = MultiDimBinning(d2d_binning)
    pid_stack = []
    if bootstrap:
        pid_stack_errors = []

    for pid in range(len(pid_bin_edges) - 1):
        mask_pid = (sample.T[pid_bin] >= pid_bin_edges[pid]) & (
            sample.T[pid_bin] < pid_bin_edges[pid + 1])
        data = np.array([
            sample.T[other_bins[0]][mask_pid],
            sample.T[other_bins[1]][mask_pid]
        ])

        if weights is None:
            weights_pid = None
        else:
            weights_pid = weights[mask_pid]

        hist_kwargs = dict(sample=data.T,
                           weights=weights_pid,
                           binning=d2d_binning,
                           coszen_name=coszen_name,
                           use_cuda=use_cuda,
                           bw_method=bw_method,
                           alpha=alpha,
                           oversample=oversample,
                           coszen_reflection=coszen_reflection,
                           adaptive=adaptive,
                           bootstrap=bootstrap,
                           bootstrap_niter=bootstrap_niter)
        if bootstrap:
            hist, errors = get_hist(**hist_kwargs)
            pid_stack.append(hist)
            pid_stack_errors.append(errors)
        else:
            pid_stack.append(get_hist(**hist_kwargs))

    hist = np.dstack(pid_stack)
    if bootstrap:
        errors = np.dstack(pid_stack_errors)

    if pid_bin != 2:
        hist = np.swapaxes(hist, pid_bin, 2)
        if bootstrap:
            errors = np.swapaxes(errors, pid_bin, 2)

    if bootstrap:
        return hist, errors
    else:
        return hist
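
A hypothetical call with stacked PID; the import path and the unitless binning domains are assumptions, a `pid` dimension is required by `stack_pid=True`, and a coszen dimension is always expected:

import numpy as np
from pisa.core.binning import OneDimBinning, MultiDimBinning
from pisa.utils.kde_hist import kde_histogramdd  # path is an assumption

binning = MultiDimBinning([
    OneDimBinning(name='coszen', num_bins=10, is_lin=True, domain=[-1, 1]),
    OneDimBinning(name='energy', num_bins=10, is_log=True, domain=[1, 80]),
    OneDimBinning(name='pid', num_bins=2, is_lin=True, domain=[0, 2]),
])
n = 1000
sample = np.column_stack([
    np.random.uniform(-1, 1, n),       # coszen
    np.random.uniform(1, 80, n),       # energy
    np.random.randint(0, 2, n) + 0.5,  # pid
])
hist = kde_histogramdd(sample, binning, stack_pid=True)
# expected shape: (10, 10, 2), one 2-D KDE histogram per pid bin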