예제 #1
0
    def _init_hist_binning(
            self, special_binning: Union[None, BinsInputType,
                                         Binning]) -> Binning:
        """Return the one-dimensional binning to use for this histogram.

        :param special_binning: One of

            * ``None`` -- the binning is built from ``self.variable``
              (``n_bins``, ``scope`` and ``use_log_scale``);
            * a ``Binning`` instance -- it is checked against
              ``self.variable`` and returned unchanged;
            * anything else -- forwarded as ``bins`` to ``Binning`` together
              with the scope and log-scale flag of ``self.variable``.
        :return: The ``Binning`` instance selected or created as described.
        :raises AssertionError: If a provided ``Binning`` is not
            one-dimensional, or disagrees with ``self.variable`` in its
            log-scale flag or (when the variable has a scope) its range.
        """
        if isinstance(special_binning, Binning):
            # A ready-made binning is only accepted if it matches the variable.
            assert special_binning.dimensions == 1, special_binning.dimensions
            assert special_binning.log_scale_mask[
                0] == self.variable.use_log_scale, (
                    special_binning.log_scale_mask,
                    self.variable.use_log_scale,
                )
            if self.variable.has_scope():
                assert special_binning.range[0] == self.variable.scope, (
                    special_binning.range, self.variable.scope)
            return special_binning

        # Without an explicit bin definition, use the bin count of the variable.
        if special_binning is None:
            bins_definition = self.variable.n_bins
        else:
            bins_definition = special_binning

        return Binning(
            bins=bins_definition,
            dimensions=1,
            scope=self.variable.scope,
            log_scale=self.variable.use_log_scale,
        )
    def _add_channel_hist_vars(self, channel_name: str,
                               original_binning: Binning) -> None:
        """Create and store one ``HistVariable`` per dimension for a channel.

        For every entry of ``self.variables`` the one-dimensional binning of
        the matching dimension is taken from ``original_binning``; label, name
        and unit are copied from the existing variable, while bin count, scope
        and log-scale flag come from that binning.

        :param channel_name: Key under which the variables are stored; must
            not be registered yet.
        :param original_binning: Binning queried via
            ``get_binning_for_one_dimension`` for each dimension.
        :raises AssertionError: If the channel is already registered or a
            per-dimension binning is not strictly one-dimensional.
        """
        assert channel_name not in self._channel_variables_per_dim, \
            (channel_name, self._channel_variables_per_dim.keys())

        variables_by_dim = {}  # type: Dict[int, HistVariable]
        for dim_index, template_variable in enumerate(self.variables):
            dim_binning = original_binning.get_binning_for_one_dimension(
                dimension=dim_index)

            # Sanity checks: the extracted binning must describe exactly one axis.
            assert dim_binning.dimensions == 1, dim_binning.dimensions
            assert len(dim_binning.num_bins) == 1, dim_binning.num_bins
            assert dim_binning.num_bins[0] == dim_binning.num_bins_total, (
                dim_binning.num_bins, dim_binning.num_bins_total)
            assert len(dim_binning.range) == 1, dim_binning.range
            assert len(dim_binning.log_scale_mask) == 1, dim_binning.log_scale_mask

            variables_by_dim[dim_index] = HistVariable(
                df_label=template_variable.df_label,
                n_bins=dim_binning.num_bins_total,
                scope=dim_binning.range[0],
                var_name=template_variable.variable_name,
                unit=template_variable.unit,
                use_log_scale=dim_binning.log_scale_mask[0],
            )

        self._channel_variables_per_dim[channel_name] = variables_by_dim
    def _compare_binning_to_channel_variable_binning(self, channel_name: str,
                                                     binning: Binning) -> None:
        """Assert that ``binning`` agrees with the variables stored for a channel.

        For every dimension registered for ``channel_name`` the one-dimensional
        binning of that dimension is extracted from ``binning`` and compared
        with the stored variable: number of bins, scope and log-scale flag
        must all match.

        :param channel_name: Name of an already registered channel.
        :param binning: Binning to be checked against the channel's variables.
        :raises AssertionError: If the channel is unknown or any of the checks
            fails. The assertion payload contains the compared per-dimension
            values and the ``df_label`` of the affected variable.
        """
        assert channel_name in self._channel_variables_per_dim.keys(), \
            (channel_name, self._channel_variables_per_dim.keys())

        for dimension, variable in self.channel_variables_per_dim_dict[
                channel_name].items():
            binning_for_dim = binning.get_binning_for_one_dimension(
                dimension=dimension)
            assert binning_for_dim.dimensions == 1, (
                binning_for_dim.dimensions, variable.df_label)

            assert len(
                binning_for_dim.num_bins) == 1, (binning_for_dim.num_bins,
                                                 variable.df_label)
            assert binning_for_dim.num_bins[0] == binning_for_dim.num_bins_total, \
                (binning_for_dim.num_bins, binning_for_dim.num_bins_total, variable.df_label)

            # Report the per-dimension value that was actually compared, not
            # the total of the full input binning.
            assert binning_for_dim.num_bins_total == variable.n_bins, \
                (binning_for_dim.num_bins_total, variable.n_bins, variable.df_label)

            assert len(binning_for_dim.range) == 1, (binning_for_dim.range,
                                                     variable.df_label)
            # Same here: show the range of the inspected dimension only.
            assert binning_for_dim.range[0] == variable.scope, (
                binning_for_dim.range, variable.scope, variable.df_label)

            assert len(binning_for_dim.log_scale_mask) == 1, (
                binning_for_dim.log_scale_mask, variable.df_label)
            assert binning_for_dim.log_scale_mask[0] == variable.use_log_scale, \
                (binning_for_dim.log_scale_mask, variable.use_log_scale, variable.df_label)
예제 #4
0
    def _init_secondary_var(
        secondary_hist_variable: HistVariable,
        secondary_variable_binning: Optional[Binning],
    ) -> Tuple[HistVariable, Binning]:
        """Pair the secondary variable with a validated one-dimensional binning.

        :param secondary_hist_variable: Variable whose ``n_bins`` and ``scope``
            define the default binning and whose ``scope`` must equal the
            range of the resulting binning.
        :param secondary_variable_binning: Binning to use as is; if ``None`` a
            new ``Binning`` is created from the variable with
            ``log_scale=False``.
        :return: The unchanged variable and the binning that passed the checks.
        :raises AssertionError: If the binning is not a one-dimensional
            ``Binning`` with a tuple-of-tuple range, or its range differs from
            the variable's scope.
        """
        if secondary_variable_binning is not None:
            # The type of the variable is only verified on this branch.
            assert isinstance(
                secondary_hist_variable,
                HistVariable), type(secondary_hist_variable).__name__
            checked_binning = secondary_variable_binning
        else:
            checked_binning = Binning(
                bins=secondary_hist_variable.n_bins,
                dimensions=1,
                scope=secondary_hist_variable.scope,
                log_scale=False,
            )

        assert isinstance(checked_binning,
                          Binning), type(checked_binning).__name__

        # Structural checks: exactly one dimension, range given as ((low, high),).
        assert checked_binning.dimensions == 1, checked_binning.dimensions
        assert isinstance(checked_binning.range,
                          tuple), type(checked_binning.range)
        assert len(checked_binning.range) == 1, (checked_binning.range,
                                                 len(checked_binning.range))
        assert isinstance(checked_binning.range[0], tuple), (
            type(checked_binning.range[0]), checked_binning.range[0])

        assert secondary_hist_variable.scope == checked_binning.range[0], (
            secondary_hist_variable.scope, checked_binning.range)

        return secondary_hist_variable, checked_binning
def find_common_binning_for_distributions(
        distributions: Sequence[BinnedDistribution]) -> Binning:
    """Determine a binning that is shared by all given distributions.

    If every distribution already has the same binning, that binning is
    returned. Otherwise a new ``Binning`` is built from the ranges reported by
    ``find_ranges_for_distributions`` and, per dimension, the largest number
    of bins found among the distributions.

    :param distributions: Sequence of ``BinnedDistribution`` objects with the
        same number of dimensions and the same log-scale mask.
    :return: The common ``Binning``.
    :raises AssertionError: If the input is not a sequence of
        ``BinnedDistribution`` objects, or the dimensions or log-scale masks
        of the distributions differ.
    """
    assert isinstance(distributions,
                      CollectionsABCSequence), type(distributions)
    assert all(isinstance(dist, BinnedDistribution)
               for dist in distributions), [type(d) for d in distributions]

    # Shortcut: nothing to merge if all binnings are already equal.
    first_binning = distributions[0].binning
    if all(dist.binning == first_binning for dist in distributions):
        return first_binning

    n_dims = distributions[0].dimensions
    assert all(dist.dimensions == n_dims for dist in distributions), \
        ([d.dimensions for d in distributions], n_dims)

    shared_log_mask = first_binning.log_scale_mask
    assert all(dist.binning.log_scale_mask == shared_log_mask for dist in distributions), \
        ([d.binning.log_scale_mask for d in distributions], shared_log_mask)

    # Most general binning: widest ranges and finest granularity per dimension.
    widest_ranges = find_ranges_for_distributions(distributions=distributions)
    bins_of_each_dist = [dist.binning.num_bins for dist in distributions]
    finest_num_bins = tuple(
        max(dist_bins[dim] for dist_bins in bins_of_each_dist)
        for dim in range(n_dims))

    return Binning(bins=finest_num_bins,
                   dimensions=n_dims,
                   scope=widest_ranges,
                   log_scale=shared_log_mask)
예제 #6
0
    def __init__(self,
                 bins: BinsInputType,
                 dimensions: int,
                 scope: ScopeInputType = None,
                 log_scale_mask: LogScaleInputType = False,
                 name: Optional[str] = None,
                 data: Optional[DataInputType] = None,
                 data_column_names: DataColumnNamesInput = None) -> None:
        """Create the binning and initialise the still empty internal state.

        :param bins: Forwarded as ``bins`` to ``Binning``.
        :param dimensions: Forwarded as ``dimensions`` to ``Binning``.
        :param scope: Forwarded as ``scope`` to ``Binning``.
        :param log_scale_mask: Forwarded to ``Binning`` as ``log_scale``.
        :param name: Stored in ``self._name``.
        :param data: Only handed to ``_init_data_column_names`` here; it is
            not stored by this constructor.
        :param data_column_names: Handed to ``_init_data_column_names``.
        """
        self._name = name
        # Note the keyword rename: 'log_scale_mask' is passed on as 'log_scale'.
        self._binning = Binning(bins=bins,
                                dimensions=dimensions,
                                scope=scope,
                                log_scale=log_scale_mask)

        # Bin contents are not available yet; they start out as None.
        self._bin_counts = None
        self._bin_errors_sq = None
        # NOTE(review): 'num_bins' is presumably derived from self._binning,
        # which is why it is read only after the binning was created - confirm.
        self._shape = self.num_bins
        self._check_shapes()

        # Reset first, then let the helper evaluate the provided names/data.
        self._data_column_names = None
        self._init_data_column_names(data_column_names=data_column_names,
                                     data=data)

        # The instance is flagged as empty and holds no base data at this point.
        self._base_data = None
        self._is_empty = True

        self._bin_covariance_matrix = None
        self._bin_correlation_matrix = None
예제 #7
0
 def reset_binning_to_use_raw_data_scope(self) -> None:
     """Replace the binning by one whose scope is ``self.raw_data_range``.

     Bin edges and dimensions are taken from the current binning and the
     log-scale flag from ``self.variable``; the result is stored in
     ``self._binning``.
     """
     self._binning = Binning(
         bins=self.binning.bin_edges,
         dimensions=self.binning.dimensions,
         scope=self.raw_data_range,
         log_scale=self.variable.use_log_scale,
     )
예제 #8
0
 def primary_binning(self) -> Binning:
     """Return the binning of the primary variable, creating it on first use.

     The binning is built from ``self.primary_hist_var`` (bin count, scope and
     log-scale flag) and cached in ``self._primary_binning``.
     """
     if self._primary_binning is not None:
         return self._primary_binning

     # First access: build the one-dimensional binning and cache it.
     self._primary_binning = Binning(
         bins=self.primary_hist_var.n_bins,
         dimensions=1,
         scope=self.primary_hist_var.scope,
         log_scale=self.primary_hist_var.use_log_scale,
     )
     return self._primary_binning
예제 #9
0
    def reset_binning_to_use_raw_data_range_of_all(self) -> None:
        """Update the binning so its scope spans the raw data of all histograms.

        The new scope runs from the smallest lower bound to the largest upper
        bound of the ``raw_data_range`` of each entry in ``self.histograms``.
        Bin edges and dimensions are taken from ``self._common_binning`` and
        the log-scale flag from ``self._common_variable``; the result is
        applied via ``self.update_binning``.
        """
        per_hist_ranges = [hist.raw_data_range for hist in self.histograms]
        overall_low = min([bounds[0] for bounds in per_hist_ranges])
        overall_high = max([bounds[1] for bounds in per_hist_ranges])

        self.update_binning(new_binning=Binning(
            bins=self._common_binning.bin_edges,
            dimensions=self._common_binning.dimensions,
            scope=(overall_low, overall_high),
            log_scale=self._common_variable.use_log_scale,
        ))
예제 #10
0
 def apply_adapted_binning(self, minimal_bin_count: int = 5, minimal_number_of_bins: int = 7):
     """Replace the binning by the result of the adaptive binning routine.

     The current bin edges and the underlying distributions are passed to
     ``distributions_utility.run_adaptive_binning``; the returned edges are
     used to build a new ``Binning`` that keeps the current dimensions and
     range and takes the log-scale flag from ``self.variable``.

     :param minimal_bin_count: Forwarded to ``run_adaptive_binning``.
     :param minimal_number_of_bins: Forwarded to ``run_adaptive_binning``.
     """
     adapted_edges = distributions_utility.run_adaptive_binning(
         distributions=self._get_underlying_distributions(),
         bin_edges=self.binning.bin_edges,
         minimal_bin_count=minimal_bin_count,
         minimal_number_of_bins=minimal_number_of_bins,
     )
     self._binning = Binning(
         bins=adapted_edges,
         dimensions=self.binning.dimensions,
         scope=self.binning.range,
         log_scale=self.variable.use_log_scale,
     )
예제 #11
0
    def __init__(self, variable: HistVariable, hist_type: Optional[str] = None) -> None:
        """Initialise an empty histogram for the given variable.

        :param variable: The ``HistVariable`` that defines bin count, scope and
            log-scale flag of the initial binning.
        :param hist_type: Passed through ``_check_and_return_hist_type``; the
            returned value is stored in ``self._hist_type``.
        :raises ValueError: If ``variable`` is not a ``HistVariable`` instance.
        """
        if not isinstance(variable, HistVariable):
            raise ValueError(f"The parameter 'variable' must be a HistVariable instance, "
                             f"but you provided an object of type {type(variable).__name__}")
        self._variable = variable
        self._hist_type = self._check_and_return_hist_type(hist_type=hist_type)

        # One-dimensional binning derived directly from the variable.
        self._binning = Binning(
            bins=variable.n_bins,
            dimensions=1,
            scope=variable.scope,
            log_scale=variable.use_log_scale
        )

        # No components have been added yet.
        self._components = []  # type: List[HistComponent]
        self._auto_color_index = 0

        # These start out unset (None) in the constructor.
        self._raw_data_scope = None
        self._covariance_matrix = None
        self._binning_used_for_covariance_matrix = None
예제 #12
0
    def binning(self) -> Binning:
        """Return the shared binning of the two variables, creating it on demand.

        On first access the bin count, scope and log-scale flag of
        ``self.from_hist_var`` and ``self.to_hist_var`` are asserted to be
        equal; a one-dimensional ``Binning`` is then built from
        ``self.from_hist_var`` and cached in ``self._binning``.

        :raises AssertionError: If the two variables disagree in any of the
            three properties.
        """
        if self._binning is not None:
            return self._binning

        source_var = self.from_hist_var

        # Both variables must describe the very same axis.
        assert source_var.n_bins == self.to_hist_var.n_bins, (
            source_var.n_bins,
            self.to_hist_var.n_bins,
        )
        assert source_var.scope == self.to_hist_var.scope, (
            source_var.scope, self.to_hist_var.scope)
        assert source_var.use_log_scale == self.to_hist_var.use_log_scale, (
            source_var.use_log_scale,
            self.to_hist_var.use_log_scale,
        )

        self._binning = Binning(
            bins=source_var.n_bins,
            dimensions=1,
            scope=source_var.scope,
            log_scale=source_var.use_log_scale,
        )
        return self._binning
예제 #13
0
    def get_projection_on(self, dimension: int) -> Tuple[np.ndarray, Binning]:
        """Project the bin counts onto a single dimension.

        The bin counts are summed over all other dimensions and returned
        together with a one-dimensional ``Binning`` built from the bin edges
        and range of the requested dimension.

        :param dimension: Index of the dimension to keep; must lie in
            ``[0, self.dimensions - 1]``.
        :return: Tuple of the projected bin counts and the reduced binning.
        :raises ValueError: If ``dimension`` is outside the valid interval.
        """
        # TODO: Requires special treatment of the bin errors and should return these correctly reduced errors also!!!
        if dimension < 0 or dimension >= self.dimensions:
            raise ValueError(
                f"Parameter 'dimension' must be in [0, {self.dimensions - 1}] "
                f"as the distribution has {self.dimensions} dimensions! You provided {dimension}."
            )

        # Sum over every axis except the one that is kept.
        axes_to_sum = tuple(axis for axis in range(self.dimensions)
                            if axis != dimension)
        counts_on_axis = self.bin_counts.sum(axis=axes_to_sum)
        assert len(counts_on_axis.shape) == 1, counts_on_axis.shape

        # NOTE(review): no log_scale is passed here, so the default of Binning
        # applies - confirm this is intended for log-scaled dimensions.
        binning_on_axis = Binning(bins=self.bin_edges[dimension],
                                  dimensions=1,
                                  scope=self.range[dimension])

        assert len(counts_on_axis) == self.num_bins[dimension], \
            (len(counts_on_axis), self.num_bins[dimension])

        return counts_on_axis, binning_on_axis