def _init_hist_binning(
    self,
    special_binning: Union[None, BinsInputType, Binning],
) -> Binning:
    """Return the one-dimensional binning to use for this histogram.

    A ready-made ``Binning`` is validated against ``self.variable`` and
    returned unchanged. Any other value is forwarded to the ``Binning``
    constructor as its ``bins`` argument, with ``None`` standing for
    ``self.variable.n_bins``.

    :param special_binning: ``None``, a bins specification, or a ``Binning``.
    :return: The validated or newly constructed ``Binning``.
    :raises AssertionError: If a ``Binning`` is given that is not
        one-dimensional, whose first log-scale flag differs from
        ``self.variable.use_log_scale``, or (only when
        ``self.variable.has_scope()`` is true) whose first range differs
        from ``self.variable.scope``.
    """
    hist_var = self.variable

    if isinstance(special_binning, Binning):
        # A user-supplied Binning must agree with the histogram variable.
        assert special_binning.dimensions == 1, special_binning.dimensions
        assert special_binning.log_scale_mask[0] == hist_var.use_log_scale, (
            special_binning.log_scale_mask,
            hist_var.use_log_scale,
        )
        if hist_var.has_scope():
            assert special_binning.range[0] == hist_var.scope, (
                special_binning.range,
                hist_var.scope,
            )
        return special_binning

    # No Binning object given: build one, taking the bins either from the
    # variable (None) or from the caller-provided specification.
    bins = hist_var.n_bins if special_binning is None else special_binning
    return Binning(
        bins=bins,
        dimensions=1,
        scope=hist_var.scope,
        log_scale=hist_var.use_log_scale,
    )
def _add_channel_hist_vars(self, channel_name: str, original_binning: Binning) -> None:
    """Register one ``HistVariable`` per dimension for a new channel.

    For every entry of ``self.variables`` the matching one-dimensional
    binning is taken from ``original_binning`` and turned into a
    ``HistVariable`` that reuses the label, name and unit of the existing
    variable but takes bin count, scope and log-scale flag from the binning.
    The result is stored in ``self._channel_variables_per_dim`` under
    ``channel_name``.

    :param channel_name: Key under which the variables are registered.
    :param original_binning: Binning that is split up dimension by dimension.
    :raises AssertionError: If the channel is already registered or a
        per-dimension binning is not strictly one-dimensional.
    """
    registry = self._channel_variables_per_dim
    assert channel_name not in registry.keys(), (channel_name, registry.keys())

    per_dimension = {}  # type: Dict[int, HistVariable]
    for dim_index, template_var in enumerate(self.variables):
        dim_binning = original_binning.get_binning_for_one_dimension(dimension=dim_index)

        # Sanity checks: the extracted binning must describe exactly one axis.
        assert dim_binning.dimensions == 1, dim_binning.dimensions
        assert len(dim_binning.num_bins) == 1, dim_binning.num_bins
        assert dim_binning.num_bins[0] == dim_binning.num_bins_total, (
            dim_binning.num_bins,
            dim_binning.num_bins_total,
        )
        assert len(dim_binning.range) == 1, dim_binning.range
        assert len(dim_binning.log_scale_mask) == 1, dim_binning.log_scale_mask

        per_dimension[dim_index] = HistVariable(
            df_label=template_var.df_label,
            n_bins=dim_binning.num_bins_total,
            scope=dim_binning.range[0],
            var_name=template_var.variable_name,
            unit=template_var.unit,
            use_log_scale=dim_binning.log_scale_mask[0],
        )

    registry[channel_name] = per_dimension
def _compare_binning_to_channel_variable_binning(self, channel_name: str, binning: Binning) -> None:
    """Assert that ``binning`` matches the variables registered for a channel.

    For every ``(dimension, variable)`` pair stored for ``channel_name`` the
    one-dimensional slice of ``binning`` is compared to the variable: number
    of bins, range (against ``variable.scope``) and log-scale flag must agree.

    :param channel_name: Channel whose registered variables are checked.
    :param binning: Binning to compare against the registered variables.
    :raises AssertionError: If the channel is unknown or any per-dimension
        property differs. The message always carries the values of the
        per-dimension binning that was actually compared, followed by the
        variable's ``df_label``.
    """
    assert channel_name in self._channel_variables_per_dim.keys(), (
        channel_name,
        self._channel_variables_per_dim.keys(),
    )

    for dimension, variable in self.channel_variables_per_dim_dict[channel_name].items():
        binning_for_dim = binning.get_binning_for_one_dimension(dimension=dimension)

        # The slice must describe exactly one axis.
        assert binning_for_dim.dimensions == 1, (binning_for_dim.dimensions, variable.df_label)
        assert len(binning_for_dim.num_bins) == 1, (binning_for_dim.num_bins, variable.df_label)
        assert binning_for_dim.num_bins[0] == binning_for_dim.num_bins_total, (
            binning_for_dim.num_bins,
            binning_for_dim.num_bins_total,
            variable.df_label,
        )

        # Report the per-dimension values in the message (not those of the
        # full input binning), so the diagnostics match the failed check.
        assert binning_for_dim.num_bins_total == variable.n_bins, (
            binning_for_dim.num_bins_total,
            variable.n_bins,
            variable.df_label,
        )

        assert len(binning_for_dim.range) == 1, (binning_for_dim.range, variable.df_label)
        assert binning_for_dim.range[0] == variable.scope, (
            binning_for_dim.range,
            variable.scope,
            variable.df_label,
        )

        assert len(binning_for_dim.log_scale_mask) == 1, (
            binning_for_dim.log_scale_mask,
            variable.df_label,
        )
        assert binning_for_dim.log_scale_mask[0] == variable.use_log_scale, (
            binning_for_dim.log_scale_mask,
            variable.use_log_scale,
            variable.df_label,
        )
def _init_secondary_var(
    secondary_hist_variable: HistVariable,
    secondary_variable_binning: Optional[Binning],
) -> Tuple[HistVariable, Binning]:
    """Pair the secondary histogram variable with a one-dimensional binning.

    When a binning is supplied it is validated and returned as is. Otherwise
    a binning is built from the variable's ``n_bins`` and ``scope`` with
    ``log_scale=False``.

    :param secondary_hist_variable: Variable the binning belongs to.
    :param secondary_variable_binning: Optional pre-built binning.
    :return: ``(secondary_hist_variable, binning)``.
    :raises AssertionError: If a supplied binning (or the variable passed
        along with it) has the wrong type, is not one-dimensional, or its
        range does not equal the variable's scope.
    """
    if secondary_variable_binning is not None:
        assert isinstance(secondary_hist_variable, HistVariable), type(secondary_hist_variable).__name__
        assert isinstance(secondary_variable_binning, Binning), type(secondary_variable_binning).__name__
        assert secondary_variable_binning.dimensions == 1, secondary_variable_binning.dimensions

        # The range must be a one-element tuple holding a tuple that is
        # compared to the variable's scope.
        given_range = secondary_variable_binning.range
        assert isinstance(given_range, tuple), type(given_range)
        assert len(given_range) == 1, (given_range, len(given_range))
        assert isinstance(given_range[0], tuple), (type(given_range[0]), given_range[0])
        assert secondary_hist_variable.scope == given_range[0], (
            secondary_hist_variable.scope,
            given_range,
        )
        return secondary_hist_variable, secondary_variable_binning

    default_binning = Binning(
        bins=secondary_hist_variable.n_bins,
        dimensions=1,
        scope=secondary_hist_variable.scope,
        log_scale=False,
    )  # type: Binning
    return secondary_hist_variable, default_binning
def find_common_binning_for_distributions(
    distributions: Sequence[BinnedDistribution],
) -> Binning:
    """Return a binning that can be shared by all given distributions.

    If every distribution already has the same binning, that binning is
    returned. Otherwise a new ``Binning`` is built that uses, per dimension,
    the largest number of bins found among the distributions, the ranges
    returned by ``find_ranges_for_distributions`` and the common log-scale
    mask.

    :param distributions: Non-empty sequence of ``BinnedDistribution`` objects.
    :return: The shared or newly constructed ``Binning``.
    :raises ValueError: If ``distributions`` is empty.
    :raises AssertionError: If the argument is not a sequence of
        ``BinnedDistribution`` objects, or the distributions differ in their
        number of dimensions or in their log-scale mask.
    """
    assert isinstance(distributions, CollectionsABCSequence), type(distributions)
    assert all(isinstance(dist, BinnedDistribution) for dist in distributions), \
        [type(d) for d in distributions]

    # Without this guard an empty sequence would surface as a bare IndexError
    # from the ``distributions[0]`` lookups below.
    if len(distributions) == 0:
        raise ValueError("At least one distribution is required to determine a common binning.")

    reference = distributions[0]

    # If all distributions already have the same binning, return it.
    if all(dist.binning == reference.binning for dist in distributions):
        return reference.binning

    common_dims = reference.dimensions
    assert all(dist.dimensions == common_dims for dist in distributions), \
        ([d.dimensions for d in distributions], common_dims)

    common_log_scale_mask = reference.binning.log_scale_mask
    assert all(dist.binning.log_scale_mask == common_log_scale_mask for dist in distributions), \
        ([d.binning.log_scale_mask for d in distributions], common_log_scale_mask)

    # Find the most general binning: widest ranges, finest granularity per dimension.
    common_ranges = find_ranges_for_distributions(distributions=distributions)
    common_num_bins = tuple(
        max(dist.binning.num_bins[dim] for dist in distributions)
        for dim in range(common_dims)
    )

    return Binning(
        bins=common_num_bins,
        dimensions=common_dims,
        scope=common_ranges,
        log_scale=common_log_scale_mask,
    )
def __init__(self,
             bins: BinsInputType,
             dimensions: int,
             scope: ScopeInputType = None,
             log_scale_mask: LogScaleInputType = False,
             name: Optional[str] = None,
             data: Optional[DataInputType] = None,
             data_column_names: DataColumnNamesInput = None) -> None:
    """Set up the binning and the empty bookkeeping attributes.

    :param bins: Forwarded to ``Binning`` as ``bins``.
    :param dimensions: Forwarded to ``Binning`` as ``dimensions``.
    :param scope: Forwarded to ``Binning`` as ``scope``.
    :param log_scale_mask: Forwarded to ``Binning`` as ``log_scale``.
    :param name: Stored as ``self._name``.
    :param data: Only passed on to ``_init_data_column_names`` here; the
        statements below do not store it.
    :param data_column_names: Passed on to ``_init_data_column_names``.
    """
    self._name = name
    self._binning = Binning(bins=bins, dimensions=dimensions, scope=scope, log_scale=log_scale_mask)

    # Bin contents start out unset.
    self._bin_counts = None
    self._bin_errors_sq = None
    # NOTE(review): ``self.num_bins`` is presumably derived from ``self._binning``,
    # which is why the binning is created first -- confirm against the class.
    self._shape = self.num_bins
    self._check_shapes()

    # The attribute must exist before the initialisation helper is called.
    self._data_column_names = None
    self._init_data_column_names(data_column_names=data_column_names, data=data)

    # The instance starts out flagged as empty, with no base data.
    self._base_data = None
    self._is_empty = True

    # Covariance and correlation matrices start out unset.
    self._bin_covariance_matrix = None
    self._bin_correlation_matrix = None
def reset_binning_to_use_raw_data_scope(self) -> None:
    """Replace ``self._binning`` by one whose scope is the raw data range.

    Bin edges and number of dimensions are taken from the current binning,
    the scope from ``self.raw_data_range`` and the log-scale flag from
    ``self.variable.use_log_scale``.
    """
    current = self.binning
    self._binning = Binning(
        bins=current.bin_edges,
        dimensions=current.dimensions,
        scope=self.raw_data_range,
        log_scale=self.variable.use_log_scale,
    )
def primary_binning(self) -> Binning:
    """Return the one-dimensional binning of the primary histogram variable.

    The binning is built from ``self.primary_hist_var`` on first access and
    cached in ``self._primary_binning`` afterwards.
    """
    if self._primary_binning is not None:
        return self._primary_binning

    primary_var = self.primary_hist_var
    self._primary_binning = Binning(
        bins=primary_var.n_bins,
        dimensions=1,
        scope=primary_var.scope,
        log_scale=primary_var.use_log_scale,
    )
    return self._primary_binning
def reset_binning_to_use_raw_data_range_of_all(self) -> None:
    """Rebuild the common binning so its scope covers all raw data ranges.

    The new scope runs from the smallest first entry to the largest second
    entry of the ``raw_data_range`` of every histogram in ``self.histograms``.
    Bin edges and dimensions are kept from ``self._common_binning``, and the
    result is applied through ``self.update_binning``.
    """
    per_hist_ranges = [histogram.raw_data_range for histogram in self.histograms]
    lowest = min(bounds[0] for bounds in per_hist_ranges)
    highest = max(bounds[1] for bounds in per_hist_ranges)

    common = self._common_binning
    widened_binning = Binning(
        bins=common.bin_edges,
        dimensions=common.dimensions,
        scope=(lowest, highest),
        log_scale=self._common_variable.use_log_scale,
    )
    self.update_binning(new_binning=widened_binning)
def apply_adapted_binning(self, minimal_bin_count: int = 5, minimal_number_of_bins: int = 7):
    """Replace ``self._binning`` using edges from the adaptive-binning helper.

    The new bin edges are whatever ``distributions_utility.run_adaptive_binning``
    returns for the underlying distributions and the current bin edges.
    Dimensions and range are kept from the current binning; the log-scale
    flag comes from ``self.variable.use_log_scale``.

    :param minimal_bin_count: Forwarded to ``run_adaptive_binning``.
    :param minimal_number_of_bins: Forwarded to ``run_adaptive_binning``.
    """
    adapted_edges = distributions_utility.run_adaptive_binning(
        distributions=self._get_underlying_distributions(),
        bin_edges=self.binning.bin_edges,
        minimal_bin_count=minimal_bin_count,
        minimal_number_of_bins=minimal_number_of_bins,
    )

    # Read the current binning only after the helper has run, as before.
    previous = self.binning
    self._binning = Binning(
        bins=adapted_edges,
        dimensions=previous.dimensions,
        scope=previous.range,
        log_scale=self.variable.use_log_scale,
    )
def __init__(self, variable: HistVariable, hist_type: Optional[str] = None) -> None:
    """Create an empty histogram for ``variable``.

    :param variable: Histogram variable; its ``n_bins``, ``scope`` and
        ``use_log_scale`` define the initial one-dimensional binning.
    :param hist_type: Passed through ``_check_and_return_hist_type`` and
        stored as ``self._hist_type``.
    :raises ValueError: If ``variable`` is not a ``HistVariable`` instance.
    """
    if not isinstance(variable, HistVariable):
        raise ValueError(
            f"The parameter 'variable' must be a HistVariable instance, "
            f"but you provided an object of type {type(variable).__name__}"
        )

    self._variable = variable
    self._hist_type = self._check_and_return_hist_type(hist_type=hist_type)
    self._binning = Binning(
        bins=variable.n_bins,
        dimensions=1,
        scope=variable.scope,
        log_scale=variable.use_log_scale,
    )

    # No components yet; the colour index starts at zero.
    self._components = []  # type: List[HistComponent]
    self._auto_color_index = 0

    # Raw data scope and covariance bookkeeping start out unset.
    self._raw_data_scope = self._covariance_matrix = self._binning_used_for_covariance_matrix = None
def binning(self) -> Binning:
    """Return the binning shared by ``from_hist_var`` and ``to_hist_var``.

    The binning is created on first access and cached in ``self._binning``.

    :raises AssertionError: On first access, if the two variables differ in
        ``n_bins``, ``scope`` or ``use_log_scale``.
    """
    if self._binning is not None:
        return self._binning

    source_var = self.from_hist_var
    target_var = self.to_hist_var

    # Both variables must describe the same axis before one binning can serve both.
    assert source_var.n_bins == target_var.n_bins, (source_var.n_bins, target_var.n_bins)
    assert source_var.scope == target_var.scope, (source_var.scope, target_var.scope)
    assert source_var.use_log_scale == target_var.use_log_scale, (
        source_var.use_log_scale,
        target_var.use_log_scale,
    )

    self._binning = Binning(
        bins=source_var.n_bins,
        dimensions=1,
        scope=source_var.scope,
        log_scale=source_var.use_log_scale,
    )
    return self._binning
def get_projection_on(self, dimension: int) -> Tuple[np.ndarray, Binning]:
    """Project the bin counts onto one dimension.

    The bin counts are summed over every axis except ``dimension``. The
    returned binning is one-dimensional and reuses the bin edges, range and
    log-scale flag of that dimension.

    :param dimension: Index of the dimension to keep, in
        ``[0, self.dimensions - 1]``.
    :return: ``(projected_bin_count, reduced_binning)``.
    :raises ValueError: If ``dimension`` is outside the valid range.
    """
    # TODO: Requires special treatment of the bin errors and should return these correctly reduced errors also!!!
    if dimension < 0 or dimension >= self.dimensions:
        raise ValueError(
            f"Parameter 'dimension' must be in [0, {self.dimensions - 1}] "
            f"as the distribution has {self.dimensions} dimensions! You provided {dimension}."
        )

    other_dimensions = tuple(dim for dim in range(self.dimensions) if dim != dimension)
    projected_bin_count = self.bin_counts.sum(axis=other_dimensions)
    assert len(projected_bin_count.shape) == 1, projected_bin_count.shape

    # Forward the log-scale flag of the projected axis; otherwise the reduced
    # binning silently falls back to the constructor default.
    reduced_binning = Binning(
        bins=self.bin_edges[dimension],
        dimensions=1,
        scope=self.range[dimension],
        log_scale=self.binning.log_scale_mask[dimension],
    )

    assert len(projected_bin_count) == self.num_bins[dimension], \
        (len(projected_bin_count), self.num_bins[dimension])

    return projected_bin_count, reduced_binning