def _smoothed_base_rates(self, labels, concentration=1.0):
    """Estimate Dirichlet-smoothed base rates per intersecting group.

    Every distinct row of `self.dataset.protected_attributes` is treated as
    one intersecting group (e.g. black-women, white-man, etc). For each group
    the favorable-label count and the total count are obtained from the
    `utils` helpers, then smoothed with a symmetric Dirichlet prior.

    Args:
        labels: Labels forwarded unchanged to `utils.compute_num_pos_neg`.
            Presumably one label per dataset instance -- confirm against
            the callers.
        concentration (float, optional): Total Dirichlet concentration. It
            is divided evenly between the two label classes. Defaults to
            1.0.

    Returns:
        numpy.ndarray: One smoothed estimate of p(y=1|S) per group, in the
        row order returned by `np.unique(..., axis=0)`.

    Raises:
        ValueError: If `concentration` is negative.
    """
    if concentration < 0:
        raise ValueError("Concentration parameter must be non-negative.")

    # Symmetric prior: each of the two classes gets an equal pseudo-count.
    num_classes = 2  # binary label dataset
    pseudo_count = concentration / num_classes

    dataset = self.dataset
    attr_names = dataset.protected_attribute_names

    # One row per unique combination of protected attribute values.
    groups = np.unique(dataset.protected_attributes, axis=0)
    positives = np.zeros(len(groups))
    totals = np.zeros(len(groups))

    for idx, group_values in enumerate(groups):
        # Single-element condition list selecting exactly this group.
        condition = [dict(zip(attr_names, group_values))]
        totals[idx] = utils.compute_num_instances(
            dataset.protected_attributes,
            dataset.instance_weights,
            attr_names,
            condition=condition)
        positives[idx] = utils.compute_num_pos_neg(
            dataset.protected_attributes,
            labels,
            dataset.instance_weights,
            attr_names,
            dataset.favorable_label,
            condition=condition)

    # The denominator gains the full concentration (sum of both pseudo-counts).
    return (positives + pseudo_count) / (totals + concentration)
def num_instances(self, privileged=None):
    """Compute the number of instances, :math:`n`, in the dataset,
    optionally conditioned on protected attributes.

    The count is delegated to `utils.compute_num_instances`, which receives
    the dataset's protected attributes, instance weights and protected
    attribute names together with the selected condition.

    Args:
        privileged (bool, optional): Boolean prescribing whether to
            condition this metric on the `privileged_groups`, if `True`, or
            the `unprivileged_groups`, if `False`. Defaults to `None`
            meaning this metric is computed over the entire dataset.

    Raises:
        AttributeError: `privileged_groups` or `unprivileged_groups` must be
            provided at initialization to condition on them.
    """
    # Translate the tri-state flag into the condition used by the helper.
    group_condition = self._to_condition(privileged)
    dataset = self.dataset
    return utils.compute_num_instances(
        dataset.protected_attributes,
        dataset.instance_weights,
        dataset.protected_attribute_names,
        condition=group_condition)