def check_weighted_percentile(size=100, q_size=20):
    """Sanity-check ``weighted_percentile`` on randomly generated data.

    Three properties are asserted:

    1. Permuting the requested quantile levels (and, additionally, the data)
       yields the correspondingly permuted results.
    2. With random exponential sample weights, results for an increasing
       grid of levels are already sorted.
    3. With ``old_style=True`` the result matches ``numpy.percentile`` on a
       small fixed array to within 1e-7.

    :param size: length of the random permutation used as data.
    :param q_size: number of random quantile levels used in the first check.
    :raises AssertionError: if any of the properties above is violated.
    """
    # Unseeded generator: every run uses different random inputs.
    rng = RandomState()
    values = rng.permutation(size)
    levels = rng.uniform(size=q_size)
    order = rng.permutation(q_size)

    # 1. Invariance under permutations of levels and of data.
    unshuffled = weighted_percentile(values, levels)
    baseline = unshuffled[order]
    levels_permuted = weighted_percentile(values, levels[order])
    data_order = rng.permutation(size)
    both_permuted = weighted_percentile(values[data_order], levels[order])
    assert numpy.all(baseline == levels_permuted), 'breaks on permutations'
    assert numpy.all(baseline == both_permuted), 'breaks on permutations'

    # 2. Results for increasing levels must come out sorted.
    grid = numpy.linspace(0, 1, size * 3)
    weights = rng.exponential(size=size)
    ordered = weighted_percentile(values, grid, sample_weight=weights)
    assert numpy.all(ordered == numpy.sort(ordered)), "doesn't preserve order"

    # 3. Agreement with numpy.percentile (which takes levels in percent).
    small = numpy.array([0, 1, 2, 5])
    for fraction in rng.uniform(size=10):
        expected = numpy.percentile(small, fraction * 100)
        actual = weighted_percentile(small, fraction, old_style=True)
        assert numpy.abs(expected - actual) < 1e-7, \
            "doesn't coincide with numpy.percentile"
def __call__(self, y, proba, sample_weight):
    """Average ``ut.theil`` of the bin efficiencies over a set of cuts.

    Cuts are the weighted percentiles of the selected predictions at the
    levels stored in ``self.target_rcp``. For every cut the output of
    ``ut.compute_bin_efficiencies`` is passed to ``ut.theil``, and the mean
    of those values is returned.

    Side effect: when ``self.target_rcp`` is ``None`` it is replaced on the
    instance by ``[0.5, 0.6, 0.7, 0.8, 0.9]``.

    :param y: not used by this method.
    :param proba: predictions, indexed as
        ``proba[self._mask, self.uniform_label]``.
    :param sample_weight: not used by this method; ``self._masked_weight``
        is used instead.
    :return: sum of the per-cut ``ut.theil`` values divided by the number
        of cuts.
    """
    scores = proba[self._mask, self.uniform_label]
    if self.target_rcp is None:
        # Remember the default levels on the instance.
        self.target_rcp = [0.5, 0.6, 0.7, 0.8, 0.9]

    # NOTE(review): target_rcp is passed as-is here, whereas the __call__
    # in this file that uses ut.compute_group_efficiencies passes
    # 1 - target_rcp. Confirm which convention is intended.
    thresholds = weighted_percentile(
        scores, self.target_rcp, sample_weight=self._masked_weight)

    theil_total = 0.
    for threshold in thresholds:
        efficiencies = ut.compute_bin_efficiencies(
            scores,
            bin_indices=self._bin_indices,
            cut=threshold,
            sample_weight=self._masked_weight,
        )
        theil_total += ut.theil(efficiencies, weights=self._bin_weights)
    return theil_total / len(thresholds)
def __call__(self, y, proba, sample_weight):
    """Power mean of ``ut.weighted_deviation`` of group efficiencies over cuts.

    Cuts are the weighted percentiles of the selected predictions at levels
    ``1 - self.target_rcp``. For every cut the output of
    ``ut.compute_group_efficiencies`` is passed to ``ut.weighted_deviation``.
    The mean of those values is raised to ``1 / self.power`` and returned.

    Side effect: ``self.target_rcp`` is replaced on the instance by a numpy
    array (built from ``[0.5, 0.6, 0.7, 0.8, 0.9]`` when it was ``None``).

    :param y: not used by this method.
    :param proba: predictions, indexed as
        ``proba[self._mask, self.uniform_label]``.
    :param sample_weight: not used by this method; ``self._masked_weight``
        is used instead.
    :return: ``(mean of per-cut deviations) ** (1 / self.power)``.
    """
    scores = proba[self._mask, self.uniform_label]
    if self.target_rcp is None:
        # Remember the default levels on the instance.
        self.target_rcp = [0.5, 0.6, 0.7, 0.8, 0.9]
    # An array is needed so that `1 - target_rcp` works elementwise.
    self.target_rcp = numpy.array(self.target_rcp)

    levels = 1 - self.target_rcp
    thresholds = weighted_percentile(
        scores, percentiles=levels, sample_weight=self._masked_weight)

    deviation_total = 0.
    for threshold in thresholds:
        efficiencies = ut.compute_group_efficiencies(
            scores,
            groups_indices=self._groups_indices,
            cut=threshold,
            sample_weight=self._masked_weight,
        )
        deviation_total += ut.weighted_deviation(
            efficiencies, weights=self._group_weights, power=self.power)

    mean_deviation = deviation_total / len(thresholds)
    return mean_deviation ** (1. / self.power)
def check_weighted_percentile(size=100, q_size=20):
    """Run randomized consistency checks against ``weighted_percentile``.

    Checks, in order: results follow a permutation of the requested levels
    (also when the data are permuted); results for an increasing grid of
    levels with random exponential weights are sorted; and results with
    ``old_style=True`` agree with ``numpy.percentile`` within 1e-7 on the
    array ``[0, 1, 2, 5]``.

    :param size: number of data points (a random permutation of that length).
    :param q_size: number of uniformly drawn levels for the permutation check.
    :raises AssertionError: when one of the checks fails.
    """
    generator = RandomState()  # unseeded, so inputs differ between runs
    data = generator.permutation(size)
    probs = generator.uniform(size=q_size)
    perm = generator.permutation(q_size)

    # Permutation check: permute the output, the levels, or levels and data.
    from_output = weighted_percentile(data, probs)[perm]
    from_levels = weighted_percentile(data, probs[perm])
    reshuffled = data[generator.permutation(size)]
    from_both = weighted_percentile(reshuffled, probs[perm])
    assert (
        numpy.all(from_output == from_levels)
        and numpy.all(from_output == from_both)
    ), 'breaks on permutations'

    # Ordering check on a dense grid of levels with random weights.
    probs_grid = numpy.linspace(0, 1, size * 3)
    random_weights = generator.exponential(size=size)
    on_grid = weighted_percentile(data, probs_grid, sample_weight=random_weights)
    assert numpy.all(on_grid == numpy.sort(on_grid)), "doesn't preserve order"

    # Reference check: numpy.percentile expects levels in percent.
    reference_data = numpy.array([0, 1, 2, 5])
    for prob in generator.uniform(size=10):
        reference = numpy.percentile(reference_data, prob * 100)
        candidate = weighted_percentile(reference_data, prob, old_style=True)
        assert numpy.abs(reference - candidate) < 1e-7, \
            "doesn't coincide with numpy.percentile"
def __call__(self, y, proba, sample_weight):
    """Return the mean of ``ut.theil`` over cuts taken at ``target_rcp``.

    The predictions ``proba[self._mask, self.uniform_label]`` are cut at
    their weighted percentiles for the levels in ``self.target_rcp``. Each
    cut produces one ``ut.theil`` value computed from the result of
    ``ut.compute_bin_efficiencies``; these values are averaged.

    Side effect: a ``None`` ``self.target_rcp`` is overwritten with the
    default list ``[0.5, 0.6, 0.7, 0.8, 0.9]``.

    :param y: ignored here.
    :param proba: predictions; only the entries selected by ``self._mask``
        and ``self.uniform_label`` are used.
    :param sample_weight: ignored here (``self._masked_weight`` is used).
    :return: accumulated ``ut.theil`` values divided by the number of cuts.
    """
    selected = proba[self._mask, self.uniform_label]
    if self.target_rcp is None:
        self.target_rcp = [0.5, 0.6, 0.7, 0.8, 0.9]  # default levels

    masked_weight = self._masked_weight
    # NOTE(review): the levels are used directly as percentiles, whereas the
    # __call__ in this file built on ut.compute_group_efficiencies uses
    # 1 - target_rcp. Confirm which is intended.
    cut_values = weighted_percentile(
        selected, self.target_rcp, sample_weight=masked_weight)

    accumulated = 0.
    for cut_value in cut_values:
        per_bin = ut.compute_bin_efficiencies(
            selected, bin_indices=self._bin_indices,
            cut=cut_value, sample_weight=masked_weight)
        accumulated += ut.theil(per_bin, weights=self._bin_weights)
    return accumulated / len(cut_values)
def __call__(self, y, proba, sample_weight):
    """Return the power mean of group-efficiency deviations over cuts.

    The predictions ``proba[self._mask, self.uniform_label]`` are cut at
    their weighted percentiles for the levels ``1 - self.target_rcp``. Each
    cut produces one ``ut.weighted_deviation`` value computed from the
    result of ``ut.compute_group_efficiencies``. The average of these values
    is raised to the power ``1 / self.power``.

    Side effect: ``self.target_rcp`` is stored back as a numpy array, using
    ``[0.5, 0.6, 0.7, 0.8, 0.9]`` when it was ``None``.

    :param y: ignored here.
    :param proba: predictions; only the entries selected by ``self._mask``
        and ``self.uniform_label`` are used.
    :param sample_weight: ignored here (``self._masked_weight`` is used).
    :return: ``(sum of deviations / number of cuts) ** (1 / self.power)``.
    """
    selected = proba[self._mask, self.uniform_label]
    if self.target_rcp is None:
        self.target_rcp = [0.5, 0.6, 0.7, 0.8, 0.9]  # default levels
    # Convert so the subtraction below is elementwise.
    self.target_rcp = numpy.array(self.target_rcp)

    masked_weight = self._masked_weight
    cut_values = weighted_percentile(
        selected, percentiles=1 - self.target_rcp, sample_weight=masked_weight)

    accumulated = 0.
    for cut_value in cut_values:
        per_group = ut.compute_group_efficiencies(
            selected, groups_indices=self._groups_indices,
            cut=cut_value, sample_weight=masked_weight)
        accumulated += ut.weighted_deviation(
            per_group, weights=self._group_weights, power=self.power)

    exponent = 1. / self.power
    return (accumulated / len(cut_values)) ** exponent