Ejemplo n.º 1
0
    def fit(self, original, target, original_weight=None, target_weight=None):
        """
        Compute per-feature bin edges and the binned target / original weight ratio.

        Edges for each feature are weighted quantiles of the target sample. Both samples
        are histogrammed over those bins, smoothed with a gaussian filter, clipped from
        below by ``min_in_the_bin`` and divided to obtain ``self.transition``.

        :param original: values from original distribution, array-like of shape [n_samples, n_features]
        :param target: values from target distribution, array-like of shape [n_samples, n_features]
        :param original_weight: weights for samples of the original distribution
        :param target_weight: weights for samples of the target distribution
        :return: self
        """
        # NOTE(review): n_features_ is cleared here and read below, so _normalize_input
        # presumably sets it -- confirm against that helper.
        self.n_features_ = None
        original, original_weight = self._normalize_input(original, original_weight)
        target, target_weight = self._normalize_input(target, target_weight)

        # Interior quantile levels only: the 0 and 1 endpoints are sliced off.
        inner_levels = numpy.linspace(0, 1, self.n_percentiles + 1)[1:-1]
        self.edges = []
        self.edges.extend(
            weighted_quantile(target[:, feature], quantiles=inner_levels, sample_weight=target_weight)
            for feature in range(self.n_features_)
        )

        def smeared_histogram(sample, sample_weight):
            # Weighted n-dimensional histogram over the fitted bins, blurred and
            # clipped from below so the ratio taken afterwards has a bounded denominator.
            indices = self.compute_bin_indices(sample)
            counts = bincount_nd(indices, weights=sample_weight,
                                 shape=[self.n_percentiles] * self.n_features_)
            blurred = gaussian_filter(counts, sigma=self.n_neighs, truncate=2.5)
            return blurred.clip(self.min_in_the_bin)

        original_hist = smeared_histogram(original, original_weight)
        target_hist = smeared_histogram(target, target_weight)
        self.transition = target_hist / original_hist
        return self
Ejemplo n.º 2
0
def check_weighted_percentile(size=100, q_size=20):
    """
    Sanity-check ``weighted_quantile`` on randomly generated input.

    Three properties are asserted:

    * the result does not depend on the order of the quantiles or of the data;
    * increasing quantiles give non-decreasing values (with random exponential weights);
    * with ``old_style=True`` the result agrees with ``numpy.percentile`` on a small array.

    :param size: length of the random permutation used as data
    :param q_size: number of random quantile levels to test
    """
    rng = RandomState()
    values = rng.permutation(size)
    levels = rng.uniform(size=q_size)
    level_order = rng.permutation(q_size)

    # Permute the output, permute the requested levels, permute levels and data.
    permuted_after = weighted_quantile(values, levels)[level_order]
    permuted_before = weighted_quantile(values, levels[level_order])
    permuted_data = weighted_quantile(values[rng.permutation(size)], levels[level_order])
    assert numpy.all(permuted_after == permuted_before), 'breaks on permutations'
    assert numpy.all(permuted_after == permuted_data), 'breaks on permutations'

    # Monotonicity: sorted quantile levels must yield an already sorted result.
    levels = numpy.linspace(0, 1, size * 3)
    random_weights = rng.exponential(size=size)
    ordered = weighted_quantile(values, levels, sample_weight=random_weights)
    assert numpy.all(ordered == numpy.sort(ordered)), "doesn't preserve order"

    # Agreement with the plain (unweighted) numpy percentile.
    values = numpy.array([0, 1, 2, 5])
    for level in rng.uniform(size=10):
        expected = numpy.percentile(values, level * 100)
        obtained = weighted_quantile(values, level, old_style=True)
        assert numpy.abs(expected - obtained) < 1e-7, \
            "doesn't coincide with numpy.percentile"
Ejemplo n.º 3
0
def check_weighted_percentile(size=100, q_size=20):
    """
    Exercise ``weighted_quantile`` with random data and assert basic invariants.

    Checks, in order: invariance under permutation of quantiles and of samples,
    preservation of ordering for increasing quantiles under random exponential
    sample weights, and agreement with ``numpy.percentile`` when called with
    ``old_style=True``.

    :param size: number of elements in the permuted data array
    :param q_size: number of uniformly drawn quantile levels
    """
    generator = RandomState()
    data = generator.permutation(size)
    probs = generator.uniform(size=q_size)
    shuffle = generator.permutation(q_size)

    reference = weighted_quantile(data, probs)[shuffle]
    with_shuffled_probs = weighted_quantile(data, probs[shuffle])
    with_shuffled_data = weighted_quantile(data[generator.permutation(size)],
                                           probs[shuffle])
    # every permuted variant has to reproduce the reference exactly
    for variant in (with_shuffled_probs, with_shuffled_data):
        assert numpy.all(reference == variant), 'breaks on permutations'

    # checks that order is kept
    probs = numpy.linspace(0, 1, size * 3)
    weighted = weighted_quantile(data,
                                 probs,
                                 sample_weight=generator.exponential(size=size))
    assert numpy.all(weighted == numpy.sort(weighted)), "doesn't preserve order"

    # comparing with simple percentiles
    data = numpy.array([0, 1, 2, 5])
    for prob in generator.uniform(size=10):
        deviation = numpy.abs(
            numpy.percentile(data, prob * 100) - weighted_quantile(data, prob, old_style=True))
        assert deviation < 1e-7, "doesn't coincide with numpy.percentile"
Ejemplo n.º 4
0
    def fit(self, original, target, original_weight=None, target_weight=None):
        """
        Fit the reweighting rule from histograms of the two samples.

        Bin edges are placed at weighted quantiles of each target feature; the stored
        ``transition`` is the ratio of the smoothed, lower-clipped target histogram to
        the smoothed, lower-clipped original histogram.

        :param original: values from original distribution, array-like of shape [n_samples, n_features]
        :param target: values from target distribution, array-like of shape [n_samples, n_features]
        :param original_weight: weights for samples of the original distribution
        :param target_weight: weights for samples of the target distribution
        :return: self
        """
        # NOTE(review): n_features_ is reset and then used below; presumably
        # _normalize_input fills it in -- verify in that helper.
        self.n_features_ = None
        original, original_weight = self._normalize_input(original, original_weight)
        target, target_weight = self._normalize_input(target, target_weight)

        # quantile levels strictly between 0 and 1 (both endpoints are dropped)
        quantile_levels = numpy.linspace(0, 1, self.n_percentiles + 1)[1:-1]
        self.edges = []
        for feature_index in range(self.n_features_):
            feature_edges = weighted_quantile(target[:, feature_index],
                                              quantiles=quantile_levels,
                                              sample_weight=target_weight)
            self.edges.append(feature_edges)

        histograms = []
        for values, value_weights in zip((original, target),
                                         (original_weight, target_weight)):
            raw_hist = bincount_nd(self.compute_bin_indices(values),
                                   weights=value_weights,
                                   shape=[self.n_percentiles] * self.n_features_)
            smoothed = gaussian_filter(raw_hist,
                                       sigma=self.n_neighs,
                                       truncate=2.5)
            # lower clip bounds the denominator of the ratio computed below
            histograms.append(smoothed.clip(self.min_in_the_bin))

        # histograms[0] comes from original, histograms[1] from target
        self.transition = histograms[1] / histograms[0]
        return self