Ejemplo n.º 1
0
def test_weighted_percentile_2d():
    # Check for when array 2D and sample_weight 1D
    rng = np.random.RandomState(0)
    x1 = rng.randint(10, size=10)
    w1 = rng.choice(5, size=10)

    x2 = rng.randint(20, size=10)
    x_2d = np.vstack((x1, x2)).T

    w_median = _weighted_percentile(x_2d, w1)
    p_axis_0 = [
        _weighted_percentile(x_2d[:, i], w1) for i in range(x_2d.shape[1])
    ]
    assert_allclose(w_median, p_axis_0)

    # Check when array and sample_weight boht 2D
    w2 = rng.choice(5, size=10)
    w_2d = np.vstack((w1, w2)).T

    w_median = _weighted_percentile(x_2d, w_2d)
    p_axis_0 = [
        _weighted_percentile(x_2d[:, i], w_2d[:, i])
        for i in range(x_2d.shape[1])
    ]
    assert_allclose(w_median, p_axis_0)
Ejemplo n.º 2
0
def test_weighted_percentile_zero_weight_zero_percentile():
    y = np.array([0, 1, 2, 3, 4, 5])
    sw = np.array([0, 0, 1, 1, 1, 0])
    score = _weighted_percentile(y, sw, 0)
    assert approx(score) == 2

    score = _weighted_percentile(y, sw, 50)
    assert approx(score) == 3

    score = _weighted_percentile(y, sw, 100)
    assert approx(score) == 4
Ejemplo n.º 3
0
def test_dummy_regressor_sample_weight(n_samples=10):
    random_state = np.random.RandomState(seed=1)

    X = [[0]] * n_samples
    y = random_state.rand(n_samples)
    sample_weight = random_state.rand(n_samples)

    est = DummyRegressor(strategy="mean").fit(X, y, sample_weight)
    assert est.constant_ == np.average(y, weights=sample_weight)

    est = DummyRegressor(strategy="median").fit(X, y, sample_weight)
    assert est.constant_ == _weighted_percentile(y, sample_weight, 50.0)

    est = DummyRegressor(strategy="quantile", quantile=0.95).fit(X, y, sample_weight)
    assert est.constant_ == _weighted_percentile(y, sample_weight, 95.0)
Ejemplo n.º 4
0
def test_dummy_regressor_sample_weight(n_samples=10):
    random_state = np.random.RandomState(seed=1)

    X = [[0]] * n_samples
    y = random_state.rand(n_samples)
    sample_weight = random_state.rand(n_samples)

    est = DummyRegressor(strategy="mean").fit(X, y, sample_weight)
    assert_equal(est.constant_, np.average(y, weights=sample_weight))

    est = DummyRegressor(strategy="median").fit(X, y, sample_weight)
    assert_equal(est.constant_, _weighted_percentile(y, sample_weight, 50.0))

    est = DummyRegressor(strategy="quantile", quantile=0.95).fit(X, y, sample_weight)
    assert_equal(est.constant_, _weighted_percentile(y, sample_weight, 95.0))
Ejemplo n.º 5
0
def test_weighted_percentile_zero_weight():
    y = np.empty(102, dtype=np.float64)
    y.fill(1.0)
    sw = np.ones(102, dtype=np.float64)
    sw.fill(0.0)
    score = _weighted_percentile(y, sw, 50)
    assert score == 1.0
Ejemplo n.º 6
0
def test_weighted_percentile_equal():
    y = np.empty(102, dtype=np.float64)
    y.fill(0.0)
    sw = np.ones(102, dtype=np.float64)
    sw[-1] = 0.0
    score = _weighted_percentile(y, sw, 50)
    assert score == 0
def test_weighted_percentile_zero_weight():
    y = np.empty(102, dtype=np.float64)
    y.fill(1.0)
    sw = np.ones(102, dtype=np.float64)
    sw.fill(0.0)
    score = _weighted_percentile(y, sw, 50)
    assert score == 1.0
def test_weighted_percentile_equal():
    y = np.empty(102, dtype=np.float64)
    y.fill(0.0)
    sw = np.ones(102, dtype=np.float64)
    sw[-1] = 0.0
    score = _weighted_percentile(y, sw, 50)
    assert score == 0
Ejemplo n.º 9
0
def median_absolute_error(y_true, y_pred, *, multioutput='uniform_average',
                          sample_weight=None):
    """Median absolute error regression loss.
    Median absolute error output is non-negative floating point. The best value
    is 0.0. Read more in the :ref:`User Guide <median_absolute_error>`.
    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.
    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.
    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values. Array-like value defines
        weights used to average errors.
        'raw_values' :
            Returns a full set of errors in case of multioutput input.
        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.
    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.
        .. versionadded:: 0.24
    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then mean absolute error is returned
        for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.
    Examples
    --------
    >>> from sklearn.metrics import median_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> median_absolute_error(y_true, y_pred)
    0.5
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> median_absolute_error(y_true, y_pred)
    0.75
    >>> median_absolute_error(y_true, y_pred, multioutput='raw_values')
    array([0.5, 1. ])
    >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.85
    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput)
    output_errors = np.empty(shape=y_true.shape, dtype=object)
    for k, (y_t, y_p) in enumerate(zip(y_true, y_pred)):
        if sample_weight is None:
            output_errors[k] = np.median(np.abs(y_t - y_p), axis=0)
        else:
            output_errors[k] = _weighted_percentile(np.abs(y_pred - y_true),
                                                    sample_weight=sample_weight)
        if isinstance(multioutput, str):
            if multioutput == 'uniform_average':
                # pass None as weights to np.average: uniform mean
                output_errors[k] = np.average(output_errors[k], weights=None)

    return np.average(output_errors, weights=None)
Ejemplo n.º 10
0
def test_weighted_percentile():
    y = np.empty(102, dtype=np.float64)
    y[:50] = 0
    y[-51:] = 2
    y[-1] = 100000
    y[50] = 1
    sw = np.ones(102, dtype=np.float64)
    sw[-1] = 0.0
    score = _weighted_percentile(y, sw, 50)
    assert score == 1
Ejemplo n.º 11
0
def test_weighted_median_equal_weights():
    # Checks weighted percentile=0.5 is same as median when weights equal
    rng = np.random.RandomState(0)
    # Odd size as _weighted_percentile takes lower weighted percentile
    x = rng.randint(10, size=11)
    weights = np.ones(x.shape)

    median = np.median(x)
    w_median = _weighted_percentile(x, weights)
    assert median == approx(w_median)
def test_weighted_percentile():
    y = np.empty(102, dtype=np.float64)
    y[:50] = 0
    y[-51:] = 2
    y[-1] = 100000
    y[50] = 1
    sw = np.ones(102, dtype=np.float64)
    sw[-1] = 0.0
    score = _weighted_percentile(y, sw, 50)
    assert score == 1
Ejemplo n.º 13
0
def test_weighted_median_integer_weights():
    # Checks weighted percentile=0.5 is same as median when manually weight
    # data
    rng = np.random.RandomState(0)
    x = rng.randint(20, size=10)
    weights = rng.choice(5, size=10)
    x_manual = np.repeat(x, weights)

    median = np.median(x_manual)
    w_median = _weighted_percentile(x, weights)

    assert median == approx(w_median)
Ejemplo n.º 14
0
    def fit(self, X, y, sample_weight=None):

        allowed_strategies = ("mean", "median", "quantile", "constant", "std")
        if self.strategy not in allowed_strategies:
            raise ValueError("Unknown strategy type: %s, expected one of %s." %
                             (self.strategy, allowed_strategies))

        y = check_array(y, ensure_2d=False)
        if len(y) == 0:
            raise ValueError("y must not be empty.")

        self.output_2d_ = y.ndim == 2
        if y.ndim == 1:
            y = np.reshape(y, (-1, 1))
        self.n_outputs_ = y.shape[1]

        check_consistent_length(X, y, sample_weight)

        if self.strategy == "mean":
            self.constant_ = np.average(y, axis=0, weights=sample_weight)

        elif self.strategy == "std":
            self.constant_ = np.std(y, axis=0)

        elif self.strategy == "median":
            if sample_weight is None:
                self.constant_ = np.median(y, axis=0)
            else:
                self.constant_ = [
                    _weighted_percentile(y[:, k],
                                         sample_weight,
                                         percentile=50.)
                    for k in range(self.n_outputs_)
                ]

        elif self.strategy == "quantile":
            if self.quantile is None or not np.isscalar(self.quantile):
                raise ValueError("Quantile must be a scalar in the range "
                                 "[0.0, 1.0], but got %s." % self.quantile)

            percentile = self.quantile * 100.0
            if sample_weight is None:
                self.constant_ = np.percentile(y, axis=0, q=percentile)
            else:
                self.constant_ = [
                    _weighted_percentile(y[:, k],
                                         sample_weight,
                                         percentile=percentile)
                    for k in range(self.n_outputs_)
                ]

        elif self.strategy == "constant":
            if self.constant is None:
                raise TypeError("Constant target value has to be specified "
                                "when the constant strategy is used.")

            self.constant = check_array(self.constant,
                                        accept_sparse=['csr', 'csc', 'coo'],
                                        ensure_2d=False,
                                        ensure_min_samples=0)

            if self.output_2d_ and self.constant.shape[0] != y.shape[1]:
                raise ValueError("Constant target value should have "
                                 "shape (%d, 1)." % y.shape[1])

            self.constant_ = self.constant

        self.constant_ = np.reshape(self.constant_, (1, -1))
        return self
Ejemplo n.º 15
0
    def fit(self, X, y, sample_weight=None):
        """Fit the random regressor.
        Parameters
        ----------
        X : {array-like, object with finite length or shape}
            Training data, requires length = n_samples
        y : array-like, shape = [n_samples] or [n_samples, n_outputs]
            Target values.
        sample_weight : array-like of shape = [n_samples], optional
            Sample weights.
        Returns
        -------
        self : object
        """
        allowed_strategies = ("mean", "median", "quantile", "constant","median_nonzero")
        if self.strategy not in allowed_strategies:
            raise ValueError("Unknown strategy type: %s, expected one of %s."
                             % (self.strategy, allowed_strategies))

        y = check_array(y, ensure_2d=False)
        if len(y) == 0:
            raise ValueError("y must not be empty.")

        self.output_2d_ = y.ndim == 2
        if y.ndim == 1:
            y = np.reshape(y, (-1, 1))
        self.n_outputs_ = y.shape[1]

        check_consistent_length(X, y, sample_weight)

        if self.strategy == "mean":
            self.constant_ = np.average(y, axis=0, weights=sample_weight)

        elif self.strategy == "median":
            if sample_weight is None:
                self.constant_ = np.median(y, axis=0)
            else:
                self.constant_ = [_weighted_percentile(y[:, k], sample_weight,
                                                       percentile=50.)
                                  for k in range(self.n_outputs_)]
        
        elif self.strategy == "median_nonzero":
            if sample_weight is None:
                self.constant_ = np.median(y[y > 0], axis=0)
            else:
                self.constant_ = [_weighted_percentile(y[y > 0][:, k], sample_weight,
                                                       percentile=50.)
                                  for k in range(self.n_outputs_)]

        elif self.strategy == "quantile":
            if self.quantile is None or not np.isscalar(self.quantile):
                raise ValueError("Quantile must be a scalar in the range "
                                 "[0.0, 1.0], but got %s." % self.quantile)

            percentile = self.quantile * 100.0
            if sample_weight is None:
                self.constant_ = np.percentile(y, axis=0, q=percentile)
            else:
                self.constant_ = [_weighted_percentile(y[:, k], sample_weight,
                                                       percentile=percentile)
                                  for k in range(self.n_outputs_)]

        elif self.strategy == "constant":
            if self.constant is None:
                raise TypeError("Constant target value has to be specified "
                                "when the constant strategy is used.")

            self.constant = check_array(self.constant,
                                        accept_sparse=['csr', 'csc', 'coo'],
                                        ensure_2d=False, ensure_min_samples=0)

            if self.output_2d_ and self.constant.shape[0] != y.shape[1]:
                raise ValueError(
                    "Constant target value should have "
                    "shape (%d, 1)." % y.shape[1])

            self.constant_ = self.constant

        self.constant_ = np.reshape(self.constant_, (1, -1))
        return self