Beispiel #1
0
    def probability_less_than(self, x=None, y=None, y_pred=None, **kwargs):
        """
        Calculate probability of the output value being smaller than a
        given numeric threshold.

        Args:
            x: Rank-k tensor containing the input data with the input channels
                (or features) for each sample located along its first dimension.
            y: The threshold value. Must be provided.
            y_pred: Optional pre-computed quantile predictions, which, when
                 provided, will be used to avoid repeated propagation of
                 the inputs through the network.
            **kwargs: Accepted for interface compatibility; not used by this
                 method.

        Returns:

            Tensor of rank k-1 containing for each input sample the
            probability of the corresponding y-value to be smaller than the
            given threshold.

        Raises:
            ValueError: If neither ``x`` nor ``y_pred`` is provided, or if
                the threshold ``y`` is missing.
        """
        # Validate all arguments before running the (potentially expensive)
        # forward pass through the network.
        if y_pred is None and x is None:
            raise ValueError(
                "One of the input arguments x or y_pred must be "
                " provided.")
        if y is None:
            raise ValueError("The y argument must be provided to compute the "
                             "probability.")
        if y_pred is None:
            y_pred = self.predict(x)

        def calculate_prob(y_pred):
            # Convert the quantile fractions to the array type of the
            # prediction so both live in the same tensor backend.
            module = get_array_module(y_pred)
            quantiles = to_array(module, self.quantiles, like=y_pred)
            return qq.probability_less_than(y_pred,
                                            quantiles,
                                            y,
                                            quantile_axis=self.quantile_axis)

        return apply(calculate_prob, y_pred)
Beispiel #2
0
    def predict(self, x):
        r"""
        Predict quantiles of the conditional distribution :math:`p(y|x)`.

        The inputs in ``x`` are propagated forward through ``self.model``.
        The raw model output is then combined with ``self.losses`` and
        ``self.transformation`` via ``apply``: where a transformation is
        available it is inverted first, and the loss object's ``predict``
        method produces the final prediction.

        Arguments:

            x(np.array): Rank-k tensor containing the input data with
                the input channels (or features) for each sample located
                along its first dimension.

        Returns:

            Rank-k tensor ``y_pred`` containing the quantiles of each input
            sample along its first dimension
        """
        raw_output = self.model.predict(x)

        def to_quantiles(output, loss, transformation):
            # Undo the output transformation, if any, before handing the
            # values to the loss for post-processing.
            if transformation is None:
                return loss.predict(output)
            return loss.predict(transformation.invert(output))

        return apply(to_quantiles,
                     raw_output,
                     self.losses,
                     self.transformation)
Beispiel #3
0
    def posterior_mean(self, x=None, y_pred=None, **kwargs):
        r"""
        Compute the posterior mean as the first moment of the predicted
        posterior CDF.

        Arguments:

            x: Rank-k tensor containing the input data with the input channels
                (or features) for each sample located along its first dimension.
                Only used when ``y_pred`` is not given.
            y_pred: Optional pre-computed quantile predictions, which, when
                 provided, will be used to avoid repeated propagation of
                 the inputs through the network.
            **kwargs: Accepted but not used by this method.

        Returns:

            Tensor of rank k-1 with the posterior means for all provided
            inputs.

        Raises:

            ValueError: If neither ``x`` nor ``y_pred`` is provided.
        """
        predictions = y_pred
        if predictions is None:
            if x is None:
                raise ValueError(
                    "One of the input arguments x or y_pred must be "
                    " provided.")
            predictions = self.predict(x)

        def mean_of(pred):
            # Quantile fractions are converted to the array type of 'pred'.
            taus = to_array(get_array_module(pred), self.quantiles, like=pred)
            return qq.posterior_mean(pred,
                                     taus,
                                     quantile_axis=self.quantile_axis)

        return apply(mean_of, predictions)
Beispiel #4
0
def test_apply():
    """
    Test that ``apply`` works on plain values as well as on dicts.

    For plain arguments the function is expected to be called directly on
    them; for dict arguments it is expected to be applied entry-wise, with
    the result stored under the same key.
    """
    def double(x):
        return 2 * x

    def add(x, y):
        return x + y

    d = {i: i for i in range(5)}

    # Plain (non-dict) arguments: compare against the actual expected
    # values rather than against the results themselves.
    assert apply(double, 1) == 2
    assert apply(add, 1, 1) == 2

    # Dict arguments: every entry must be exactly twice its key.
    d_a = apply(double, d)
    d_b = apply(add, d, d)
    for k in d:
        assert d_a[k] == 2 * k
        assert d_b[k] == 2 * k
Beispiel #5
0
    def aggregate_batches(self, batches):
        """
        Concatenate a sequence of batches into a single batch.

        Inputs are concatenated along axis 0. Outputs may be either plain
        tensors or dicts of tensors; in the dict case the values are
        collected and concatenated separately for each key. If
        ``self.backend`` has not been set yet, it is determined from the
        first input batch. When ``self.shuffle`` is set, inputs and outputs
        are reordered with one common random permutation drawn from
        ``self._rng``.

        Args:
            batches: Iterable of ``(x, y)`` batches to aggregate.

        Return:
            Tuple ``(x, y)`` containing the aggregated inputs and outputs in
            'batches'.
        """
        inputs = []
        targets = None
        for batch_x, batch_y in batches:
            inputs.append(batch_x)
            if isinstance(batch_y, dict):
                if targets is None:
                    targets = {}
                for name, values in batch_y.items():
                    targets.setdefault(name, []).append(values)
            else:
                if targets is None:
                    targets = []
                targets.append(batch_y)

        if self.backend is None:
            self.backend = get_tensor_backend(inputs[0])

        def concatenate(parts):
            return self.backend.concatenate(parts, 0)

        x = concatenate(inputs)
        y = utils.apply(concatenate, targets)

        if not self.shuffle:
            return x, y

        # The same permutation is used for inputs and outputs so that
        # samples stay aligned.
        order = self._rng.permutation(x.shape[0])

        def reorder(tensor):
            return tensor[order]

        return reorder(x), utils.apply(reorder, y)
Beispiel #6
0
 def __init__(self, bins, mask=None):
     """
     Args:
         bins: Bin boundaries; each entry handled by ``apply`` is
              converted to a float ``torch.Tensor``.
         mask: All values that are smaller than or equal to this value will
              be excluded from the calculation of the loss. When given, it
              is stored as ``np.float32`` and the parent class is
              initialised with ``reduction="none"``; otherwise
              ``reduction="mean"`` is used.
     """
     def as_float_tensor(values):
         return torch.Tensor(values).to(torch.float)

     self.bins = apply(as_float_tensor, bins)
     self.mask = None if mask is None else np.float32(mask)
     super().__init__(reduction="mean" if mask is None else "none")
Beispiel #7
0
    def posterior_quantiles(self,
                            x=None,
                            y_pred=None,
                            quantiles=None,
                            key=None):
        r"""
        Compute the posterior quantiles.

        Arguments:

            x: Rank-k tensor containing the input data with the input channels
                (or features) for each sample located along its first dimension.
            y_pred: Optional pre-computed network predictions, which, when
                 provided, will be used to avoid repeated propagation of
                 the inputs through the network.
            quantiles: List of quantile fraction values :math:`\tau_i \in [0, 1]`.
            key: Optional key selecting the entry of ``self.bins`` to use
                 when ``self.bins`` is a dict. Ignored when ``self.bins`` is
                 not a dict.

        Returns:

            Rank-k tensor containing the desired predicted quantiles along its
            first dimension.

        Raises:

            ValueError: If neither ``x`` nor ``y_pred`` is provided, or if
                ``quantiles`` is missing.
        """
        # Validate all arguments up front so that a missing 'quantiles'
        # argument is reported before the forward pass is executed.
        if y_pred is None and x is None:
            raise ValueError("One of the keyword arguments 'x' or 'y_pred'"
                             " must be provided.")
        if quantiles is None:
            raise ValueError(
                "The 'quantiles' keyword argument must be provided to "
                "calculate the posterior quantiles.")
        if y_pred is None:
            y_pred = self.predict(x)

        bins = self.bins
        if key is not None and isinstance(bins, dict):
            bins = bins[key]

        def calculate_quantiles(y_pred, bins):
            # Bring the bin boundaries into the array type of the prediction.
            module = get_array_module(y_pred)
            bins = to_array(module, bins, like=y_pred)
            return qd.posterior_quantiles(y_pred,
                                          bins,
                                          quantiles,
                                          bin_axis=self.bin_axis)

        return apply(calculate_quantiles, y_pred, bins)
Beispiel #8
0
    def crps(self, x=None, y_pred=None, y_true=None, **kwargs):
        r"""
        Compute the Continuous Ranked Probability Score (CRPS).

        This function uses a piece-wise linear fit to the approximate posterior
        CDF obtained from the predicted quantiles in :code:`y_pred` to
        approximate the continuous ranked probability score (CRPS):

        .. math::
            \text{CRPS}(\mathbf{y}, x) = \int_{-\infty}^\infty (F_{x | \mathbf{y}}(x')
            - \mathrm{1}_{x < x'})^2 \: dx'

        The quantile fractions corresponding to the predictions are taken
        from ``self.quantiles``.

        Arguments:

            x: Rank-k tensor containing the input data with the input channels
                (or features) for each sample located along its first dimension.
            y_pred: Optional pre-computed quantile predictions, which, when
                 provided, will be used to avoid repeated propagation of
                 the inputs through the network.
            y_true: Array containing the `n` true values, i.e. samples of the
                 true conditional distribution predicted by the QRNN.
            **kwargs: Accepted but not used by this method.

        Returns:

            Tensor of rank k-1 containing the CRPS values for each of the samples.

        Raises:

            ValueError: If neither ``x`` nor ``y_pred`` is provided, or if
                ``y_true`` is missing.
        """
        needs_forward_pass = y_pred is None
        if needs_forward_pass and x is None:
            raise ValueError(
                "One of the input arguments x or y_pred must be "
                " provided.")
        if needs_forward_pass:
            y_pred = self.predict(x)
        if y_true is None:
            raise ValueError(
                "The y_true argument must be provided to calculate "
                "the CRPS provided.")

        def score(pred):
            taus = to_array(get_array_module(pred), self.quantiles, like=pred)
            return qq.crps(pred,
                           y_true,
                           taus,
                           quantile_axis=self.quantile_axis)

        return apply(score, y_pred)
Beispiel #9
0
    def cdf(self, x=None, y_pred=None, **kwargs):
        r"""
        Approximate the posterior CDF for given inputs ``x``.

        Propagates the inputs in ``x`` forward through the network and
        approximates the posterior CDF using a piecewise linear function.

        The piecewise linear function is given by its values at the quantiles
        :math:`y_{\tau_i}` for :math:`\tau = \{0.0, \tau_1, \ldots,
        \tau_k, 1.0\}` where :math:`\tau_i` are the quantile fractions to be
        predicted by the network. The values for :math:`y_{\tau={0.0}}`
        and :math:`y_{\tau={1.0}}` are computed using

        .. math::

            y_{\tau=0.0} = 2.0 x_{\tau_1} - x_{\tau_2}

            y_{\tau=1.0} = 2.0 x_{\tau_k} - x_{\tau_{k-1}}

        Arguments:

            x: Rank-k tensor containing the input data with
                the input channels (or features) for each sample located
                along its first dimension.
            y_pred: Optional pre-computed quantile predictions, which, when
                 provided, will be used to avoid repeated propagation of
                 the inputs through the network.
            **kwargs: Accepted but not used by this method.

        Returns:

            Tuple ``(y_cdf, cdf)`` containing the abscissa-values ``y_cdf`` and
            the ordinates values ``cdf`` of the piece-wise linear approximation
            of the CDF :math:`F(y)`.

        Raises:

            ValueError: If neither ``x`` nor ``y_pred`` is provided.
        """
        predictions = y_pred
        if predictions is None:
            if x is None:
                raise ValueError(
                    "One of the input arguments x or y_pred must be "
                    " provided.")
            predictions = self.predict(x)

        def cdf_of(pred):
            taus = to_array(get_array_module(pred), self.quantiles, like=pred)
            return qq.cdf(pred, taus, quantile_axis=self.quantile_axis)

        return apply(cdf_of, predictions)
Beispiel #10
0
    def sample_posterior(self, x=None, y_pred=None, n_samples=1, key=None):
        r"""
        Generates :code:`n_samples` samples from the predicted posterior
        distribution for the input vector :code:`x`. The sampling is
        delegated to ``qd.sample_posterior`` using the bin boundaries
        stored in ``self.bins``.

        Arguments:

            x: Rank-k tensor containing the input data with
                the input channels (or features) for each sample located
                along its first dimension.
            y_pred: Optional pre-computed network predictions, which, when
                 provided, will be used to avoid repeated propagation of
                 the inputs through the network.
            n_samples: The number of samples to generate.
            key: Optional key selecting the entry of ``self.bins`` to use
                 when ``self.bins`` is a dict. Ignored otherwise.

        Returns:

            Rank-k tensor containing the random samples for each input sample
            along the first dimension.

        Raises:

            ValueError: If neither ``x`` nor ``y_pred`` is provided.
        """
        if y_pred is None and x is None:
            raise ValueError(
                "One of the input arguments x or y_pred must be "
                " provided.")
        if y_pred is None:
            y_pred = self.predict(x)

        keyed = key is not None and isinstance(self.bins, dict)
        selected_bins = self.bins[key] if keyed else self.bins

        def draw(pred, bin_edges):
            # Bin boundaries are converted to the array type of 'pred'.
            edges = to_array(get_array_module(pred), bin_edges, like=pred)
            return qd.sample_posterior(pred,
                                       edges,
                                       n_samples=n_samples,
                                       bin_axis=self.bin_axis)

        return apply(draw, y_pred, selected_bins)
Beispiel #11
0
    def probability_larger_than(self, x=None, y=None, y_pred=None, key=None):
        """
        Calculate probability of the output value being larger than a
        given numeric threshold.

        Args:
            x: Rank-k tensor containing the input data with the input channels
                (or features) for each sample located along its first dimension.
            y: The threshold value. Must be provided.
            y_pred: Optional pre-computed network predictions, which, when
                 provided, will be used to avoid repeated propagation of
                 the inputs through the network.
            key: Optional key selecting the entry of ``self.bins`` to use
                 when ``self.bins`` is a dict. Ignored otherwise.

        Returns:

            Tensor of rank k-1 containing for each input sample the
            probability of the corresponding y-value to be larger than the
            given threshold.

        Raises:
            ValueError: If neither ``x`` nor ``y_pred`` is provided, or if
                ``y`` is missing.
        """
        predictions = y_pred
        if predictions is None:
            if x is None:
                raise ValueError(
                    "One of the input arguments x or y_pred must be "
                    " provided.")
            predictions = self.predict(x)
        if y is None:
            raise ValueError("The y argument must be provided to compute the "
                             " probability.")

        selected_bins = self.bins
        if key is not None and isinstance(self.bins, dict):
            selected_bins = self.bins[key]

        def exceedance(pred, bin_edges):
            edges = to_array(get_array_module(pred), bin_edges, like=pred)
            return qd.probability_larger_than(pred,
                                              edges,
                                              y,
                                              bin_axis=self.bin_axis)

        return apply(exceedance, predictions, selected_bins)
Beispiel #12
0
    def quantile_function(self, x=None, y_pred=None, y=None, key=None):
        r"""
        Evaluate the quantile function at given y values.

        Arguments:

            x: Rank-k tensor containing the input data with
                the input channels (or features) for each sample located
                along its first dimension.
            y_pred: Optional pre-computed predicted pdf, which, when
                 provided, will be used to avoid repeated propagation of
                 the inputs through the network.
            y: Rank-k tensor containing the values at which to evaluate the
                quantile function for each of the inputs in ``x``.
            key: Optional key selecting the entry of ``self.bins`` to use
                 when ``self.bins`` is a dict. Ignored otherwise.

        Returns:

            The result of ``qd.quantile_function`` for each prediction,
            i.e. the quantile function evaluated at ``y``.

        Raises:

            ValueError: If neither ``x`` nor ``y_pred`` is provided.
        """
        if y_pred is None and x is None:
            raise ValueError(
                "One of the input arguments x or y_pred must be "
                " provided.")
        if y_pred is None:
            y_pred = self.predict(x)

        if key is not None and isinstance(self.bins, dict):
            selected_bins = self.bins[key]
        else:
            selected_bins = self.bins

        def evaluate(pred, bin_edges):
            edges = to_array(get_array_module(pred), bin_edges, like=pred)
            return qd.quantile_function(pred,
                                        y,
                                        edges,
                                        bin_axis=self.bin_axis)

        return apply(evaluate, y_pred, selected_bins)
Beispiel #13
0
    def crps(self, x=None, y_pred=None, y_true=None, key=None):
        r"""
        Calculate CRPS score for given reference values.

        Arguments:

            x: Rank-k tensor containing the input data with
                the input channels (or features) for each sample located
                along its first dimension.
            y_pred: Optional pre-computed network predictions, which, when
                 provided, will be used to avoid repeated propagation of
                 the inputs through the network.
            y_true: Rank-k tensor containing the true y values. Must be
                 provided.
            key: Optional key selecting the entry of ``self.bins`` to use
                 when ``self.bins`` is a dict. Ignored otherwise.

        Returns:

            Rank-k tensor containing crps values for all samples in x.

        Raises:

            ValueError: If neither ``x`` nor ``y_pred`` is provided, or if
                ``y_true`` is missing.
        """
        # Validate all arguments before the (potentially expensive) forward
        # pass so that a missing reference value is reported clearly instead
        # of failing somewhere inside the CRPS computation.
        if y_pred is None and x is None:
            raise ValueError(
                "One of the input arguments x or y_pred must be "
                " provided.")
        if y_true is None:
            raise ValueError(
                "The y_true argument must be provided to calculate "
                "the CRPS.")
        if y_pred is None:
            y_pred = self.predict(x)

        bins = self.bins
        if key is not None and isinstance(bins, dict):
            bins = bins[key]

        def calculate_crps(y_pred, bins):
            # Bring the bin boundaries into the array type of the prediction.
            module = get_array_module(y_pred)
            bins = to_array(module, bins, like=y_pred)
            return qd.crps(y_pred, y_true, bins, bin_axis=self.bin_axis)

        return apply(calculate_crps, y_pred, bins)
Beispiel #14
0
    def pdf(self, x=None, y_pred=None, **kwargs):
        r"""
        Approximate the posterior probability density function (PDF) for given
        inputs ``x``.

        The PDF is approximated by computing the derivative of the piece-wise
        linear approximation of the CDF as computed by the
        :py:meth:`quantnn.QRNN.cdf` function.

        Arguments:

            x: Rank-k tensor containing the input data with
                the input channels (or features) for each sample located
                along its first dimension.
            y_pred: Optional pre-computed quantile predictions, which, when
                 provided, will be used to avoid repeated propagation of
                 the inputs through the network.
            **kwargs: Accepted but not used by this method.

        Returns:

            Tuple (x_pdf, y_pdf) containing the array with shape `(n, k)`  containing
            the x and y coordinates describing the PDF for the inputs in ``x``.

        Raises:

            ValueError: If neither ``x`` nor ``y_pred`` is provided.
        """
        if y_pred is None:
            if x is None:
                raise ValueError(
                    "One of the input arguments x or y_pred must be "
                    " provided.")
            y_pred = self.predict(x)

        # NOTE: The computation below must live outside of the
        # 'y_pred is None' branch; otherwise the method returns None
        # whenever pre-computed predictions are passed in.
        def calculate_pdf(y_pred):
            module = get_array_module(y_pred)
            quantiles = to_array(module, self.quantiles, like=y_pred)
            return qq.pdf(y_pred,
                          quantiles,
                          quantile_axis=self.quantile_axis)

        return apply(calculate_pdf, y_pred)
Beispiel #15
0
    def sample_posterior_gaussian_fit(self, x=None, y_pred=None, n_samples=1):
        r"""
        Generates :code:`n_samples` samples from the predicted posterior
        distribution for the input vector :code:`x`. The sampling
        is performed using a Gaussian fit to the predicted quantiles.

        Arguments:

            x: Rank-k tensor containing the input data with the input channels
                (or features) for each sample located along its first dimension.
            y_pred: Optional pre-computed quantile predictions, which, when
                 provided, will be used to avoid repeated propagation of
                 the inputs through the network.
            n_samples(int): The number of samples to generate.

        Returns:

            The result of ``qq.sample_posterior_gaussian`` for each
            prediction, i.e. the samples drawn from the Gaussian fit.

        Raises:

            ValueError: If neither ``x`` nor ``y_pred`` is provided.
        """
        predictions = y_pred
        if predictions is None:
            if x is None:
                raise ValueError(
                    "One of the input arguments x or y_pred must be "
                    " provided.")
            predictions = self.predict(x)

        def draw(pred):
            taus = to_array(get_array_module(pred), self.quantiles, like=pred)
            return qq.sample_posterior_gaussian(
                pred,
                taus,
                n_samples=n_samples,
                quantile_axis=self.quantile_axis)

        return apply(draw, predictions)
Beispiel #16
0
 def predict(self, x):
     """
     Run the model on ``x`` and post-process its output.

     The raw output of ``self.model.predict`` is combined with
     ``self.bins`` through ``apply``, which invokes
     ``self._post_process_prediction`` on them.

     Args:
         x: Input data passed unchanged to ``self.model.predict``.

     Returns:
         The post-processed prediction(s) as returned by ``apply``.
     """
     raw_output = self.model.predict(x)
     post_process = self._post_process_prediction
     return apply(post_process, raw_output, self.bins)