Esempio n. 1
0
    def residuals_plot(self, x, y=None, **kwargs):
        """Draw the distribution of the model's prediction residuals.

        The residuals are computed by :meth:`residuals` for the given data
        and handed to :func:`.plot_dist`; the x-axis is then labeled
        "Residual (True - Predicted)".

        Parameters
        ----------
        x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
            Independent variable values of the dataset to evaluate (aka the
            "features").  Or a |DataGenerator| for both x and y.
        y : |ndarray| or |DataFrame| or |Series|
            Dependent variable values of the dataset to evaluate (aka the
            "target").  Default = `None`.
        **kwargs
            Additional keyword arguments, forwarded unchanged to
            :func:`.plot_dist`

        """
        # Compute the residuals and draw them in one step
        plot_dist(self.residuals(x, y), **kwargs)

        # Label the axis so the sign convention is visible on the figure
        plt.xlabel("Residual (True - Predicted)")
Esempio n. 2
0
    def r_squared_plot(self, x, y=None, n=1000, style="hist", **kwargs):
        """Draw the distribution of the Bayesian R-squared metric.

        The R-squared values are obtained from :meth:`~r_squared` (see that
        method for details on the metric) and drawn with :func:`.plot_dist`.

        Parameters
        ----------
        x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
            Independent variable values of the dataset to evaluate (aka the
            "features").  Or a |DataGenerator| for both x and y.
        y : |ndarray| or |DataFrame| or |Series|
            Dependent variable values of the dataset to evaluate (aka the
            "target").  Default = `None`.
        n : int
            Number of posterior draws to use for computing the r-squared
            distribution.  Default = `1000`.
        style : str
            Plot style, passed to :func:`.plot_dist` as its ``style``
            argument.  Default = ``"hist"``.
        **kwargs
            Additional keyword arguments, forwarded unchanged to
            :func:`.plot_dist`

        """
        # Compute the R-squared values and draw them in one step
        plot_dist(self.r_squared(x, y, n=n), style=style, **kwargs)

        # Label the axis with the metric name
        plt.xlabel("Bayesian R squared")
Esempio n. 3
0
    def residuals_plot(self, x, y=None, batch_size=None, **kwargs):
        r"""Draw the distribution of the model's prediction residuals.

        The residuals are computed by :meth:`residuals` for the given data
        (optionally in batches) and handed to :func:`.plot_dist`; the x-axis
        is then labeled "Residual (True - Predicted)".

        Parameters
        ----------
        x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
            Independent variable values of the dataset to evaluate (aka the
            "features").  Or a |DataGenerator| for both x and y.
        y : |ndarray| or |DataFrame| or |Series|
            Dependent variable values of the dataset to evaluate (aka the
            "target").  Default = `None`.
        batch_size : None or int
            Compute using batches of this many datapoints; passed through to
            :meth:`residuals`.  Default is `None` (i.e., do not use
            batching).
        **kwargs
            Additional keyword arguments, forwarded unchanged to
            :func:`.plot_dist`

        """
        # Compute the residuals and draw them in one step
        plot_dist(self.residuals(x, y, batch_size=batch_size), **kwargs)

        # Label the axis so the sign convention is visible on the figure
        plt.xlabel("Residual (True - Predicted)")
Esempio n. 4
0
    def posterior_plot(self,
                       n: int = 10000,
                       style: str = 'fill',
                       bins: Union[int, list, np.ndarray] = 20,
                       ci: float = 0.0,
                       bw: float = 0.075,
                       alpha: float = 0.4,
                       color=None,
                       **kwargs):
        """Draw the distribution of samples taken from the posterior.

        Samples are drawn with :meth:`posterior_sample` and passed, together
        with the plotting options below, to
        :meth:`.utils.plotting.plot_dist`.  The x-axis is labeled with
        ``self.name``.

        Parameters
        ----------
        n : int
            Number of samples to take from each posterior distribution for
            estimating the density.  Default = 10000
        style : str
            Which style of plot to show.  Available types are:

            * ``'fill'`` - filled density plot (the default)
            * ``'line'`` - line density plot
            * ``'hist'`` - histogram

        bins : int or list or |ndarray|
            Number of bins to use for the posterior density histogram (if
            ``style='hist'``), or a list or vector of bin edges.
            Default = 20
        ci : float between 0 and 1
            Confidence interval to plot.  Default = 0.0 (i.e., not plotted)
        bw : float
            Bandwidth of the kernel density estimate (if using
            ``style='line'`` or ``style='fill'``).  Default is 0.075
        alpha : float between 0 and 1
            Transparency of fill/histogram.  Default = 0.4
        color : matplotlib color code or list of them
            Color(s) to use to plot the distribution.
            See https://matplotlib.org/tutorials/colors/colors.html
            Default = use the default matplotlib color cycle
        kwargs
            Additional keyword arguments are passed to
            :meth:`.utils.plotting.plot_dist`
        """

        # Draw samples from the posterior
        draws = self.posterior_sample(n=n)

        # Collect the explicit plotting options in one place
        dist_options = dict(
            xlabel=self.name,
            style=style,
            bins=bins,
            ci=ci,
            bw=bw,
            alpha=alpha,
            color=color,
        )

        # Plot the posterior densities (a key repeated in kwargs is still
        # rejected as a duplicate keyword argument)
        plot_dist(draws, **dist_options, **kwargs)

        # Label the x-axis with the parameter name
        plt.xlabel(self.name)
Esempio n. 5
0
    def pred_dist_plot(self, x, n=10000, cols=1, individually=False, **kwargs):
        """Plot posterior predictive distribution from the model given ``x``.

        Samples are drawn with :meth:`predictive_sample` and plotted with
        :func:`.plot_dist`, either all on one set of axes or on one subplot
        per datapoint.

        Parameters
        ----------
        x : |ndarray| or |DataFrame| or |Series| or |DataGenerator|
            Independent variable values of the dataset to evaluate (aka the
            "features").
        n : int
            Number of samples to draw from the model given ``x``.
            Default = 10000
        cols : int
            Divide the subplots into a grid with this many columns (if
            ``individually=True``).  Default = 1
        individually : bool
            If ``True``, plot one subplot per datapoint in ``x``, otherwise
            plot all the predictive distributions on the same plot.
            Default = ``False``
        **kwargs
            Additional keyword arguments are passed to :func:`.plot_dist`

        Raises
        ------
        NotImplementedError
            If the predictive samples have any trailing dimension (beyond
            the first two) of size greater than 1, i.e. the dependent
            variable is not scalar.

        """

        # Sample from the predictive distribution
        samples = self.predictive_sample(x, n=n)

        # Dependent variable must be scalar: the first two axes are used as
        # (samples, datapoints) and any further axes must be singletons
        Ns = samples.shape[0]
        N = samples.shape[1]
        if samples.ndim > 2 and any(e > 1 for e in samples.shape[2:]):
            raise NotImplementedError("only scalar dependent variables are "
                                      "supported")
        samples = samples.reshape([Ns, N])

        # Plot the predictive distributions
        if individually:
            # np.ceil returns a float, but the subplot grid needs an integer
            # number of rows (non-integer values are rejected by matplotlib)
            rows = int(np.ceil(N / cols))
            for i in range(N):
                plt.subplot(rows, cols, i + 1)
                plot_dist(samples[:, i], **kwargs)
                plt.xlabel("Predicted dependent variable value for " + str(i))
            plt.tight_layout()
        else:
            plot_dist(samples, **kwargs)
            plt.xlabel("Predicted dependent variable value")