Ejemplo n.º 1
0
def test_utils_casting():
    """Check output type/shape of ``casting.to_numpy`` and ``casting.to_tensor``."""

    def check_2d(obj, n_rows, n_cols):
        # Every case below expects a rank-2 result of a known size
        assert obj.ndim == 2
        assert obj.shape[0] == n_rows
        assert obj.shape[1] == n_cols

    # DataFrame -> ndarray
    frame = pd.DataFrame(np.random.randn(5, 2))
    arr = casting.to_numpy(frame)
    assert isinstance(arr, np.ndarray)
    check_2d(arr, 5, 2)

    # DataFrame -> tensor
    frame = pd.DataFrame(np.random.randn(5, 2))
    check_2d(casting.to_tensor(frame), 5, 2)

    # Series -> tensor (a length-5 Series is expected to come back as 5x1)
    series = pd.Series(np.random.randn(5))
    check_2d(casting.to_tensor(series), 5, 1)

    # Tensor -> tensor
    tensor = tf.random.normal([5, 2])
    check_2d(casting.to_tensor(tensor), 5, 2)
Ejemplo n.º 2
0
 def _get_y(self, x, y):
     """Get y, even when x is a DataGenerator and y is None"""
     if y is not None:
         return y
     else:
         y_true = [d for _, d in make_generator(x, y, test=True)]
         return np.concatenate(to_numpy(y_true), axis=0)
Ejemplo n.º 3
0
 def _sample(self, x, func, ed=None, axis=1):
     """Apply ``func`` to the model output for each batch and concatenate.

     Iterates over ``make_generator(x, test=True)``.  For a batch with no x
     data the model is called without arguments; otherwise it is called on
     ``O.expand_dims(x_data, ed)``.  ``func`` is applied to each model
     output, and the results are converted with ``to_numpy`` and
     concatenated along ``axis``.
     """
     draws = [
         func(self())
         if x_data is None
         else func(self(O.expand_dims(x_data, ed)))
         for x_data, _ in make_generator(x, test=True)
     ]
     return np.concatenate(to_numpy(draws), axis=axis)
Ejemplo n.º 4
0
    def prior_sample(self, n: int = 1):
        """Sample from the prior distribution.

        Parameters
        ----------
        n : int > 0
            Number of samples to draw from the prior distribution.
            Default = 1

        Returns
        -------
        |ndarray|
            Samples from the parameter prior distribution.  If ``n>1`` of size
            ``(n, self.prior.shape)``.  If ``n==1``, of size
            ``(self.prior.shape)``.  If the parameter has no prior
            (``self.prior is None``), a 1-D array of ``n`` NaN values.

        Raises
        ------
        ValueError
            If ``n`` is less than 1.
        """
        # Validate up front, matching posterior_sample's contract
        if n < 1:
            raise ValueError("n must be positive")

        # No prior to sample from: return NaN placeholders
        if self.prior is None:
            return np.full(n, np.nan)

        # A single draw is requested without a sample count so that the
        # result carries no leading sample dimension
        draws = self.prior.sample() if n == 1 else self.prior.sample(n)
        return to_numpy(self.transform(draws))
Ejemplo n.º 5
0
    def posterior_sample(self, n: int = 1):
        """Draw samples from the posterior distribution.

        Parameters
        ----------
        n : int > 0
            How many samples to draw from the posterior distribution.
            Default = 1

        Returns
        -------
        The result of calling this object inside a ``Sampling(n=n)``
        context, converted with ``to_numpy``.

        Raises
        ------
        ValueError
            If ``n`` is less than 1.
        """
        if n < 1:
            raise ValueError('n must be positive')

        # Calling the object while sampling is enabled yields the draws
        with Sampling(n=n):
            draws = self()
            return to_numpy(draws)
Ejemplo n.º 6
0
    def posterior_sample(self, n: int = 1):
        """Draw samples from the parameter's posterior distribution.

        Parameters
        ----------
        n : int > 0
            How many samples to draw from the posterior distribution.
            Default = 1

        Returns
        -------
        |ndarray|
            Samples from the parameter's posterior distribution.  If ``n>1`` of
            size ``(n, self.prior.shape)``.  If ``n==1``, of size
            ``(self.prior.shape)``.

        Raises
        ------
        ValueError
            If ``n`` is less than 1.
        """
        if n < 1:
            raise ValueError("n must be positive")

        # Calling the object while sampling is enabled yields the draws
        with Sampling(n=n):
            sampled = self()
            return to_numpy(sampled)
Ejemplo n.º 7
0
    def log_prob(
        self,
        x,
        y=None,
        individually=True,
        distribution=False,
        n=1000,
        batch_size=None,
    ):
        """Compute the log probability of `y` given the model

        The data is traversed in batches via ``make_generator``; for each
        batch the model is called (with no arguments when the batch has no
        x data) and ``log_prob`` of the batch's y data is evaluated on the
        result.  Per-batch values are concatenated along axis 0.

        Parameters
        ----------
        x : |ndarray| or |DataFrame| or |Series| or Tensor
            Independent variable values of the dataset to evaluate (aka the
            "features").
        y : |ndarray| or |DataFrame| or |Series| or Tensor
            Dependent variable values of the dataset to evaluate (aka the
            "target").
        individually : bool
            If ``individually`` is True, returns log probability for each
            sample individually, so return shape is ``(x.shape[0], ?)``.
            If ``individually`` is False, returns sum of all log probabilities
            (summed over axis 0), so return shape is ``(1, ?)``.
        distribution : bool
            If ``distribution`` is True, returns log probability posterior
            distribution (``n`` samples from the model),
            so return shape is ``(?, n)``.
            If ``distribution`` is False, returns log posterior probabilities
            using the maximum a posteriori estimate for each parameter,
            so the return shape is ``(?, 1)``.
        n : int
            Number of samples to draw for each distribution if
            ``distribution=True``.
        batch_size : None or int
            Compute using batches of this many datapoints.  Default is `None`
            (i.e., do not use batching).

        Returns
        -------
        log_probs : |ndarray|
            Log probabilities. Shape is determined by ``individually``,
            ``distribution``, and ``n`` kwargs.
        """

        def batch_log_probs():
            """Log probability of each batch from one pass over the data."""
            return [
                (self() if x_data is None else self(x_data)).log_prob(y_data)
                for x_data, y_data in make_generator(
                    x, y, batch_size=batch_size
                )
            ]

        if distribution:
            # One full pass over the data per draw, with sampling enabled
            draws = []
            with Sampling(n=1, flipout=False):
                for _ in range(n):
                    draws.append(
                        np.concatenate(to_numpy(batch_log_probs()), axis=0)
                    )
            # Stack the draws along a new trailing axis
            probs = np.stack(to_numpy(draws), axis=draws[0].ndim)
        else:
            # Single pass outside of a sampling context (MAP estimates)
            probs = np.concatenate(to_numpy(batch_log_probs()), axis=0)

        # Either the per-datapoint values or their sum over datapoints
        return probs if individually else np.sum(probs, axis=0)
Ejemplo n.º 8
0
    def metric(self, metric, x, y=None, batch_size=None):
        """Compute a metric of model performance

        Predictions are the ``mean()`` of the model output for each batch of
        x data; they are compared against the true y values using the
        function returned by ``get_metric_fn(metric)``.

        NOTE: every batch must supply x data (the model is always called on
        it), so this doesn't work w/ generative models.

        Parameters
        ----------
        metric : str or callable
            Metric to evaluate.  Available metrics:

            * 'lp': log likelihood sum
            * 'log_prob': log likelihood sum
            * 'accuracy': accuracy
            * 'acc': accuracy
            * 'mean_squared_error': mean squared error
            * 'mse': mean squared error
            * 'sum_squared_error': sum squared error
            * 'sse': sum squared error
            * 'mean_absolute_error': mean absolute error
            * 'mae': mean absolute error
            * 'r_squared': coefficient of determination
            * 'r2': coefficient of determination
            * 'recall': true positive rate
            * 'sensitivity': true positive rate
            * 'true_positive_rate': true positive rate
            * 'tpr': true positive rate
            * 'specificity': true negative rate
            * 'selectivity': true negative rate
            * 'true_negative_rate': true negative rate
            * 'tnr': true negative rate
            * 'precision': precision
            * 'f1_score': F-measure
            * 'f1': F-measure
            * callable: a function which takes (y_true, y_pred)

        x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
            Independent variable values of the dataset to evaluate (aka the
            "features").  Or a |DataGenerator| to generate both x and y.
        y : |ndarray| or |DataFrame| or |Series| or Tensor
            Dependent variable values of the dataset to evaluate (aka the
            "target").
        batch_size : None or int
            Compute using batches of this many datapoints.  Default is `None`
            (i.e., do not use batching).

        Returns
        -------
        Whatever the metric function returns when called as
        ``metric_fn(y_true, y_pred)``.
        """

        # Collect targets and mean predictions batch by batch
        batches = make_generator(x, y, test=True, batch_size=batch_size)
        targets, predictions = [], []
        for x_data, y_data in batches:
            targets.append(y_data)
            predictions.append(self(x_data).mean())

        # Join the batches back together along the first axis
        y_true = np.concatenate(to_numpy(targets), axis=0)
        y_pred = np.concatenate(to_numpy(predictions), axis=0)

        # Resolve the metric and evaluate it
        return get_metric_fn(metric)(y_true, y_pred)
Ejemplo n.º 9
0
    def posterior_mean(self):
        """Get the mean of the posterior distribution(s).

        Calls this object with no arguments and converts the result with
        ``to_numpy``.
        """
        estimate = self()
        return to_numpy(estimate)