def test_utils_casting(): """Just gonna toss this in here...""" df = pd.DataFrame(np.random.randn(5, 2)) o = casting.to_numpy(df) assert isinstance(o, np.ndarray) assert o.ndim == 2 assert o.shape[0] == 5 assert o.shape[1] == 2 df = pd.DataFrame(np.random.randn(5, 2)) o = casting.to_tensor(df) assert o.ndim == 2 assert o.shape[0] == 5 assert o.shape[1] == 2 df = pd.Series(np.random.randn(5)) o = casting.to_tensor(df) assert o.ndim == 2 assert o.shape[0] == 5 assert o.shape[1] == 1 x = tf.random.normal([5, 2]) o = casting.to_tensor(x) assert o.ndim == 2 assert o.shape[0] == 5 assert o.shape[1] == 2
def _get_y(self, x, y):
    """Get y, even when x is a DataGenerator and y is None"""
    if y is not None:
        return y
    else:
        y_true = [d for _, d in make_generator(x, y, test=True)]
        return np.concatenate(to_numpy(y_true), axis=0)
def _sample(self, x, func, ed=None, axis=1):
    """Sample from the model"""
    samples = []
    for x_data, y_data in make_generator(x, test=True):
        if x_data is None:
            samples += [func(self())]
        else:
            samples += [func(self(O.expand_dims(x_data, ed)))]
    return np.concatenate(to_numpy(samples), axis=axis)
def prior_sample(self, n: int = 1):
    """Sample from the prior distribution.

    Parameters
    ----------
    n : int > 0
        Number of samples to draw from the prior distribution.
        Default = 1

    Returns
    -------
    |ndarray|
        Samples from the parameter prior distribution.  If ``n > 1``, of size
        ``(n, self.prior.shape)``.  If ``n == 1``, of size
        ``(self.prior.shape)``.
    """
    if self.prior is None:
        return np.full(n, np.nan)
    elif n == 1:
        return to_numpy(self.transform(self.prior.sample()))
    else:
        return to_numpy(self.transform(self.prior.sample(n)))
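# Hedged usage sketch for ``prior_sample`` (illustrative only, not part of the
# library source): the ``probflow`` import and the ``Parameter`` constructor
# are assumptions; the ``prior_sample(n=...)`` signature comes from the method
# above.
import probflow as pf

w = pf.Parameter(name="w")
single = w.prior_sample()       # one draw from the prior, as an ndarray
draws = w.prior_sample(n=100)   # 100 draws stacked along the first axis
# A parameter with no prior returns an array of NaNs of length n.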
def posterior_sample(self, n: int = 1):
    """Sample from the posterior distribution.

    Parameters
    ----------
    n : int > 0
        Number of samples to draw from the posterior distribution.
        Default = 1

    Returns
    -------
    TODO
    """
    if n < 1:
        raise ValueError('n must be positive')
    with Sampling(n=n):
        return to_numpy(self())
def posterior_sample(self, n: int = 1):
    """Sample from the posterior distribution.

    Parameters
    ----------
    n : int > 0
        Number of samples to draw from the posterior distribution.
        Default = 1

    Returns
    -------
    |ndarray|
        Samples from the parameter's posterior distribution.  If ``n > 1``,
        of size ``(n, self.prior.shape)``.  If ``n == 1``, of size
        ``(self.prior.shape)``.
    """
    if n < 1:
        raise ValueError("n must be positive")
    with Sampling(n=n):
        return to_numpy(self())
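# Hedged usage sketch for ``posterior_sample`` (illustrative only): assumes a
# ProbFlow-style ``Parameter``; the import path and constructor are
# assumptions, while the ``posterior_sample(n=...)`` signature comes from the
# method above.
import probflow as pf

weight = pf.Parameter(name="weight")
one_draw = weight.posterior_sample()          # single posterior draw
many_draws = weight.posterior_sample(n=500)   # 500 draws stacked along the first axis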
def log_prob(
    self,
    x,
    y=None,
    individually=True,
    distribution=False,
    n=1000,
    batch_size=None,
):
    """Compute the log probability of `y` given the model

    TODO: Docs...

    Parameters
    ----------
    x : |ndarray| or |DataFrame| or |Series| or Tensor
        Independent variable values of the dataset to evaluate (aka the
        "features").
    y : |ndarray| or |DataFrame| or |Series| or Tensor
        Dependent variable values of the dataset to evaluate (aka the
        "target").
    individually : bool
        If ``individually`` is True, returns the log probability for each
        sample individually, so the return shape is ``(x.shape[0], ?)``.  If
        ``individually`` is False, returns the sum of all log probabilities,
        so the return shape is ``(1, ?)``.
    distribution : bool
        If ``distribution`` is True, returns the posterior distribution of
        log probabilities (``n`` samples from the model), so the return shape
        is ``(?, n)``.  If ``distribution`` is False, returns log posterior
        probabilities using the maximum a posteriori estimate for each
        parameter, so the return shape is ``(?, 1)``.
    n : int
        Number of samples to draw for each distribution if
        ``distribution=True``.
    batch_size : None or int
        Compute using batches of this many datapoints.  Default is ``None``
        (i.e., do not use batching).

    Returns
    -------
    log_probs : |ndarray|
        Log probabilities.  Shape is determined by the ``individually``,
        ``distribution``, and ``n`` kwargs.
    """

    # Get a distribution of samples
    if distribution:
        with Sampling(n=1, flipout=False):
            probs = []
            for i in range(n):
                t_probs = []
                for x_data, y_data in make_generator(
                    x, y, batch_size=batch_size
                ):
                    if x_data is None:
                        t_probs += [self().log_prob(y_data)]
                    else:
                        t_probs += [self(x_data).log_prob(y_data)]
                probs += [np.concatenate(to_numpy(t_probs), axis=0)]
        probs = np.stack(to_numpy(probs), axis=probs[0].ndim)

    # Use MAP estimates
    else:
        probs = []
        for x_data, y_data in make_generator(x, y, batch_size=batch_size):
            if x_data is None:
                probs += [self().log_prob(y_data)]
            else:
                probs += [self(x_data).log_prob(y_data)]
        probs = np.concatenate(to_numpy(probs), axis=0)

    # Return log prob of each sample or sum of log probs
    if individually:
        return probs
    else:
        return np.sum(probs, axis=0)
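# Hedged usage sketch for ``log_prob`` (illustrative only): the
# ``LinearRegression`` model, its constructor argument, and ``fit`` are
# assumptions about the surrounding ProbFlow-style API; the ``log_prob``
# calls match the signature above.
import numpy as np
import probflow as pf

x = np.random.randn(100, 3).astype("float32")
y = (x @ np.array([[1.0], [2.0], [-1.0]])).astype("float32")

model = pf.LinearRegression(3)   # assumed ready-made model
model.fit(x, y, epochs=10)

lp_each = model.log_prob(x, y)                             # one log prob per datapoint
lp_sum = model.log_prob(x, y, individually=False)          # summed over the dataset
lp_dist = model.log_prob(x, y, distribution=True, n=100)   # per-datapoint log probs over 100 posterior draws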
def metric(self, metric, x, y=None, batch_size=None):
    """Compute a metric of model performance

    TODO: docs

    TODO: note that this doesn't work w/ generative models

    Parameters
    ----------
    metric : str or callable
        Metric to evaluate.  Available metrics:

        * 'lp': log likelihood sum
        * 'log_prob': log likelihood sum
        * 'accuracy': accuracy
        * 'acc': accuracy
        * 'mean_squared_error': mean squared error
        * 'mse': mean squared error
        * 'sum_squared_error': sum squared error
        * 'sse': sum squared error
        * 'mean_absolute_error': mean absolute error
        * 'mae': mean absolute error
        * 'r_squared': coefficient of determination
        * 'r2': coefficient of determination
        * 'recall': true positive rate
        * 'sensitivity': true positive rate
        * 'true_positive_rate': true positive rate
        * 'tpr': true positive rate
        * 'specificity': true negative rate
        * 'selectivity': true negative rate
        * 'true_negative_rate': true negative rate
        * 'tnr': true negative rate
        * 'precision': precision
        * 'f1_score': F-measure
        * 'f1': F-measure
        * callable: a function which takes (y_true, y_pred)
    x : |ndarray| or |DataFrame| or |Series| or Tensor or |DataGenerator|
        Independent variable values of the dataset to evaluate (aka the
        "features").  Or a |DataGenerator| to generate both x and y.
    y : |ndarray| or |DataFrame| or |Series| or Tensor
        Dependent variable values of the dataset to evaluate (aka the
        "target").
    batch_size : None or int
        Compute using batches of this many datapoints.  Default is ``None``
        (i.e., do not use batching).

    Returns
    -------
    TODO
    """

    # Get true values and predictions
    y_true = []
    y_pred = []
    for x_data, y_data in make_generator(
        x, y, test=True, batch_size=batch_size
    ):
        y_true += [y_data]
        y_pred += [self(x_data).mean()]
    y_true = np.concatenate(to_numpy(y_true), axis=0)
    y_pred = np.concatenate(to_numpy(y_pred), axis=0)

    # Compute metric between true values and predictions
    metric_fn = get_metric_fn(metric)
    return metric_fn(y_true, y_pred)
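# Hedged usage sketch for ``metric`` (illustrative only): model construction
# and ``fit`` are assumptions; the ``metric`` calls match the method above,
# which accepts either a string name or a callable taking (y_true, y_pred).
import numpy as np
import probflow as pf

x = np.random.randn(100, 3).astype("float32")
y = x.sum(axis=1, keepdims=True).astype("float32")

model = pf.LinearRegression(3)   # assumed ready-made model
model.fit(x, y, epochs=10)

mse = model.metric("mse", x, y)   # mean squared error
mae = model.metric("mae", x, y)   # mean absolute error
custom = model.metric(lambda yt, yp: np.mean(np.abs(yt - yp)), x, y)   # callable metric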
def posterior_mean(self):
    """Get the mean of the posterior distribution(s).

    TODO
    """
    return to_numpy(self())
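# Hedged usage sketch for ``posterior_mean`` (illustrative only): assumes a
# ProbFlow-style ``Parameter``; only the zero-argument ``posterior_mean()``
# call is taken from the method above.
import probflow as pf

bias = pf.Parameter(name="bias")
print(bias.posterior_mean())   # posterior mean estimate as an ndarray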