def brownian_motion_unknown_scales(locs):
  """Brownian Motion model with scale parameters treated as random variables.

  Args:
    locs: Array of loc parameters with np.nan value if loc is unobserved in
      shape (num_timesteps,).

  Returns:
    model: `StanModel`.
  """
  code = """
  data {
    int<lower=0> num_timesteps;
    int<lower=0> num_observations;
    int<lower = 1, upper = num_timesteps> observation_indices[num_observations];
    vector[num_observations] observations;
  }
  parameters {
    real<lower=0> innovation_noise_scale;
    real<lower=0> observation_noise_scale;
    vector[num_timesteps] loc;
  }
  model {
    innovation_noise_scale ~ lognormal(0, 2);
    observation_noise_scale ~ lognormal(0, 2);
    loc[1] ~ normal(0, innovation_noise_scale);
    for (t in 2:num_timesteps){
      loc[t] ~ normal(loc[t-1], innovation_noise_scale);
    }
    observations ~ normal(loc[observation_indices], observation_noise_scale);
  }
  """
  # An entry of `locs` is observed iff it is finite (np.nan marks missing).
  observed = np.isfinite(locs)
  stan_data = {
      'num_timesteps': len(locs),
      'num_observations': len(locs[observed]),
      # Stan arrays are 1-indexed.
      'observation_indices': np.arange(1, len(locs) + 1)[observed],
      'observations': locs[observed]
  }
  model = util.cached_stan_model(code)

  def _extract_all_latents(samples):
    """Extracts the values of all latent variables."""
    scalar = lambda pattern: util.get_columns(samples, pattern)[:, 0]
    return {
        'innovation_noise_scale': scalar(r'^innovation_noise_scale$'),
        'observation_noise_scale': scalar(r'^observation_noise_scale$'),
        'locs': util.get_columns(samples, r'^loc\.\d+$')
    }

  return stan_model.StanModel(
      extract_fns={'identity': _extract_all_latents},
      sample_fn=util.make_sample_fn(model, data=stan_data),
  )
def brownian_motion(locs, innovation_noise, observation_noise):
  """Brownian Motion model.

  Args:
    locs: Array of loc parameters with np.nan value if loc is unobserved in
      shape (num_timesteps,)
    innovation_noise: Python `float`.
    observation_noise: Python `float`.

  Returns:
    model: `StanModel`.
  """
  code = """
  data {
    int<lower=0> num_timesteps;
    int<lower=0> num_observations;
    int<lower = 1, upper = num_timesteps> observation_indices[num_observations];
    vector[num_observations] observations;
    real<lower=0> innovation_noise;
    real<lower=0> observation_noise;
  }
  parameters {
    vector[num_timesteps] loc;
  }
  model {
    loc[1] ~ normal(0, innovation_noise);
    for (t in 2:num_timesteps){
      loc[t] ~ normal(loc[t-1], innovation_noise);
    }
    observations ~ normal(loc[observation_indices], observation_noise);
  }
  """
  # An entry of `locs` is observed iff it is finite (np.nan marks missing).
  observed = np.isfinite(locs)
  stan_data = {
      'num_timesteps': len(locs),
      'num_observations': len(locs[observed]),
      # Stan arrays are 1-indexed.
      'observation_indices': np.arange(1, len(locs) + 1)[observed],
      'observations': locs[observed],
      'innovation_noise': innovation_noise,
      'observation_noise': observation_noise
  }
  model = util.cached_stan_model(code)

  def _extract_locs(samples):
    """Extracts the values of all latent variables."""
    return util.get_columns(samples, r'^loc\.\d+$')

  return stan_model.StanModel(
      extract_fns={'identity': _extract_locs},
      sample_fn=util.make_sample_fn(model, data=stan_data),
  )
def eight_schools():
  """Eight Schools model.

  Returns:
    model: `StanModel`.
  """
  code = """
  data {
    int<lower=0> num_schools;
    real treatment_effects[num_schools];
    real<lower=0> treatment_stddevs[num_schools];
  }
  parameters {
    real avg_effect;
    real log_stddev;
    vector[num_schools] std_school_effects;
  }
  transformed parameters {
    vector[num_schools] school_effects;
    school_effects <- std_school_effects * exp(log_stddev) + avg_effect;
  }
  model {
    avg_effect ~ normal(0, 5);
    log_stddev ~ normal(5, 1);
    std_school_effects ~ normal(0, 1);
    treatment_effects ~ normal(school_effects, treatment_stddevs);
  }
  """
  # The classic eight-schools dataset (Rubin, 1981).
  stan_data = {
      'num_schools': 8,
      'treatment_effects': np.array([28, 8, -3, 7, -1, 1, 18, 12],
                                    dtype=np.float32),
      'treatment_stddevs': np.array([15, 10, 16, 11, 9, 11, 10, 18],
                                    dtype=np.float32)
  }
  model = util.cached_stan_model(code)

  def _extract_latents(samples):
    """Extracts the values of all latent variables."""
    scalar = lambda pattern: util.get_columns(samples, pattern)[:, 0]
    return collections.OrderedDict([
        ('avg_effect', scalar(r'^avg_effect$')),
        ('log_stddev', scalar(r'^log_stddev$')),
        ('school_effects', util.get_columns(samples,
                                            r'^school_effects\.\d+$')),
    ])

  return stan_model.StanModel(
      extract_fns={'identity': _extract_latents},
      sample_fn=util.make_sample_fn(model, data=stan_data),
  )
def item_response_theory(
    train_student_ids,
    train_question_ids,
    train_correct,
    test_student_ids=None,
    test_question_ids=None,
    test_correct=None,
):
  """One-parameter logistic item-response theory (IRT) model.

  Args:
    train_student_ids: integer `tensor` with shape `[num_train_points]`.
      training student ids, ranging from 0 to `num_students`.
    train_question_ids: integer `tensor` with shape `[num_train_points]`.
      training question ids, ranging from 0 to `num_questions`.
    train_correct: integer `tensor` with shape `[num_train_points]`. whether
      the student in the training set answered the question correctly, either
      0 or 1.
    test_student_ids: Integer `Tensor` with shape `[num_test_points]`. Testing
      student ids, ranging from 0 to `num_students`. Can be `None`, in which
      case test-related sample transformations are not computed.
    test_question_ids: Integer `Tensor` with shape `[num_test_points]`.
      Testing question ids, ranging from 0 to `num_questions`. Can be `None`,
      in which case test-related sample transformations are not computed.
    test_correct: Integer `Tensor` with shape `[num_test_points]`. Whether the
      student in the testing set answered the question correctly, either 0 or
      1. Can be `None`, in which case test-related sample transformations are
      not computed.

  Returns:
    target: `StanModel`.
  """
  # Each response is Bernoulli with logit
  #   mean_student_ability + student_ability[i] - question_difficulty[j].
  # `generated quantities` also computes the held-out negative log-likelihood.
  code = """
  data {
    int<lower=0> num_students;
    int<lower=0> num_questions;
    int<lower=0> num_train_pairs;
    int<lower=0> num_test_pairs;
    int<lower=1,upper=num_students> train_student_ids[num_train_pairs];
    int<lower=1,upper=num_questions> train_question_ids[num_train_pairs];
    int<lower=0,upper=1> train_responses[num_train_pairs];
    int<lower=1,upper=num_students> test_student_ids[num_test_pairs];
    int<lower=1,upper=num_questions> test_question_ids[num_test_pairs];
    int<lower=0,upper=1> test_responses[num_test_pairs];
  }
  parameters {
    real mean_student_ability;
    vector[num_students] student_ability;
    vector[num_questions] question_difficulty;
  }
  model {
    {
      mean_student_ability ~ normal(0.75, 1);
      student_ability ~ normal(0, 1);
      question_difficulty ~ normal(0, 1);
      for (i in 1:num_train_pairs) {
        real pair_logit;
        pair_logit = (
            mean_student_ability
            + student_ability[train_student_ids[i]]
            - question_difficulty[train_question_ids[i]]
        );
        train_responses[i] ~ bernoulli_logit(pair_logit);
      }
    }
  }
  generated quantities {
    real test_nll = 0.;
    real per_example_test_nll[num_test_pairs];
    {
      for (i in 1:num_test_pairs) {
        real pair_logit;
        pair_logit = (
            mean_student_ability
            + student_ability[test_student_ids[i]]
            - question_difficulty[test_question_ids[i]]
        );
        per_example_test_nll[i] = -bernoulli_logit_lpmf(
            test_responses[i] | pair_logit);
      }
      test_nll = sum(per_example_test_nll);
    }
  }
  """
  have_test = test_student_ids is not None
  # cmdstanpy can't handle zero-sized arrays at the moment:
  # https://github.com/stan-dev/cmdstanpy/issues/203
  if not have_test:
    # Substitute a 1-element dummy test set; its extract fns are not
    # registered below, so the dummy values never surface to callers.
    test_student_ids = train_student_ids[:1]
    test_question_ids = train_question_ids[:1]
    test_correct = train_correct[:1]
  stan_data = {
      'num_train_pairs': train_student_ids.shape[0],
      'num_test_pairs': test_student_ids.shape[0],
      # Ids are 0-based on the Python side, so the count is max id + 1.
      'num_students': max(int(train_student_ids.max()),
                          int(test_student_ids.max())) + 1,
      'num_questions': max(int(train_question_ids.max()),
                           int(test_question_ids.max())) + 1,
      'train_student_ids': train_student_ids + 1,  # N.B. Stan arrays are 1-indexed.
      'train_question_ids': train_question_ids + 1,
      'train_responses': train_correct,
      'test_student_ids': test_student_ids + 1,
      'test_question_ids': test_question_ids + 1,
      'test_responses': test_correct,
  }
  model = util.cached_stan_model(code)

  def _ext_identity(samples):
    """Extracts all the parameters."""
    res = collections.OrderedDict()
    res['mean_student_ability'] = util.get_columns(
        samples,
        r'^mean_student_ability$',
    )[:, 0]
    res['student_ability'] = util.get_columns(
        samples,
        r'^student_ability\[\d+\]$',
    )
    res['question_difficulty'] = util.get_columns(
        samples,
        r'^question_difficulty\[\d+\]$',
    )
    return res

  def _ext_test_nll(samples):
    # Total negative log-likelihood on the held-out pairs.
    return util.get_columns(samples, r'^test_nll$')[:, 0]

  def _ext_per_example_test_nll(samples):
    # Per-pair negative log-likelihood on the held-out pairs.
    return util.get_columns(samples, r'^per_example_test_nll\[\d+\]$')

  extract_fns = {'identity': _ext_identity}
  if have_test:
    extract_fns['test_nll'] = _ext_test_nll
    extract_fns['per_example_test_nll'] = _ext_per_example_test_nll

  return stan_model.StanModel(
      extract_fns=extract_fns,
      sample_fn=util.make_sample_fn(model, data=stan_data),
  )
def partially_observed_lorenz_system(observed_values, innovation_scale,
                                     observation_scale, observation_mask,
                                     step_size, observation_index):
  """Lorenz System model.

  Args:
    observed_values: Array of observed values.
    innovation_scale: Python `float`.
    observation_scale: Python `float`.
    observation_mask: `bool` array used to occlude observations.
    step_size: Python `float`.
    observation_index: `int` index used to pick which latent time series is
      observed.

  Returns:
    model: `StanModel`.
  """
  code = """
  data {
    int<lower=0> num_timesteps;
    int<lower=0> num_observations;
    int<lower=1> observation_state_index;
    int<lower = 1, upper = num_timesteps> observation_time_indices[
        num_observations];
    vector[num_observations] observations;
    real<lower=0> innovation_scale;
    real<lower=0> observation_scale;
    real<lower=0> step_size;
  }
  parameters {
    matrix[num_timesteps, 3] latents;
  }
  model {""" + LORENZ_SYSTEM_OBSERVATIONS_MODEL + '}'
  num_timesteps = len(observed_values)
  stan_data = {
      'num_timesteps': num_timesteps,
      'num_observations': len(observed_values[observation_mask]),
      # Stan arrays are 1-indexed.
      'observation_time_indices':
          np.arange(1, num_timesteps + 1)[observation_mask],
      'observations': observed_values[observation_mask],
      'observation_state_index': observation_index + 1,
      'innovation_scale': innovation_scale,
      'observation_scale': observation_scale,
      'step_size': step_size
  }
  model = util.cached_stan_model(code)

  def _ext_identity(samples):
    """Extracts the values of all latent variables."""
    latents = util.get_columns(samples, r'^latents\.\d+\.\d+$')
    # Last two dimensions are swapped in Stan output. The timestep count was
    # previously hard-coded to 30; derive it from the input so the model
    # works for series of any length.
    return latents.reshape((-1, 3, num_timesteps)).swapaxes(1, 2)

  extract_fns = {'identity': _ext_identity}

  return stan_model.StanModel(
      extract_fns=extract_fns,
      sample_fn=util.make_sample_fn(model, data=stan_data),
  )
def radon_contextual_effects(num_counties, train_log_uranium, train_floor,
                             train_county, train_floor_by_county,
                             train_log_radon):
  """Hierarchical model of measured radon concentration in homes.

  The Stan model is cut and pasted from:
  https://mc-stan.org/users/documentation/case-studies/radon.html#Correlations-among-levels

  Args:
    num_counties: `int`, number of counties represented in the data.
    train_log_uranium: Floating-point `Tensor` with shape
      `[num_train_points]`. Soil uranium measurements.
    train_floor: Integer `Tensor` with shape `[num_train_points]`. Floor of
      the house on which the measurement was taken.
    train_county: Integer `Tensor` with values in `range(0, num_counties)` of
      shape `[num_train_points]`. County in which the measurement was taken.
    train_floor_by_county: Floating-point `Tensor` with shape
      `[num_train_points]`. Average floor on which the measurement was taken
      for the county in which each house is located (the `Tensor` will have
      `num_counties` unique values). This represents the contextual effect.
    train_log_radon: Floating-point `Tensor` with shape `[num_train_points]`.
      Radon measurement for each house (the dependent variable in the model).

  Returns:
    model: `StanModel`.
  """
  # NOTE: `<-` is Stan's deprecated assignment operator, kept as-is from the
  # case study the model was pasted from.
  code = """
  data {
    int<lower=0> num_counties;
    int<lower=0> num_train;
    int<lower=0,upper=num_counties-1> county[num_train];
    vector[num_train] log_uranium;
    vector[num_train] which_floor;
    vector[num_train] floor_by_county;
    vector[num_train] log_radon;
  }
  parameters {
    vector[num_counties] county_effect;
    vector[3] weight;
    real county_effect_mean;
    real<lower=0,upper=100> county_effect_scale;
    real<lower=0,upper=100> log_radon_scale;
  }
  transformed parameters {
    vector[num_train] log_radon_mean;
    for (i in 1:num_train)
      log_radon_mean[i] <- county_effect[county[i] + 1]
                           + log_uranium[i] * weight[1]
                           + which_floor[i] * weight[2]
                           + floor_by_county[i] * weight[3];
  }
  model {
    county_effect_mean ~ normal(0, 1);
    county_effect ~ normal(county_effect_mean, county_effect_scale);
    weight ~ normal(0, 1);
    log_radon ~ normal(log_radon_mean, log_radon_scale);
  }
  """
  stan_data = {
      'num_train': train_log_radon.shape[0],
      'num_counties': num_counties,
      'county': np.array(train_county),
      'log_uranium': np.array(train_log_uranium),
      'floor_by_county': np.array(train_floor_by_county),
      'which_floor': np.array(train_floor),  # `floor` conflicts with a Stan fn
      'log_radon': np.array(train_log_radon)
  }
  model = util.cached_stan_model(code)

  def _ext_identity(samples):
    """Extracts the values of all latent variables."""
    res = collections.OrderedDict()
    res['county_effect_mean'] = util.get_columns(
        samples, r'^county_effect_mean$')[:, 0]
    res['county_effect_scale'] = util.get_columns(
        samples, r'^county_effect_scale$')[:, 0]
    res['county_effect'] = util.get_columns(samples, r'^county_effect\[\d+\]$')
    res['weight'] = util.get_columns(samples, r'^weight\[\d+\]$')
    res['log_radon_scale'] = (util.get_columns(samples,
                                               r'^log_radon_scale$')[:, 0])
    return res

  extract_fns = {'identity': _ext_identity}

  return stan_model.StanModel(
      extract_fns=extract_fns,
      sample_fn=util.make_sample_fn(model, data=stan_data),
  )
def stochastic_volatility(centered_returns):  # pylint: disable=long-lines
  """Stochastic volatility model.

  This formulation is inspired by (a version in the Stan users' manual)[
  https://mc-stan.org/docs/2_21/stan-users-guide/stochastic-volatility-models.html].

  Args:
    centered_returns: float `Tensor` of shape `[num_timesteps]` giving the
      mean-adjusted return (change in asset price, minus the average change)
      observed at each step.

  Returns:
    model: `StanModel`.
  """
  # pylint: enable=long-lines
  # This model is specified in 'noncentered' parameterization, in terms of
  # standardized residuals `log_volatilities_std`. We expect this form of the
  # model to mix more easily than a direct specification would. This makes
  # it valuable for obtaining ground truth, but caution should be used when
  # comparing performance of inference algorithms across parameterizations.
  code = """
  data {
    int<lower=0> num_timesteps;
    vector[num_timesteps] centered_returns;
  }
  parameters {
    real<lower=-1,upper=1> persistence;
    real mean_log_volatility;
    real<lower=0> white_noise_shock_scale;
    vector[num_timesteps] log_volatilities_std;
  }
  transformed parameters {
    vector[num_timesteps] log_volatilities = (
        log_volatilities_std * white_noise_shock_scale);
    log_volatilities[1] /= sqrt(1 - square(persistence));
    log_volatilities += mean_log_volatility;
    for (t in 2:num_timesteps)
      log_volatilities[t] += persistence * (
          log_volatilities[t - 1] - mean_log_volatility);
  }
  model {
    (persistence + 1) * 0.5 ~ beta(20, 1.5);
    white_noise_shock_scale ~ cauchy(0, 2);
    mean_log_volatility ~ cauchy(0, 5);
    log_volatilities_std ~ std_normal();
    centered_returns ~ normal(0, exp(log_volatilities / 2));
  }
  """
  stan_data = {
      'num_timesteps': len(centered_returns),
      'centered_returns': centered_returns
  }
  model = util.cached_stan_model(code)

  def _ext_identity(samples):
    """Extracts the values of all latent variables."""
    res = collections.OrderedDict()
    res['mean_log_volatility'] = util.get_columns(
        samples, r'^mean_log_volatility$')[:, 0]
    res['white_noise_shock_scale'] = util.get_columns(
        samples, r'^white_noise_shock_scale$')[:, 0]
    # N.B. the extracted keys deliberately differ from the Stan parameter
    # names (`persistence` -> `persistence_of_volatility`,
    # `log_volatilities` -> `log_volatility`).
    res['persistence_of_volatility'] = util.get_columns(
        samples, r'^persistence$')[:, 0]
    res['log_volatility'] = util.get_columns(
        samples,
        r'^log_volatilities\[\d+\]$',
    )
    return res

  extract_fns = {'identity': _ext_identity}

  return stan_model.StanModel(
      extract_fns=extract_fns,
      sample_fn=util.make_sample_fn(model, data=stan_data),
  )
def partially_observed_lorenz_system(observed_values, innovation_scale,
                                     observation_scale, observation_mask,
                                     step_size, observation_index):
  """Lorenz System model.

  Args:
    observed_values: Array of observed values.
    innovation_scale: Python `float`.
    observation_scale: Python `float`.
    observation_mask: `bool` array used to occlude observations.
    step_size: Python `float`.
    observation_index: `int` index used to pick which latent time series is
      observed.

  Returns:
    model: `StanModel`.
  """
  # NOTE(review): this re-definition shadows an earlier
  # `partially_observed_lorenz_system` in this module (which splices in
  # LORENZ_SYSTEM_OBSERVATIONS_MODEL instead of inlining the model block);
  # confirm which definition is intended and remove the other.
  code = """
  data {
    int<lower=0> num_timesteps;
    int<lower=0> num_observations;
    int<lower=1> observation_state_index;
    int<lower = 1, upper = num_timesteps> observation_time_indices[
        num_observations];
    vector[num_observations] observations;
    real<lower=0> innovation_scale;
    real<lower=0> observation_scale;
    real<lower=0> step_size;
  }
  parameters {
    matrix[num_timesteps, 3] latents;
  }
  model {
    real x;
    real y;
    real z;
    row_vector[3] delta;
    latents[1] ~ normal(0, 1);
    for (t in 2:num_timesteps){
      x = latents[t - 1, 1];
      y = latents[t - 1, 2];
      z = latents[t - 1, 3];
      delta[1] = 10. * (y - x);
      delta[2] = x * (28. - z) - y;
      delta[3] = x * y - 8. / 3. * z;
      latents[t] ~ normal(latents[t - 1] + step_size * delta,
                          sqrt(step_size) * innovation_scale);
    }
    observations ~ normal(
        latents[observation_time_indices, observation_state_index],
        observation_scale);
  }
  """
  num_timesteps = len(observed_values)
  stan_data = {
      'num_timesteps': num_timesteps,
      'num_observations': len(observed_values[observation_mask]),
      # Stan arrays are 1-indexed.
      'observation_time_indices':
          np.arange(1, num_timesteps + 1)[observation_mask],
      'observations': observed_values[observation_mask],
      'observation_state_index': observation_index + 1,
      'innovation_scale': innovation_scale,
      'observation_scale': observation_scale,
      'step_size': step_size
  }
  model = util.cached_stan_model(code)

  def _ext_identity(samples):
    """Extracts the values of all latent variables."""
    latents = util.get_columns(samples, r'^latents\.\d+\.\d+$')
    # Last two dimensions are swapped in Stan output. The timestep count was
    # previously hard-coded to 30; derive it from the input so the model
    # works for series of any length.
    return latents.reshape((-1, 3, num_timesteps)).swapaxes(1, 2)

  extract_fns = {'identity': _ext_identity}

  return stan_model.StanModel(
      extract_fns=extract_fns,
      sample_fn=util.make_sample_fn(model, data=stan_data),
  )
def sparse_logistic_regression(
    train_features,
    train_labels,
    test_features=None,
    test_labels=None,
):
  """Bayesian logistic regression with a sparsity-inducing prior.

  Args:
    train_features: Floating-point `Tensor` with shape `[num_train_points,
      num_features]`. Training features.
    train_labels: Integer `Tensor` with shape `[num_train_points]`. Training
      labels.
    test_features: Floating-point `Tensor` with shape `[num_test_points,
      num_features]`. Testing features. Can be `None`, in which case
      test-related sample transformations are not computed.
    test_labels: Integer `Tensor` with shape `[num_test_points]`. Testing
      labels. Can be `None`, in which case test-related sample
      transformations are not computed.

  Returns:
    model: `StanModel`.
  """
  # Horseshoe-like prior: weights = unscaled_weights .* local_scales *
  # global_scale, with gamma priors on the scales. `generated quantities`
  # computes held-out negative log-likelihoods.
  code = """
  data {
    int<lower=0> num_train_points;
    int<lower=0> num_test_points;
    int<lower=0> num_features;
    matrix[num_train_points,num_features] train_features;
    int<lower=0,upper=1> train_labels[num_train_points];
    matrix[num_test_points,num_features] test_features;
    int<lower=0,upper=1> test_labels[num_test_points];
  }
  parameters {
    vector[num_features] unscaled_weights;
    vector<lower=0>[num_features] local_scales;
    real<lower=0> global_scale;
  }
  model {
    {
      vector[num_features] weights;
      vector[num_train_points] logits;
      weights = unscaled_weights .* local_scales * global_scale;
      logits = train_features * weights;
      unscaled_weights ~ normal(0, 1);
      local_scales ~ gamma(0.5, 0.5);
      global_scale ~ gamma(0.5, 0.5);
      train_labels ~ bernoulli_logit(logits);
    }
  }
  generated quantities {
    real test_nll;
    real per_example_test_nll[num_test_points];
    {
      vector[num_features] weights;
      vector[num_test_points] logits;
      weights = unscaled_weights .* local_scales * global_scale;
      logits = test_features * weights;
      test_nll = -bernoulli_logit_lpmf(test_labels | logits);
      for (i in 1:num_test_points) {
        per_example_test_nll[i] = -bernoulli_logit_lpmf(
            test_labels[i] | logits[i]);
      }
    }
  }
  """
  have_test = test_features is not None
  # Append a constant bias feature column to the design matrices.
  train_features = _add_bias(train_features)
  if have_test:
    test_features = _add_bias(test_features)
  else:
    # cmdstanpy can't handle zero-sized arrays at the moment:
    # https://github.com/stan-dev/cmdstanpy/issues/203
    # Dummy 1-row test set; its extract fns are not registered below.
    test_features = train_features[:1]
    test_labels = train_labels[:1]
  stan_data = {
      'num_train_points': train_features.shape[0],
      'num_test_points': test_features.shape[0],
      'num_features': train_features.shape[1],
      'train_features': train_features,
      'train_labels': train_labels,
      'test_features': test_features,
      'test_labels': test_labels,
  }
  model = util.cached_stan_model(code)

  def _ext_identity(samples):
    """Extract all the parameters."""
    res = collections.OrderedDict()
    res['unscaled_weights'] = util.get_columns(
        samples,
        r'^unscaled_weights\[\d+\]$',
    )
    res['local_scales'] = util.get_columns(
        samples,
        r'^local_scales\[\d+\]$',
    )
    res['global_scale'] = util.get_columns(
        samples,
        r'^global_scale$',
    )[:, 0]
    return res

  def _ext_test_nll(samples):
    # Total negative log-likelihood on the held-out points.
    return util.get_columns(samples, r'^test_nll$')[:, 0]

  def _ext_per_example_test_nll(samples):
    # Per-point negative log-likelihood on the held-out points.
    return util.get_columns(samples, r'^per_example_test_nll\[\d+\]$')

  extract_fns = {'identity': _ext_identity}
  if have_test:
    extract_fns['test_nll'] = _ext_test_nll
    extract_fns['per_example_test_nll'] = _ext_per_example_test_nll

  return stan_model.StanModel(
      extract_fns=extract_fns,
      sample_fn=util.make_sample_fn(model, data=stan_data),
  )
def log_gaussian_cox_process(
    train_locations,
    train_extents,
    train_counts,
):
  """Log-Gaussian Cox Process model.

  Args:
    train_locations: Float `Tensor` with shape `[num_train_points, D]`.
      Training set locations where counts were measured.
    train_extents: Float `Tensor` with shape `[num_train_points]`. Training
      set location extents, must be positive.
    train_counts: Integer `Tensor` with shape `[num_train_points]`. Training
      set counts, must be positive.

  Returns:
    model: `StanModel`.
  """
  # Counts are Poisson with log-rate log(extent) + log_intensity, where
  # log_intensity has a Matern-3/2 GP prior centered at the empirical mean
  # log-intensity (computed in `transformed data`).
  code = """
  data {
    int<lower=0> num_points;
    int<lower=0> num_features;
    vector[num_features] locations[num_points];
    real<lower=0> extents[num_points];
    int<lower=0> counts[num_points];
  }
  transformed data {
    vector[num_points] loc;
    real mean_log_intensity;
    {
      mean_log_intensity = 0;
      for (i in 1:num_points) {
        mean_log_intensity += (
            log(counts[i]) - log(extents[i])) / num_points;
      }
      for (i in 1:num_points)
        loc[i] = mean_log_intensity;  // otherwise nan!
    }
  }
  parameters {
    real<lower=0> amplitude;
    real<lower=0> length_scale;
    vector[num_points] log_intensity;
  }
  model {
    {
      matrix[num_points, num_points] L_K;
      matrix[num_points, num_points] K = gp_matern32_cov(
          locations, amplitude + .001, length_scale + .001);
      for (i in 1:num_points)
        K[i,i] += 1e-6;  // GP jitter
      L_K = cholesky_decompose(K);
      amplitude ~ lognormal(-1., .5);
      length_scale ~ lognormal(-1., 1.);
      log_intensity ~ multi_normal_cholesky(loc, L_K);
      for (i in 1:num_points) {
        counts[i] ~ poisson_log(
            log(extents[i]) + log_intensity[i]);
      }
    }
  }
  """
  num_points = train_locations.shape[0]
  num_features = train_locations.shape[1]
  stan_data = {
      'num_points': num_points,
      'num_features': num_features,
      'locations': train_locations,
      'extents': train_extents,
      'counts': train_counts,
  }
  model = util.cached_stan_model(code)

  def _ext_identity(samples):
    """Extract all the parameters."""
    res = collections.OrderedDict()
    res['amplitude'] = util.get_columns(
        samples,
        r'^amplitude$',
    )[:, 0]
    res['length_scale'] = util.get_columns(
        samples,
        r'^length_scale$',
    )[:, 0]
    res['log_intensity'] = util.get_columns(
        samples,
        r'^log_intensity\.\d+$',
    )
    return res

  extract_fns = {'identity': _ext_identity}

  return stan_model.StanModel(
      extract_fns=extract_fns,
      sample_fn=util.make_sample_fn(model, data=stan_data),
  )
def probit_regression(
    train_features,
    train_labels,
    test_features=None,
    test_labels=None,
):
  """Bayesian probit regression with a Gaussian prior.

  Args:
    train_features: Floating-point `Tensor` with shape `[num_train_points,
      num_features]`. Training features.
    train_labels: Integer `Tensor` with shape `[num_train_points]`. Training
      labels.
    test_features: Floating-point `Tensor` with shape `[num_test_points,
      num_features]`. Testing features. Can be `None`, in which case
      test-related sample transformations are not computed.
    test_labels: Integer `Tensor` with shape `[num_test_points]`. Testing
      labels. Can be `None`, in which case test-related sample
      transformations are not computed.

  Returns:
    model: `StanModel`.
  """
  code = """
  data {
    int<lower=0> num_train_points;
    int<lower=0> num_test_points;
    int<lower=0> num_features;
    matrix[num_train_points,num_features] train_features;
    int<lower=0,upper=1> train_labels[num_train_points];
    matrix[num_test_points,num_features] test_features;
    int<lower=0,upper=1> test_labels[num_test_points];
  }
  parameters {
    vector[num_features] weights;
  }
  model {
    {
      vector[num_train_points] probits;
      probits = train_features * weights;
      weights ~ normal(0, 1);
      // Stan doesn't have a way to do it in log-space.
      train_labels ~ bernoulli(Phi(probits));
    }
  }
  generated quantities {
    real test_nll;
    real per_example_test_nll[num_test_points];
    {
      vector[num_test_points] probits;
      probits = test_features * weights;
      test_nll = -bernoulli_lpmf(test_labels | Phi(probits));
      for (i in 1:num_test_points) {
        per_example_test_nll[i] = -bernoulli_lpmf(
            test_labels[i] | Phi(probits[i]));
      }
    }
  }
  """
  have_test = test_features is not None
  # Append a constant bias feature column to the design matrices.
  train_features = _add_bias(train_features)
  if have_test:
    test_features = _add_bias(test_features)
  else:
    # cmdstanpy can't handle zero-sized arrays at the moment:
    # https://github.com/stan-dev/cmdstanpy/issues/203
    # Dummy 1-row test set; its extract fns are not registered below.
    test_features = train_features[:1]
    test_labels = train_labels[:1]
  stan_data = {
      'num_train_points': train_features.shape[0],
      'num_test_points': test_features.shape[0],
      'num_features': train_features.shape[1],
      'train_features': train_features,
      'train_labels': train_labels,
      'test_features': test_features,
      'test_labels': test_labels,
  }
  model = util.cached_stan_model(code)

  def _ext_identity(samples):
    """Extracts the regression weights."""
    return util.get_columns(samples, r'^weights\[\d+\]$')

  def _ext_test_nll(samples):
    # Total negative log-likelihood on the held-out points.
    return util.get_columns(samples, r'^test_nll$')[:, 0]

  def _ext_per_example_test_nll(samples):
    # Per-point negative log-likelihood on the held-out points.
    return util.get_columns(samples, r'^per_example_test_nll\[\d+\]$')

  extract_fns = {'identity': _ext_identity}
  if have_test:
    extract_fns['test_nll'] = _ext_test_nll
    extract_fns['per_example_test_nll'] = _ext_per_example_test_nll

  return stan_model.StanModel(
      extract_fns=extract_fns,
      # The default random initialization saturates the 'Phi' function, causing
      # initial log-probs to not be finite. Starting things off at 0 is more
      # stable. The init size was previously hard-coded to 25; derive it from
      # the (bias-augmented) feature count so other datasets work.
      sample_fn=util.make_sample_fn(
          model,
          data=stan_data,
          inits={'weights': np.zeros([train_features.shape[1]])}),
  )