def bsem(
    items,
    factors,
    paths,
    beta=0,
    nu_sd=2.5,
    alpha_sd=2.5,
    d_beta=2.5,
    corr_items=False,
    corr_factors=False,
    g_eta=100,
    l_eta=1,
    beta_beta=1,
):
    r"""Constructs Bayesian SEM.

    Args:
        items (np.array): Array of item data, one row per case and one column
            per item.
        factors (np.array): Factor design, one row per item and one column per
            factor.
        paths (np.array): Square array of directed factor paths; entries equal
            to 1 receive a free path coefficient.
        beta (:obj:`float` or `'estimate'`, optional): Standard deviation of
            normal prior on cross loadings. If `'estimate'`, beta is estimated
            from the data.
        nu_sd (:obj:`float`, optional): Standard deviation of normal prior on
            item intercepts.
        alpha_sd (:obj:`float`, optional): Standard deviation of normal prior
            on factor intercepts.
        d_beta (:obj:`float`, optional): Scale parameter of half-Cauchy prior
            on item residual standard deviations.
        corr_items (:obj:`bool`, optional): Allow correlated items.
        corr_factors (:obj:`bool`, optional): Allow correlated factors.
        g_eta (:obj:`float`, optional): Shape parameter of LKJ prior on
            residual item correlation matrix.
        l_eta (:obj:`float`, optional): Shape parameter of LKJ prior on factor
            correlation matrix.
        beta_beta (:obj:`float`, optional): Beta parameter of beta prior on
            beta.

    Returns:
        None: Places model in context.

    Raises:
        AssertionError: If `items`, `factors` and `paths` have inconsistent
            shapes, or if `beta` is a string other than `'estimate'`.

    """
    # get numbers of cases, items, and factors
    n, p = items.shape
    p_, m = factors.shape
    assert p == p_, "Mismatch between data and factor-loading matrices"
    assert paths.shape == (m, m), "Paths matrix has wrong shape"
    I = tt.eye(m, m)

    # place priors on item and factor intercepts
    nu = pm.Normal(
        name=r"$\nu$", mu=0, sd=nu_sd, shape=p, testval=items.mean(axis=0)
    )
    alpha = pm.Normal(
        name=r"$\alpha$", mu=0, sd=alpha_sd, shape=m, testval=np.zeros(m)
    )

    # place priors on unscaled factor loadings
    Phi = pm.Normal(
        name=r"$\Phi$", mu=0, sd=1, shape=factors.shape, testval=factors
    )

    # place priors on paths
    B = tt.zeros(paths.shape)
    # cast to a plain int: the sum of a float design matrix is a float, which
    # is not a valid shape / array length
    npths = int(np.sum(paths, axis=None))
    if npths > 0:
        b = pm.Normal(
            name=r"$b$", mu=0, sd=1, shape=npths, testval=np.ones(npths)
        )
        # create the paths matrix, filling free entries in row-major order
        k = 0
        for i in range(m):
            for j in range(m):
                if paths[i, j] == 1:
                    B = tt.set_subtensor(B[i, j], b[k])
                    k += 1
    Gamma = pm.Deterministic(r"$\Gamma$", B)

    # create masking matrix for factor loadings
    if isinstance(beta, str):
        assert beta == "estimate", f"Don't know what to do with '{beta}'"
        beta = pm.Beta(name=r"$\beta$", alpha=1, beta=beta_beta, testval=0.1)
    M = (1 - np.asarray(factors)) * beta + np.asarray(factors)

    # create scaled factor loadings
    Lambda = pm.Deterministic(r"$\Lambda$", Phi * M)

    # determine item means
    mu = nu + matrix_dot(Lambda, alpha)

    # place priors on item standard deviations
    D = pm.HalfCauchy(
        name=r"$D$", beta=d_beta, shape=p, testval=items.std(axis=0)
    )

    # place priors on item correlations
    f = pm.Lognormal.dist(sd=0.25)
    if not corr_items:
        Omega = np.eye(p)
    else:
        G = pm.LKJCholeskyCov(name=r"$G$", eta=g_eta, n=p, sd_dist=f)
        ch1 = pm.expand_packed_triangular(p, G, lower=True)
        K = tt.dot(ch1, ch1.T)
        sd1 = tt.sqrt(tt.diag(K))
        # rescale the covariance to a correlation matrix
        Omega = pm.Deterministic(r"$\Omega$", K / sd1[:, None] / sd1[None, :])

    # determine residual item variances and covariances
    Theta = pm.Deterministic(r"$\Theta$", D[None, :] * Omega * D[:, None])

    # place priors on factor correlations
    if not corr_factors:
        Psi = np.eye(m)
    else:
        L = pm.LKJCholeskyCov(name=r"$L$", eta=l_eta, n=m, sd_dist=f)
        ch = pm.expand_packed_triangular(m, L, lower=True)
        # NOTE: this must not be called ``Gamma``; that name holds the path
        # matrix used below to build the item covariance.
        factor_cov = tt.dot(ch, ch.T)
        factor_sd = tt.sqrt(tt.diag(factor_cov))
        Psi = pm.Deterministic(
            r"$\Psi$", factor_cov / factor_sd[:, None] / factor_sd[None, :]
        )

    # determine variances and covariances of items
    A = matrix_inverse(I - Gamma)
    C = matrix_inverse(I - Gamma.T)
    Sigma = matrix_dot(Lambda, A, Psi, C, Lambda.T) + Theta

    # place priors on observations
    pm.MvNormal(
        name="$Y$", mu=mu, cov=Sigma, observed=items, shape=items.shape
    )
def bcfab(items, factors, paths, nu_sd=2.5, alpha_sd=2.5):
    r"""Constructs a Bayesian CFA model in "binomial form".

    Args:
        items (np.array): Data, one row per case and one column per item.
        factors (np.array): Factor design matrix, one row per item and one
            column per factor; entries equal to 1 receive a free loading.
        paths (np.array): Paths design matrix, square over the factors;
            entries equal to 1 receive a free path coefficient.
        nu_sd (:obj:`float`, optional): Standard deviation of normal prior on
            item intercepts.
        alpha_sd (:obj:`float`, optional): Standard deviation of normal prior
            on factor intercepts.

    Returns:
        None: Places model in context.

    Raises:
        AssertionError: If `items`, `factors` and `paths` have inconsistent
            shapes.

    """
    # get numbers of cases, items, and factors
    n, p = items.shape
    p_, m = factors.shape
    assert p == p_, "Mismatch between data and factor-loading matrices"
    assert paths.shape == (m, m), "Paths matrix has wrong shape"

    # priors on item intercepts
    nu = pm.Normal(
        name=r"$\nu$", mu=0, sd=nu_sd, shape=p, testval=np.zeros(p)
    )

    # priors on factor intercepts
    alpha = pm.Normal(
        name=r"$\alpha$", mu=0, sd=alpha_sd, shape=m, testval=np.zeros(m)
    )

    # priors on factor loadings; the count is cast to int because the sum of a
    # float design matrix is a float, which np.zeros() and shape= reject
    n_loadings = int(np.asarray(factors).sum())
    lam = pm.Normal(
        name=r"$\lambda$",
        mu=0,
        sd=1,
        shape=n_loadings,
        testval=np.zeros(n_loadings),
    )

    # loading matrix, filling free entries in row-major order
    Lambda = tt.zeros(factors.shape)
    k = 0
    for i, j in product(range(p), range(m)):
        if factors[i, j] == 1:
            Lambda = tt.inc_subtensor(Lambda[i, j], lam[k])
            k += 1
    pm.Deterministic(name=r"$\Lambda$", var=Lambda)

    # priors on paths
    n_paths = int(np.asarray(paths).sum())
    gam = pm.Normal(
        name=r"$\gamma$", mu=0, sd=1, shape=n_paths, testval=np.zeros(n_paths)
    )

    # path matrix, filling free entries in row-major order
    Gamma = tt.zeros(paths.shape)
    k = 0
    for i, j in product(range(m), range(m)):
        if paths[i, j] == 1:
            Gamma = tt.inc_subtensor(Gamma[i, j], gam[k])
            k += 1
    pm.Deterministic(name=r"$\Gamma$", var=Gamma)

    # priors on factor residuals (one row per case, one column per factor)
    zeta = pm.Normal(name=r"$\zeta$", mu=0, sd=1, shape=(n, m), testval=0)

    # latent variables: propagate intercepts plus residuals through the paths,
    # project onto the items, and squash to probabilities
    I = np.eye(m)
    eta = matrix_dot(alpha + zeta, matrix_inverse(I - Gamma.T))
    Pi = pm.math.sigmoid(nu + matrix_dot(eta, Lambda.T))

    # observations; the number of trials per item is the item's observed max
    pm.Binomial(
        name="$Y$",
        p=Pi,
        n=items.max(axis=0),
        observed=items,
        shape=items.shape,
    )
def bcfa(Y, M):
    r"""Constructs a Bayesian confirmatory factor analysis (BCFA) model.

    Args:
        Y (numpy.ndarray): An $n \times p$ matrix of data where $n$ is the
            sample size and $p$ is the number of manifest variables.
        M (numpy.ndarray): A $p \times m$ matrix to describe model structure
            where $m$ is the number of latent variables.

    Notes:
        $\mathbf{Y}$ probably should be standardized first if you are using
        continuous data.

        Entries in $\mathbf{M}$ should be in [0, 1]. $\mathbf{M}_{(i,j)}$
        represents the variance of the normal prior placed on the regression
        coefficient from the $j$th latent variable to the $i$th manifest
        variable. Values of 0 remove the coefficient from the model entirely,
        1 represents a "full-strength" coefficient, and values (0, 1) are for
        cross-loadings.

        Item means and standard deviations are taken column-wise (`axis=0`),
        so each manifest variable gets its own starting value.

    Returns:
        None: Model is placed in the context.

    Raises:
        AssertionError: If `Y` and `M` disagree on the number of manifest
            variables.

    """
    # counts
    n, p = Y.shape
    p_, m = M.shape
    assert p == p_, "M is the wrong shape"

    # per-item summaries; axis=0 is explicit so that a plain ndarray is not
    # collapsed to a single scalar
    item_means = Y.mean(axis=0)
    item_sds = Y.std(axis=0)

    # intercepts for manifest variables; the prior scale grows with the
    # largest absolute item mean but is never below 2.5
    nu_sd = max(np.abs(item_means).max() * 2.5, 2.5)
    nu = pm.Normal(name=r"$\nu$", mu=0, sd=nu_sd, shape=p, testval=item_means)

    # unscaled regression coefficients
    Phi = pm.Normal(name=r"$\Phi$", mu=0, sd=1, shape=M.shape, testval=M)

    # scaled regression coefficients (M holds variances, hence the sqrt)
    Lambda = pm.Deterministic(r"$\Lambda$", Phi * np.sqrt(M))

    # intercepts for latent variables
    alpha = pm.Normal(name=r"$\alpha$", mu=0, sd=2.5, shape=m, testval=0)

    # means of manifest variables
    mu = nu + matrix_dot(Lambda, alpha)

    # standard deviations of manifest variables
    D = pm.HalfCauchy(name=r"$D$", beta=2.5, shape=p, testval=item_sds)

    # correlations between manifest variables
    Omega = np.eye(p)

    # covariance matrix for manifest variables
    Theta = pm.Deterministic(r"$\Theta$", D[None, :] * Omega * D[:, None])

    # correlation matrix on latent variables, obtained by rescaling an
    # LKJ-distributed covariance
    f = pm.Lognormal.dist(sd=0.25)
    L = pm.LKJCholeskyCov(name=r"$L$", eta=1, n=m, sd_dist=f)
    ch = pm.expand_packed_triangular(m, L, lower=True)
    latent_cov = tt.dot(ch, ch.T)
    latent_sd = tt.sqrt(tt.diag(latent_cov))
    Psi = pm.Deterministic(
        r"$\Psi$", latent_cov / latent_sd[:, None] / latent_sd[None, :]
    )

    # covariance of manifest variables
    Sigma = matrix_dot(Lambda, Psi, Lambda.T) + Theta

    # observations
    pm.MvNormal(name="Y", mu=mu, cov=Sigma, observed=Y, shape=Y.shape)
def bcfam(items, factors, paths, nu_sd=2.5, alpha_sd=2.5, d_beta=2.5):
    r"""Constructs a Bayesian CFA model in "multivariate form".

    Args:
        items (np.array): Data, one row per case and one column per item.
        factors (np.array): Factor design matrix, one row per item and one
            column per factor; entries equal to 1 receive a free loading.
        paths (np.array): Paths design matrix, square over the factors;
            entries equal to 1 receive a free path coefficient.
        nu_sd (:obj:`float`, optional): Standard deviation of normal prior on
            item intercepts.
        alpha_sd (:obj:`float`, optional): Standard deviation of normal prior
            on factor intercepts.
        d_beta (:obj:`float`, optional): Scale parameter of half-Cauchy prior
            on item residual standard deviations.

    Returns:
        None: Places model in context.

    Raises:
        AssertionError: If `items`, `factors` and `paths` have inconsistent
            shapes.

    """
    # get numbers of cases, items, and factors
    n, p = items.shape
    p_, m = factors.shape
    assert p == p_, "Mismatch between data and factor-loading matrices"
    assert paths.shape == (m, m), "Paths matrix has wrong shape"

    # priors on item intercepts
    nu = pm.Normal(
        name=r"$\nu$", mu=0, sd=nu_sd, shape=p, testval=items.mean(axis=0)
    )

    # priors on factor intercepts
    alpha = pm.Normal(
        name=r"$\alpha$", mu=0, sd=alpha_sd, shape=m, testval=np.zeros(m)
    )

    # priors on factor loadings; the count is cast to int because the sum of a
    # float design matrix is a float, which np.ones() and shape= reject
    n_loadings = int(np.asarray(factors).sum())
    lam = pm.Normal(
        name=r"$\lambda$",
        mu=0,
        sd=1,
        shape=n_loadings,
        testval=np.ones(n_loadings),
    )

    # loading matrix, filling free entries in row-major order
    Lambda = tt.zeros(factors.shape)
    k = 0
    for i, j in product(range(p), range(m)):
        if factors[i, j] == 1:
            Lambda = tt.inc_subtensor(Lambda[i, j], lam[k])
            k += 1
    pm.Deterministic(name=r"$\Lambda$", var=Lambda)

    # item means
    mu = nu + matrix_dot(Lambda, alpha)

    # item residual covariance matrix (diagonal, variances = d squared)
    d = pm.HalfCauchy(
        name=r"$\sqrt{\theta}$",
        beta=d_beta,
        shape=p,
        testval=items.std(axis=0),
    )
    Theta = tt.diag(d) ** 2

    # factor covariance matrix
    Psi = I = np.eye(m)

    # priors on paths
    n_paths = int(np.asarray(paths).sum())
    gam = pm.Normal(
        name=r"$\gamma$", mu=0, sd=1, shape=n_paths, testval=np.ones(n_paths)
    )

    # path matrix, filling free entries in row-major order
    Gamma = tt.zeros(paths.shape)
    k = 0
    for i, j in product(range(m), range(m)):
        if paths[i, j] == 1:
            Gamma = tt.inc_subtensor(Gamma[i, j], gam[k])
            k += 1
    pm.Deterministic(name=r"$\Gamma$", var=Gamma)

    # item covariance matrix
    Sigma = (
        matrix_dot(
            Lambda,
            matrix_inverse(I - Gamma),
            Psi,
            matrix_inverse(I - Gamma.T),
            Lambda.T,
        )
        + Theta
    )

    # observations
    pm.MvNormal(
        name="$Y$", mu=mu, cov=Sigma, observed=items, shape=items.shape
    )
def _element_column_indices(wanted, available):
    """Return, for each symbol in `wanted`, its first column index in `available`.

    Entries of `available` are compared via `str()`. A symbol that is not
    found is reported on stdout and keeps index 0.
    """
    first_index = {}
    for column, name in enumerate(available):
        # setdefault keeps the first occurrence if a symbol is repeated
        first_index.setdefault(str(name), column)

    indices = np.zeros(len(wanted), dtype=int)
    for slot, symbol in enumerate(wanted):
        if symbol in first_index:
            indices[slot] = first_index[symbol]
        else:
            print("Failed to find element %s" % symbol)
    return indices


def n_star_inference(n_stars, iteration, elem_err=False, n_init=20000,
                     n_samples=1000, max_stars=100):
    """Fit global and per-star parameters to one batch of mock stars.

    The batch is rows ``iteration * n_stars`` to ``(iteration + 1) * n_stars``
    of the first `max_stars` stars in the file named by the module-level
    `mock_data_file`. A PyMC3 model is built in which standardized abundances
    are predicted by a one-hidden-layer tanh network (weights taken from the
    module-level `w_array_0`, `b_array_0`, `w_array_1`, `b_array_1`) and
    compared to the observed standardized abundances with a normal likelihood.

    Besides the names above, this function reads these module-level names:
    `input_mean`, `input_std`, `output_mean`, `output_std`,
    `std_Lambda_prior_mean`, `std_Lambda_prior_width`, `std_Theta_prior_mean`,
    `std_Theta_prior_width`, `std_log_SFR_crit`, `std_min_time`,
    `std_max_time`, `chains`, `cores` and `tune`.

    Args:
        n_stars (int): Number of stars in the batch.
        iteration (int): Index of the batch to use.
        elem_err (bool, optional): If true, add a per-element half-Cauchy
            error term in quadrature to the observational errors.
        n_init (int, optional): Passed to `pm.sample` as `n_init`.
        n_samples (int, optional): Number of draws requested from `pm.sample`.
        max_stars (int, optional): Number of leading stars in the data file
            from which batches are drawn.

    Returns:
        tuple: ``(Lambda, Thetas, Times, elapsed, predictions)``, or
        ``(Lambda, Thetas, Times, elapsed, Errs, predictions)`` when
        `elem_err` is true. `elapsed` is the sampling wall time in seconds;
        the other entries are arrays read from the trace.

    Raises:
        ValueError: If the requested batch does not contain exactly `n_stars`
            stars.
    """
    # Define which stars to use
    these_stars = np.arange(max_stars)[iteration * n_stars:(iteration + 1) * n_stars]
    if len(these_stars) != n_stars:
        raise ValueError(
            "Batch %d of %d stars is not fully contained in the first %d stars"
            % (iteration, n_stars, max_stars))

    # Load in mock dataset; the context manager closes the archive once the
    # needed arrays have been read
    with np.load(mock_data_file) as mock_data:
        mu_times = mock_data.f.obs_time[these_stars]  # time of birth
        sigma_times = mock_data.f.obs_time_err[these_stars]  # error on age
        all_els = mock_data.f.elements
        # chemical element abundances for data
        full_abundances = mock_data.f.abundances[these_stars]
        # error on abundances
        full_errors = mock_data.f.abundance_errs[these_stars]

    # Filter out correct elements:
    els = ['C', 'Fe', 'He', 'Mg', 'N', 'Ne', 'O', 'Si']  # TNG elements
    n_els = len(els)
    el_indices = _element_column_indices(els, all_els)
    obs_abundances = full_abundances[:, el_indices]
    obs_errors = full_errors[:, el_indices]

    # Now standardize dataset
    norm_data = (obs_abundances - output_mean) / output_std
    norm_sd = obs_errors / output_std

    std_times_mean = (mu_times - input_mean[-1]) / input_std[-1]
    std_times_width = sigma_times / input_std[-1]

    # Bound variables to ensure they don't exit the training parameter space
    lowBound = tt._shared(np.asarray([-5, std_log_SFR_crit, -5, std_min_time]))
    upBound = tt._shared(np.asarray([5, 5, 5, std_max_time]))

    # Create stacked mean and variances: the shared Theta prior repeated for
    # every star, with each star's own time prior appended as the last column
    per_star = np.ones([n_stars, 1])
    loc_mean = np.hstack([
        np.asarray(std_Theta_prior_mean).reshape(1, -1) * per_star,
        std_times_mean.reshape(-1, 1)
    ])
    loc_std = np.hstack([
        np.asarray(std_Theta_prior_width).reshape(1, -1) * per_star,
        std_times_width.reshape(-1, 1)
    ])

    # Share theano variables
    w0 = tt._shared(w_array_0)
    b0 = tt._shared(b_array_0)
    w1 = tt._shared(w_array_1)
    b1 = tt._shared(b_array_1)
    ones_tensor = tt.ones([n_stars, 1])
    b0_all = ma.matrix_dot(ones_tensor, b0)
    b1_all = ma.matrix_dot(ones_tensor, b1)

    # Define PyMC3 Model
    simple_model = pm.Model()

    with simple_model:
        # Define priors
        Lambda = pm.Normal('Std-Lambda',
                           mu=std_Lambda_prior_mean,
                           sd=std_Lambda_prior_width,
                           shape=(1, len(std_Lambda_prior_mean)))

        Locals = pm.Normal(
            'Std-Local',
            mu=loc_mean,
            sd=loc_std,
            shape=loc_mean.shape,
            transform=pm.distributions.transforms.Interval(lowBound, upBound),
        )
        TimeSq = tt.reshape(Locals[:, -1]**2., (n_stars, 1))

        # De-standardized copies, recorded so they appear in the trace
        pm.Deterministic('Lambda', Lambda * input_std[:2] + input_mean[:2])
        pm.Deterministic('Thetas',
                         Locals[:, :3] * input_std[2:5] + input_mean[2:5])
        pm.Deterministic('Times',
                         Locals[:, -1] * input_std[-1] + input_mean[-1])

        # NEURAL NET
        Lambda_all = ma.matrix_dot(ones_tensor, Lambda)
        InputVariables = ma.concatenate([Lambda_all, Locals, TimeSq], axis=1)

        layer1 = ma.matrix_dot(InputVariables, w0) + b0_all
        output = ma.matrix_dot(ma.tanh(layer1), w1) + b1_all

        if elem_err:
            # ERRORS
            element_error = pm.HalfCauchy('Std-Element-Error',
                                          beta=0.01 / output_std,
                                          shape=(1, n_els))
            pm.Deterministic('Element-Error', element_error * output_std)
            stacked_error = ma.matrix_dot(ones_tensor, element_error)
            # NB this is all standardized by output_std here
            tot_error = ma.sqrt(stacked_error**2. + norm_sd**2.)
        else:
            tot_error = norm_sd  # NB: all quantities are standardized here

        pm.Deterministic("Predicted-Abundances",
                         output * output_std + output_mean)

        # Define likelihood function (unravelling output to make a
        # multivariate gaussian)
        pm.Normal('likelihood',
                  mu=output.ravel(),
                  sd=tot_error.ravel(),
                  observed=norm_data.ravel())

    # Now sample
    init_time = ttime.time()
    with simple_model:
        samples = pm.sample(draws=n_samples,
                            chains=chains,
                            cores=cores,
                            tune=tune,
                            nuts_kwargs={'target_accept': 0.9},
                            init='advi+adapt_diag',
                            n_init=n_init)
    elapsed = ttime.time() - init_time

    # Pull the recorded quantities out of the trace; index 0 drops the
    # length-1 axis that 'Lambda' and 'Element-Error' were declared with
    Lambda = samples.get_values('Lambda')[:, 0, :]
    Thetas = samples.get_values('Thetas')
    Times = samples.get_values('Times')
    predictions = samples.get_values('Predicted-Abundances')

    print("Finished after %.2f seconds" % elapsed)

    if elem_err:
        Errs = samples.get_values('Element-Error')[:, 0, :]
        return Lambda, Thetas, Times, elapsed, Errs, predictions
    return Lambda, Thetas, Times, elapsed, predictions