def __init__(self, model, covariates=None, data=None, mask=None, stateseq=None, gaussian_states=None, **kwargs):
    """Build the states object and prepare its Polya-gamma augmentation.

    Every argument is forwarded unchanged to the parent constructor.
    Afterwards:

    * if the instance does not already have a ``ppgs`` attribute, one
      ``PyPolyaGamma`` sampler is created per thread reported by
      ``pypolyagamma.get_omp_num_threads()``, each with its own random seed;
    * ``trans_omegas`` is initialised to an array of ones with shape
      ``(T - 1, num_states - 1)``;
    * when both ``stateseq`` and ``gaussian_states`` are supplied, the
      transition auxiliary variables are resampled once.
    """
    parent = super(PGRecurrentSLDSStates, self)
    parent.__init__(
        model,
        covariates=covariates,
        data=data,
        mask=mask,
        stateseq=stateseq,
        gaussian_states=gaussian_states,
        **kwargs
    )

    # The parent may already have created the samplers; only build them if
    # they are missing.
    if not hasattr(self, 'ppgs'):
        import pypolyagamma as ppg

        thread_count = ppg.get_omp_num_threads()
        sampler_seeds = np.random.randint(2**16, size=thread_count)
        self.ppgs = [ppg.PyPolyaGamma(s) for s in sampler_seeds]

    # Auxiliary variables for the transitions start out at one.
    omega_shape = (self.T - 1, self.num_states - 1)
    self.trans_omegas = np.ones(omega_shape)

    # Without both the discrete and the continuous states there is nothing
    # to condition the auxiliary variables on yet.
    if stateseq is None or gaussian_states is None:
        return
    self.resample_transition_auxiliary_variables()
def _pg_rnd(a, b):
    """
    Draw one Polya-Gamma sample per entry of the parameter vectors.

    A new, randomly seeded ``PyPolyaGamma`` sampler is created on every call.

    :param a: array of first PG parameters; ``a.shape[0]`` sets the number of draws
    :param b: array of second PG parameters, passed alongside ``a`` to ``pgdrawv``
    :return: array of shape ``(a.shape[0],)`` filled in by ``pgdrawv``
    """
    sampler = pypolyagamma.PyPolyaGamma(np.random.randint(2**16))
    draws = np.zeros((a.shape[0],))
    # pgdrawv writes its samples into the pre-allocated output buffer.
    sampler.pgdrawv(a, b, draws)
    return draws
def test_density(b=1.0, c=0.0, N_smpls=10000, plot=False):
    """
    Compare a histogram of PG(b, c) samples against ``pypolyagamma.pgpdf``.

    :param b: first Polya-gamma parameter used for both sampling and the pdf
    :param c: second Polya-gamma parameter used for both sampling and the pdf
    :param N_smpls: number of samples to draw
    :param plot: if True, show the histogram with the pdf overlaid
    :return: True when the check passes
    :raises AssertionError: if the relative error of any bin reaches ``tol``
    """
    # Maximum tolerated relative error per histogram bin.
    tol = 10.0

    # Draw samples from PG(b, c) -- the same distribution the pdf below is
    # evaluated for (with the defaults this is PG(1, 0)).
    ppg = pypolyagamma.PyPolyaGamma(np.random.randint(2**16))
    smpls = np.zeros(N_smpls)
    ppg.pgdrawv(b * np.ones(N_smpls), c * np.ones(N_smpls), smpls)

    # Compute the empirical PDF
    bins = np.linspace(0, 2.0, 50)
    centers = 0.5 * (bins[1:] + bins[:-1])
    p_centers = pypolyagamma.pgpdf(centers, b, c)
    # `density=True` replaces the removed `normed=True` keyword.
    empirical_pdf, _ = np.histogram(smpls, bins, density=True)

    # Check that the empirical pdf is close to the true pdf
    err = (empirical_pdf - p_centers) / p_centers
    assert np.all(np.abs(err) < tol), \
        "Max error of {} exceeds tolerance of {}".format(abs(err).max(), tol)

    if plot:
        import matplotlib.pyplot as plt
        plt.hist(smpls, bins=50, density=True, alpha=0.5)

        # Plot high resolution density
        oms = np.linspace(1e-3, 2.0, 1000)
        pdf = pypolyagamma.pgpdf(oms, b, c)
        plt.plot(oms, pdf, '-b', lw=2)
        plt.show()

    return True
def sample_w(self):
    """
    Sample the augmenting w parameters from their conditional posterior.
    For details about the augmentation see the paper.

    For each time series in ``self.saliencies_ts`` a vector of Polya-gamma
    draws is produced with first parameter 1 and second parameter
    ``|b * (saliency_ts - s_0)|``.

    :return: samples for w_i from a polyagamma distribution,
        list of lists of arrays num_images x num_subjects x T(image, subject).
    """
    thread_count = pypolyagamma.get_omp_num_threads()
    sampler_seeds = np.random.randint(2**16, size=thread_count)
    samplers = [pypolyagamma.PyPolyaGamma(s) for s in sampler_seeds]

    def _draw_for(saliency_ts):
        # One draw per time step of this series.
        n_steps = saliency_ts.shape[0]
        shape_param = np.ones(n_steps)
        draws = np.zeros(n_steps)
        tilt = np.abs(self.b.value * (saliency_ts - self.s_0.value))
        pypolyagamma.pgdrawvpar(samplers, shape_param, tilt, draws)
        return draws

    return [
        [_draw_for(saliency_ts) for saliency_ts in per_image]
        for per_image in self.saliencies_ts
    ]
def Gibbs_Sampler2(N, burnin, sigma2e, y, x, binomialN, thin=0):
    '''
    Gibbs sampler that alternates Polya-gamma draws for ``w`` with a
    forward-filter / backward-sampling sweep over the latent vector ``x``.

    N: number of samples returned
    burnin: number of initial sweeps that are discarded
    sigma2e: variance passed to FwdFilterEM1 and used in the backward
        sampling update
    y: observations; ``K = y.shape[0]``
    x: latent vector of length K + 1. It provides the starting values and is
        OVERWRITTEN IN PLACE on every sweep (entries 1..K are resampled,
        entry 0 is never touched).
    binomialN: vector of shots attempted; cast to float and used as the
        first Polya-gamma parameter
    thin: thinning interval; 0 disables thinning

    Returns an array with one row of length K + 1 per retained sweep.
    '''
    K = y.shape[0]
    # np.float64 is used explicitly so the function does not rely on a bare
    # `double` name being injected by a wildcard import.
    shape_params = binomialN.astype(np.float64)

    ppg = pypolyagamma.PyPolyaGamma(np.random.randint(2**16))

    # Total number of sweeps needed once thinning and burn-in are accounted for.
    N_s = N * thin + burnin if thin != 0 else N + burnin

    samples = np.empty((N_s, K + 1))
    w = np.zeros(K + 1)

    for i in range(N_s):
        if i % 1000 == 0:
            # Progress indicator, flushed so it shows up immediately.
            print(i, end=" ", flush=True)

        # Sample w | x: pgdrawv fills `w` in place.
        ppg.pgdrawv(shape_params, abs(x), w)

        # Forward filter given the freshly drawn w.
        _x_prior, x_post, _sigma2_prior, sigma2_post = FwdFilterEM1(
            y, w, x_init=0, sigma2_init=0, sigma2e=sigma2e,
            binomialN=binomialN)

        # Draw the final state from the last filtered distribution.
        x[K] = np.random.normal(loc=x_post[-1], scale=np.sqrt(sigma2_post[-1]))

        # Backward sampling, from x[K - 1] down to x[1].
        # NOTE(review): x[j + 1] is drawn from the filtered moments at index
        # j -- confirm this offset against FwdFilterEM1's indexing.
        for j in range(K - 2, -1, -1):
            x_star_post = x_post[j] + (sigma2_post[j] / (sigma2e + sigma2_post[j])) * (
                x[j + 2] - x_post[j])
            sigma2_star_post = 1. / (1. / sigma2e + 1. / sigma2_post[j])
            x[j + 1] = np.random.normal(loc=x_star_post,
                                        scale=np.sqrt(sigma2_star_post))

        samples[i, :] = x

    if thin == 0:
        return samples[burnin:, :]
    return samples[burnin:N_s:thin, :]
def __init__(self, N, B, **kwargs):
    """Initialise the base class, then create the Polya-gamma samplers.

    ``N``, ``B`` and any extra keyword arguments are forwarded unchanged to
    the parent constructor. One ``PyPolyaGamma`` sampler is then created per
    thread reported by ``pypolyagamma.get_omp_num_threads()`` and stored in
    ``self.ppgs``.
    """
    super(_SparsePGRegressionBase, self).__init__(N, B, **kwargs)

    import pypolyagamma as ppg

    # One independently seeded sampler per thread.
    sampler_seeds = npr.randint(2**16, size=ppg.get_omp_num_threads())
    self.ppgs = list(map(ppg.PyPolyaGamma, sampler_seeds))
def test_no_seed(verbose=False):
    """Smoke test: a sampler built without an explicit seed can take a draw.

    :param verbose: print the drawn value when True
    :return: True if no exception was raised
    """
    sampler = pypolyagamma.PyPolyaGamma()
    draw = sampler.pgdraw(1., 1.)

    if verbose:
        print(draw)
    return True
def test_single_draw(verbose=False):
    """Smoke test: a seeded sampler can take a single scalar draw.

    NumPy's global RNG is reset to seed 0 first, so the seed handed to the
    sampler is the same on every run.

    :param verbose: print the drawn value when True
    :return: True if no exception was raised
    """
    np.random.seed(0)
    sampler_seed = np.random.randint(2**16)
    sampler = pypolyagamma.PyPolyaGamma(sampler_seed)
    draw = sampler.pgdraw(1., 1.)

    if verbose:
        print(draw)
    return True
def initialize_polya_gamma_samplers(self):
    """Create one randomly seeded Polya-gamma sampler per thread.

    The thread count is taken from the ``OMP_NUM_THREADS`` environment
    variable when it is set, otherwise from ``ppg.get_omp_num_threads()``,
    and is stored in ``self.num_threads``.

    :return: list of ``PyPolyaGamma`` samplers, one per thread
    :raises AssertionError: if the resulting thread count is not positive
    """
    env_value = os.environ.get("OMP_NUM_THREADS")
    if env_value is None:
        self.num_threads = ppg.get_omp_num_threads()
    else:
        self.num_threads = int(env_value)
    assert self.num_threads > 0

    # Give every sampler its own random seed.
    sampler_seeds = np.random.randint(2**16, size=self.num_threads)
    samplers = []
    for sampler_seed in sampler_seeds:
        samplers.append(ppg.PyPolyaGamma(sampler_seed))
    return samplers
def test_vector_draw(verbose=False):
    """Smoke test for the vectorized sampler ``pgdrawv``.

    Draws five samples with first parameter 14 and second parameter 0 into a
    pre-allocated output array.

    :param verbose: print the drawn values when True
    :return: True if no exception was raised
    """
    np.random.seed(0)
    ppg = pypolyagamma.PyPolyaGamma(np.random.randint(2**16))

    # Call the vectorized version
    n = 5
    v2 = np.zeros(n)
    # The builtin `float` replaces the `np.float` alias, which has been
    # removed from NumPy; the resulting dtype (float64) is the same.
    a = 14 * np.ones(n, dtype=float)
    b = 0 * np.ones(n, dtype=float)
    ppg.pgdrawv(a, b, v2)

    if verbose:
        print(v2)
    return True
def sample_w_i(S, J_i):
    """
    Draw the Polya-gamma variables for one neuron with the parallel sampler.

    A fresh set of samplers (one per OpenMP thread reported by
    ``pypolyagamma``) is created on every call.

    :param S: observation matrix
    :param J_i: neuron i's couplings
    :return: samples for w_i from a polyagamma distribution
    """
    thread_count = pypolyagamma.get_omp_num_threads()
    samplers = [
        pypolyagamma.PyPolyaGamma(sampler_seed)
        for sampler_seed in np.random.randint(2**16, size=thread_count)
    ]

    n_rows = S.shape[0]
    shape_param = np.ones(n_rows)
    draws = np.zeros(n_rows)
    tilt = np.dot(S, J_i)
    # pgdrawvpar fills `draws` in place.
    pypolyagamma.pgdrawvpar(samplers, shape_param, tilt, draws)
    return draws
def test_parallel(verbose=False):
    """Smoke test for the parallel vectorized sampler ``pgdrawvpar``.

    Uses eight samplers to draw five samples with first parameter 14 and
    second parameter 0.

    :param verbose: print the drawn values when True
    :return: True if no exception was raised
    """
    np.random.seed(0)

    size, sampler_count = 5, 8
    out = np.zeros(size)
    shape_param = 14 * np.ones(size)
    tilt = 0 * np.ones(size)

    sampler_seeds = np.random.randint(2**16, size=sampler_count)
    samplers = [pypolyagamma.PyPolyaGamma(s) for s in sampler_seeds]
    pypolyagamma.pgdrawvpar(samplers, shape_param, tilt, out)

    if verbose:
        print(out)
    return True
def sample_w_i(S, J_i):
    """
    Draw the Polya-gamma variables for one neuron with a single sampler.

    A fresh, randomly seeded sampler is created on every call.

    :param S: observation matrix
    :param J_i: neuron i's couplings
    :return: samples for w_i from a polyagamma distribution
    """
    sampler = pypolyagamma.PyPolyaGamma(np.random.randint(2 ** 16))

    n_rows = S.shape[0]
    shape_param = np.ones(n_rows)
    draws = np.zeros(n_rows)
    tilt = np.dot(S, J_i)
    # pgdrawv fills `draws` in place.
    sampler.pgdrawv(shape_param, tilt, draws)
    return draws
def ks_test(b=1.0, c=0.0, N_smpls=10000, N_pts=10000):
    """
    Kolmogorov-Smirnov test of PG(b, c) samples against a numerical CDF.

    We can't calculate the CDF exactly, but we can do a pretty good
    job with numerical integration of ``pypolyagamma.pgpdf``.

    :param b: first Polya-gamma parameter
    :param c: second Polya-gamma parameter
    :param N_smpls: number of samples to draw
    :param N_pts: number of grid points used to integrate the pdf on [1e-5, 3]
    :return: the result object of ``scipy.stats.kstest`` (it is also printed)
    """
    from scipy.stats import kstest

    # Estimate the true CDF on a grid with a cumulative trapezoid rule.
    oms = np.linspace(1e-5, 3.0, N_pts)
    pdf = pypolyagamma.pgpdf(oms, b, c, trunc=200)
    increments = 0.5 * (pdf[1:] + pdf[:-1]) * np.diff(oms)
    cdf_grid = np.minimum(np.concatenate(([0.0], np.cumsum(increments))), 1.0)

    def cdf(x):
        # kstest evaluates the CDF on the whole array of samples at once, so
        # this must be vectorized. A scalar-only CDF silently returns a
        # single number for the full array, which is what used to drive the
        # p-value to zero.
        return np.interp(x, oms, cdf_grid)

    # Draw samples
    ppg = pypolyagamma.PyPolyaGamma(np.random.randint(2**16))
    smpls = 1e-3 * np.ones(N_smpls)
    ppg.pgdrawv(b * np.ones(N_smpls), c * np.ones(N_smpls), smpls)

    result = kstest(smpls, cdf)
    print(result)
    return result
def __init__(self, S, C):
    """Store the data and allocate the sampler state.

    :param S: 2-D array; its shape is unpacked into ``self.T, self.N``
    :param C: upper bound (exclusive) for the random integer labels in
        ``self.c`` and the number of columns of ``self.psis``
    """
    self.S = S
    self.C = C
    self.T, self.N = S.shape

    # One random label in [0, C) per column of S.
    self.c = np.random.randint(0, C, size=self.N)
    self.psis = np.zeros((self.T, self.C))

    from pybasicbayes.distributions.gaussian import ScalarGaussianNIX
    nix_hypers = dict(mu_0=0, kappa_0=1, sigmasq_0=1.0, nu_0=2.0)
    self.gaussian = ScalarGaussianNIX(**nix_hypers)

    # One independently seeded Polya-gamma sampler per thread.
    import pypolyagamma as ppg
    thread_count = ppg.get_omp_num_threads()
    sampler_seeds = np.random.randint(2**16, size=thread_count)
    self.ppgs = [ppg.PyPolyaGamma(s) for s in sampler_seeds]

    self.omega = np.zeros((self.T, self.N))
def __init__(self, model, data=None, mask=None, **kwargs):
    """Build the states object and, for count data, the PG augmentation.

    All arguments are forwarded unchanged to the parent constructor.
    ``self.has_count_data`` is set to whether the emission distribution is a
    ``_PGLogisticRegressionBase``. If it is, one Polya-gamma sampler per
    thread is stored in ``self.ppgs`` and the auxiliary variables
    ``self.omega`` are initialised to ones of shape ``(T, D_emission)``.
    """
    super(LDSStatesCountData, self). \
        __init__(model, data=data, mask=mask, **kwargs)

    # Check if the emission matrix is a count regression
    from pypolyagamma.distributions import _PGLogisticRegressionBase
    self.has_count_data = isinstance(self.emission_distn,
                                     _PGLogisticRegressionBase)
    if not self.has_count_data:
        return

    # Initialize the Polya-gamma samplers
    import pypolyagamma as ppg
    num_threads = ppg.get_omp_num_threads()
    seeds = np.random.randint(2**16, size=num_threads)
    self.ppgs = [ppg.PyPolyaGamma(seed) for seed in seeds]

    # Initialize auxiliary variables, omega. The builtin `float` replaces
    # the `np.float` alias, which has been removed from NumPy.
    self.omega = np.ones((self.T, self.D_emission), dtype=float)
def poly_gamma_rand(n, Psi):
    """
    returns Polyagamma random variables

    A fresh, randomly seeded sampler is created on every call and one
    scalar draw is taken per pair of entries of ``n`` and ``Psi``.

    Parameters
    ----------
    n: [M x K-1] count matrix
    Psi: [M x K-1] Gaussian variables

    Returns
    -------
    omega: [M x K-1] polya gamma variables conditioned on Psi and
        data n (sufficient statistics of X)
    """
    # Seed with a plain scalar, the same way every other sampler in this
    # file is seeded. The previous 1-element array drawn from [0, 2**63)
    # depended on the platform's default integer width and on implicit
    # array-to-scalar conversion.
    pg = pypolyagamma.PyPolyaGamma(np.random.randint(2**16))
    draws = [pg.pgdraw(n_i, psi_i)
             for n_i, psi_i in zip(n.ravel(), Psi.ravel())]
    return np.reshape(draws, n.shape)
def __init__(self, S, D):
    """Store the data and allocate zero-initialised model state.

    :param S: 2-D array; its shape is unpacked into ``self.T, self.N``
    :param D: second dimension of ``self.Z`` and ``self.A``
    """
    self.S = S
    self.D = D
    self.T, self.N = S.shape

    self.Z = np.zeros((self.N, self.D))
    self.omega = np.zeros((self.T, self.N))

    # Initialize regression model
    # (an earlier variant used pybasicbayes' Regression here:)
    # from pybasicbayes.distributions.regression import Regression
    # S_0 = np.eye(self.T)
    # K_0 = np.eye(self.D+1)
    # M_0 = np.zeros((self.T, self.D+1))
    # nu_0 = self.T+2
    # self.regression = Regression(nu_0, S_0, M_0, K_0, affine=True)
    self.A = np.zeros((self.T, self.D))
    self.bias = np.zeros((self.T,))

    # One independently seeded Polya-gamma sampler per thread.
    import pypolyagamma as ppg
    thread_count = ppg.get_omp_num_threads()
    self.ppgs = [
        ppg.PyPolyaGamma(sampler_seed)
        for sampler_seed in np.random.randint(2**16, size=thread_count)
    ]
"""
Call the different sample methods
"""
import numpy as np
np.random.seed(0)

import pypolyagamma

rng = pypolyagamma.PyRNG(0)
# Kept even though only the commented-out calls below use it: constructing it
# consumes one value from NumPy's RNG, which determines the seeds drawn later.
ppg = pypolyagamma.PyPolyaGamma(np.random.randint(2**16))

# # Call the single sample
# v1 = ppg.pgdraw(1.,1.)
# print v1
#
# # Call the vectorized version
n = 5
# v2 = np.zeros(n)
# The builtin `float` replaces the `np.float` alias, which has been removed
# from NumPy; the resulting dtype (float64) is the same.
a = 14 * np.ones(n, dtype=float)
b = 0 * np.ones(n, dtype=float)
# ppg.pgdrawv(a, b, v2)
# print v2

# Call the parallel vectorized version
# n = 5
nthreads = 8
v3 = np.zeros(n)
seeds = np.random.randint(2**16, size=nthreads)
ppgs = [pypolyagamma.PyPolyaGamma(seed) for seed in seeds]
pypolyagamma.pgdrawvpar(ppgs, a, b, v3)
print(v3)
def __init__(self, model, data=None, **kwargs):
    """Build the states object and the zero-inflation mask for the data.

    The data must be provided in sparse row format. This makes it easy to
    iterate over rows: for each row t it is easy to get the output
    dimensions n such that y_{t,n} > 0.

    ``model``, ``data`` and any extra keyword arguments are forwarded to the
    parent constructor, and one Polya-gamma sampler per thread is stored in
    ``self.ppgs``. With ``data=None`` both ``self.masked_data`` and
    ``self.omega`` are set to None. Otherwise ``self.masked_data`` becomes a
    sparse (T, N) matrix holding the data at the "exposed" positions and the
    auxiliary variables are resampled once.

    :raises AssertionError: if ``data`` is given but is not a ``csr_matrix``
    """
    parent = super(LDSStatesZeroInflatedCountData, self)
    parent.__init__(model, data=data, **kwargs)

    # Initialize the Polya-gamma samplers
    thread_count = ppg.get_omp_num_threads()
    sampler_seeds = np.random.randint(2**16, size=thread_count)
    self.ppgs = [ppg.PyPolyaGamma(s) for s in sampler_seeds]

    # Nothing to mask without data.
    if data is None:
        self.masked_data = None
        self.omega = None
        return

    assert isinstance(
        data, csr_matrix
    ), "Data must be a sparse row matrix for zero-inflated models"

    # Initialize a sparse matrix of masked data. The mask specifies which
    # observations were "exposed" and which were deterministically zero.
    # In other words, the mask gives the data values at the places where
    # z_{t,n} = 1.
    # (C, D and b are read here but not used further in this constructor.)
    T, N, C, D, b = self.T, self.D_emission, self.C, self.D, self.emission_distn.b

    row_ptr = [0]
    col_chunks = []
    val_chunks = []
    for t in range(T):
        # Densify the t-th row from its nonzero entries.
        start, stop = data.indptr[t], data.indptr[t + 1]
        nonzero_cols = data.indices[start:stop]
        dense_row = np.zeros(N)
        dense_row[nonzero_cols] = data.data[start:stop]

        # Sample the zero-inflation mask; observed nonzeros are always exposed.
        exposed = np.random.rand(N) < self.rho
        exposed[nonzero_cols] = True

        # Record this row's exposed columns and their values.
        exposed_cols = np.where(exposed)[0]
        col_chunks.append(exposed_cols)
        val_chunks.append(dense_row[exposed_cols])
        row_ptr.append(row_ptr[-1] + exposed_cols.size)

    # Construct a sparse matrix
    csr_parts = (
        np.concatenate(val_chunks),
        np.concatenate(col_chunks),
        np.array(row_ptr),
    )
    self.masked_data = csr_matrix(csr_parts, shape=(T, N))

    # DEBUG: Start with all the data
    # dense_data = data.toarray()
    # values = dense_data.ravel()
    # indices = np.tile(np.arange(self.D_emission), (self.T,))
    # indptrs = np.arange(self.T+1) * self.D_emission
    # self.masked_data = csr_matrix((values, indices, indptrs), (self.T, self.D_emission))
    # assert np.allclose(self.masked_data.toarray(), dense_data)

    self.resample_auxiliary_variables()
def __init__(self, population, trunc=200):
    """Attach the population and create the Polya-gamma samplers.

    :param population: object exposing an ``N`` attribute, copied to ``self.N``
    :param trunc: passed as the second argument to every ``PyPolyaGamma``
    """
    self.population = population

    # One independently seeded sampler per thread.
    thread_count = ppg.get_omp_num_threads()
    sampler_seeds = np.random.randint(2**16, size=thread_count)
    samplers = []
    for sampler_seed in sampler_seeds:
        samplers.append(ppg.PyPolyaGamma(sampler_seed, trunc))
    self.ppgs = samplers

    self.N = self.population.N
from scipy.stats import norm, probplot # Use a simple Normal-Bernoulli model # z ~ N(z | 0, 1) # x ~ [Bern(x | \sigma(z))]^{1/T} = Bern(x | \sigma(z / T)) # Where T is the temperature of the tempered distribution in [1, \inf) # When T=1 we target the posterior. When T=\inf we target the prior T = 2.0 mu_z = 0.0 sigma_z = 1.0 # Initialize Polya-gamma samplers num_threads = ppg.get_omp_num_threads() seeds = np.random.randint(2**16, size=num_threads) ppgs = [ppg.PyPolyaGamma(seed) for seed in seeds] def kappa(x): # Compute kappa = [a(x) - b(x)/2.] / T # for the Bernoulli model where a(x) = x and b(x) = 1 return (x - 0.5) / T def resample_z(x, omega): # Resample z from its Gaussian conditional prior_J = 1. / sigma_z prior_h = prior_J * mu_z lkhd_J = omega lkhd_h = kappa(x)
def test_parallel2():
    """Test multiple cases of OMP

    Runs ``pgdrawvpar`` for every combination of

    * output length n below, above and equal to the number of samplers, and
    * number of samplers equal to, below and above the OpenMP thread count.

    Returns None without testing anything when fewer than two OpenMP threads
    are available, and True otherwise.
    """
    num_threads = pypolyagamma.get_omp_num_threads()
    if num_threads < 2:
        return

    np.random.seed(0)

    def _run_case(nthreads, n):
        # Draw n samples with `nthreads` independently seeded samplers.
        v3 = np.zeros(n)
        a = 14 * np.ones(n)
        b = 0 * np.ones(n)
        seeds = np.random.randint(2**16, size=nthreads)
        ppgs = [pypolyagamma.PyPolyaGamma(seed) for seed in seeds]
        pypolyagamma.pgdrawvpar(ppgs, a, b, v3)

    # Cases 1-3: n < nthreads, cases 4-6: n > nthreads, cases 7-9: n = nthreads.
    # Within each group: nthreads = num_threads, then < num_threads, then
    # > num_threads. The order matters because each case draws its seeds
    # from the shared NumPy RNG.
    for n_offset in (-1, 1, 0):
        for thread_offset in (0, -1, 1):
            nthreads = num_threads + thread_offset
            _run_case(nthreads, nthreads + n_offset)

    return True