def __init__(self, alpha_0=None, beta_0=None, alphas_0=None, betas_0=None,
             r_support=None, r_probs=None, r_discrete_distn=None,
             r=None, ps=None):
    """
    Set up the support over r, its log prior, and one fixed-r distribution
    per support value.

    Exactly one of the following must be supplied:
      * ``r_discrete_distn``: array whose nonzero entries define the support
        (index + 1) and the matching probabilities, or
      * ``r_support`` together with ``r_probs``.

    Likewise exactly one of (``alpha_0``, ``beta_0``) or
    (``alphas_0``, ``betas_0``) must be supplied; the scalar pair is
    repeated once per support value.

    ``ps`` optionally gives one ``p`` per support value (``None`` entries
    are used when it is omitted).  The ``r`` argument is accepted but is
    not read in this constructor.
    """
    distn_given = r_discrete_distn is not None
    support_given = r_support is not None and r_probs is not None
    assert distn_given ^ support_given

    if distn_given:
        # nonzero positions of the distribution, shifted to 1-based r values
        (r_support,) = np.where(r_discrete_distn)
        r_probs = r_discrete_distn[r_support]
        r_support += 1

    self.r_support = np.asarray(r_support)
    # both attributes start out referring to the same log-probability array
    log_r_probs = np.log(r_probs)
    self.rho_0 = log_r_probs
    self.rho_mf = log_r_probs

    shared_given = alpha_0 is not None and beta_0 is not None
    per_r_given = alphas_0 is not None and betas_0 is not None
    assert shared_given ^ per_r_given

    if alphas_0 is None:
        alphas_0 = [alpha_0] * len(r_support)
    if betas_0 is None:
        betas_0 = [beta_0] * len(r_support)
    if ps is None:
        ps = [None] * len(r_support)

    self._fixedr_distns = [
        self._fixedr_class(r=r_val, p=p_val, alpha_0=a_val, beta_0=b_val)
        for r_val, p_val, a_val, b_val
        in zip(r_support, ps, alphas_0, betas_0)
    ]

    # for init
    self.ridx = sample_discrete(r_probs)
    self.r = r_support[self.ridx]
def downsample_data_slow(X, n):
    """
    Downsample each row of X such that it sums to n by randomly
    removing entries.

    A copy of X is modified; X itself is left untouched.  For every row,
    one unit is removed at a time from an index drawn by ``sample_discrete``
    with weights equal to the row's current counts divided by their sum.

    :param X: 2-D array of counts (each row must sum to at least n)
    :param n: target sum for every row
    :return: downsampled copy of X
    :raises AssertionError: if X is not 2-D, a row sums to less than n,
        or a row does not sum to n after the removals
    """
    from pybasicbayes.util.stats import sample_discrete

    assert X.ndim == 2
    Xsub = X.copy()

    for i in range(Xsub.shape[0]):
        Mi = int(Xsub[i].sum())
        assert Mi >= n

        # Remove one randomly chosen unit per iteration
        for _ in range(Mi - n):
            # Normalize right before drawing.  Normalizing after each
            # removal would divide by a zero row sum once the last unit is
            # removed (n == 0), and normalizing up front would do the same
            # for an all-zero row.
            p = Xsub[i] / float(Xsub[i].sum())
            k = sample_discrete(p)
            assert Xsub[i, k] > 0
            Xsub[i, k] -= 1

        assert Xsub[i].sum() == n

    return Xsub
def resample(self, data=[]):
    """
    Redraw ``self.r`` and ``self.p`` from the posterior given ``data``.

    The statistics from ``self._get_statistics(data)`` are unpacked into
    ``self._posterior_hypparams``, which yields ``alpha_n``, a sequence
    ``betas_n`` and discrete weights over the support of r.  An index is
    drawn with ``sample_discrete``; ``self.r`` is the support value at that
    index and ``self.p`` is a Beta(alpha_n, betas_n[index]) draw.
    """
    stats = self._get_statistics(data)
    alpha_n, betas_n, posterior_discrete = self._posterior_hypparams(*stats)

    r_idx = sample_discrete(posterior_discrete)
    self.r = self.r_support[r_idx]
    self.p = np.random.beta(alpha_n, betas_n[r_idx])
def downsample_data_slow(X, n):
    """
    Downsample each row of X such that it sums to n by randomly
    removing entries.

    A copy of X is modified; X itself is left untouched.  For every row,
    one unit is removed at a time from an index drawn by ``sample_discrete``
    with weights equal to the row's current counts divided by their sum.

    :param X: 2-D array of counts (each row must sum to at least n)
    :param n: target sum for every row
    :return: downsampled copy of X
    :raises AssertionError: if X is not 2-D, a row sums to less than n,
        or a row does not sum to n after the removals
    """
    from pybasicbayes.util.stats import sample_discrete

    assert X.ndim == 2
    Xsub = X.copy()

    # ``range`` works on both Python 2 and 3; ``xrange`` exists only on 2
    for i in range(Xsub.shape[0]):
        Mi = int(Xsub[i].sum())
        assert Mi >= n

        # Remove one randomly chosen unit per iteration
        for _ in range(Mi - n):
            # Normalize right before drawing.  Normalizing after each
            # removal would divide by a zero row sum once the last unit is
            # removed (n == 0), and normalizing up front would do the same
            # for an all-zero row.
            p = Xsub[i] / float(Xsub[i].sum())
            k = sample_discrete(p)
            assert Xsub[i, k] > 0
            Xsub[i, k] -= 1

        assert Xsub[i].sum() == n

    return Xsub
def _generate(self, N):
    """
    Fill ``self.z`` with N assignments by running a CRP forwards.

    At step k the weights passed to ``sample_discrete`` are the counts of
    the labels already assigned (``np.bincount`` of the first k entries)
    followed by ``self.alpha_0`` as the final entry.
    """
    concentration = self.alpha_0
    self.z = np.zeros(N, dtype=np.int32)

    for k in range(N):
        label_counts = np.bincount(self.z[:k])
        weights = np.concatenate((label_counts, (concentration,)))
        self.z[k] = sample_discrete(weights)
def generate(self, T=100, keep=True, init_data=None, covariates=None, with_noise=True):
    """
    Simulate a discrete state sequence and observations forward in time.

    The first discrete state is drawn from ``self.init_state_distn.pi_0``.
    At each later step the regressor row is the previous observation
    followed by the current covariates; it is passed to
    ``self.trans_distn.get_trans_matrices`` to get the transition matrix
    and to the chosen observation distribution to get the next observation.

    :param T: number of time steps
    :param keep: currently unused (storing the sequence is still a TODO)
    :param init_data: value for the first observation; a standard normal
        draw of length ``self.D`` is used when None
    :param covariates: array whose first dimension is T; when None an
        empty ``(T, 0)`` array is used
    :param with_noise: if True observations come from ``rvs``; otherwise
        from ``predict``
    :return: tuple ``(data, dss)`` - a ``(T, self.D)`` double array and a
        length-T int32 array of discrete states
    :raises AssertionError: if ``covariates`` has the wrong length or a
        generated observation is not finite
    """
    from pybasicbayes.util.stats import sample_discrete

    n = self.D

    # Prepare the covariates
    if covariates is None:
        covariates = np.zeros((T, 0))
    else:
        assert covariates.shape[0] == T

    # Initialize discrete state sequence
    pi_0 = self.init_state_distn.pi_0
    dss = np.empty(T, dtype=np.int32)
    dss[0] = sample_discrete(pi_0.ravel())

    data = np.empty((T, n), dtype='double')
    if init_data is None:
        data[0] = np.random.randn(n)
    else:
        data[0] = init_data

    for t in range(1, T):
        # Slice with t:t+1 so the covariates stay a single row.  Indexing
        # with [t] yields a 1-D array, which column_stack turns into a
        # column; that only lines up with the one-row data slice when there
        # is exactly one covariate (and fails for the default of none).
        cov_t = np.column_stack((data[t - 1:t], covariates[t:t + 1]))

        # Sample discrete state given previous observation and covariates
        A = self.trans_distn.get_trans_matrices(cov_t)[0]
        dss[t] = sample_discrete(A[dss[t - 1], :])

        # Sample (or predict) the observation given the current discrete state
        if with_noise:
            data[t] = self.obs_distns[dss[t]].rvs(cov_t, return_xy=False)
        else:
            data[t] = self.obs_distns[dss[t]].predict(cov_t)

        assert np.all(np.isfinite(data[t])), "RARHMM appears to be unstable!"

    # TODO: honor ``keep`` by storing the generated sequence
    return data, dss
def generate_states(self, initial_condition=None, with_noise=True, stateseq=None):
    """
    Jointly generate the discrete and continuous state sequences and store
    them on ``self.stateseq`` and ``self.gaussian_states``.

    :param initial_condition: optional pair (discrete state, continuous
        state) for time 0.  When None the discrete state is drawn from a
        uniform distribution over ``self.num_states`` states and the
        continuous state from the matching ``init_dynamics_distns`` entry.
    :param with_noise: if True, unset discrete states are sampled from the
        transition row and continuous states come from ``rvs``; if False,
        the argmax of the transition row and ``predict`` are used instead.
    :param stateseq: optional discrete state sequence.  It is used in place
        (not copied): entry 0 is always overwritten and later entries are
        only filled in where they equal -1.
    :raises AssertionError: if a generated continuous state is not finite

    The continuous state at time t is produced by the dynamics distribution
    indexed by the discrete state at time t-1.
    """
    from pybasicbayes.util.stats import sample_discrete

    T, n = self.T, self.D_latent

    # -1 marks discrete states that still have to be generated
    if stateseq is None:
        dss = -1 * np.ones(T, dtype=np.int32)
    else:
        dss = stateseq
    gss = np.empty((T, n), dtype='double')

    if initial_condition is not None:
        dss[0] = initial_condition[0]
        gss[0] = initial_condition[1]
    else:
        uniform = np.ones(self.num_states) / float(self.num_states)
        dss[0] = sample_discrete(uniform.ravel())
        gss[0] = self.init_dynamics_distns[dss[0]].rvs()

    for t in range(1, T):
        # Transition matrix given the previous continuous state
        A = self.trans_distn.get_trans_matrices(gss[t - 1:t])[0]
        unset = dss[t] == -1

        if with_noise:
            if unset:
                dss[t] = sample_discrete(A[dss[t - 1], :])
            dynamics = self.dynamics_distns[dss[t - 1]]
            regressors = np.hstack((gss[t - 1][None, :],
                                    self.inputs[t - 1][None, :]))
            gss[t] = dynamics.rvs(x=regressors, return_xy=False)
        else:
            # Most likely next discrete state and predicted continuous state
            if unset:
                dss[t] = np.argmax(A[dss[t - 1], :])
            dynamics = self.dynamics_distns[dss[t - 1]]
            regressors = np.hstack((gss[t - 1][None, :],
                                    self.inputs[t - 1][None, :]))
            gss[t] = dynamics.predict(regressors)

        assert np.all(np.isfinite(gss[t])), "SLDS appears to be unstable!"

    self.stateseq = dss
    self.gaussian_states = gss
def energy(self, data):
    """
    Return the energy of ``data`` under one randomly chosen component.

    ``data`` must be 1-D.  If it contains any NaN the result is ``0.``.
    Otherwise each component's log likelihood plus the log of
    ``self.weights.weights`` is exponentiated (after subtracting the
    maximum), a component index is drawn with ``sample_discrete`` from
    those values, and that component's ``energy(data)`` is returned.
    """
    # TODO TODO this function is horrible
    assert data.ndim == 1

    if np.isnan(data).any():
        return 0.

    from .util.stats import sample_discrete

    component_lls = [comp.log_likelihood(data) for comp in self.components]
    scores = np.array(component_lls).reshape((-1,))
    scores += np.log(self.weights.weights)

    # subtract the max before exponentiating
    chosen = sample_discrete(np.exp(scores - scores.max()))
    return self.components[chosen].energy(data)
def sample(self, z, x, i, n):
    """
    Draw one outcome for time index ``i`` and write it one-hot into ``x``.

    :param z: buffer of particle states, read at ``z[i, n, :]``
        (described by the original author as TxNxD)
    :param x: output buffer for observations; row ``i`` is zeroed and a
        single entry is set to 1
    :param i: Time index to sample
    :param n: Particle index to sample
    """
    particle_state = z[i, n, :]
    psi = np.dot(self.C, particle_state) + self.mu
    probs = psi_to_pi(psi)

    from pybasicbayes.util.stats import sample_discrete
    outcome = sample_discrete(probs)

    x[i, :] = 0
    x[i, outcome] = 1
def generate_states(self, initial_condition=None, with_noise=True, stateseq=None):
    """
    Generate (or adopt) the discrete state sequence ``self.stateseq``.

    If ``stateseq`` is given it must have shape ``(self.T,)`` and is stored
    as an int32 copy.  Otherwise the first state is drawn uniformly with
    ``np.random.choice(self.num_states)`` and each later state is drawn
    with ``sample_discrete`` from the row of ``self.trans_matrix`` indexed
    by the previous time step and the previous state.

    ``initial_condition`` and ``with_noise`` are accepted but not read
    here: discrete states are always sampled at random rather than chosen
    as the most likely state.
    """
    if stateseq is not None:
        assert stateseq.shape == (self.T,)
        self.stateseq = stateseq.astype(np.int32)
        return

    trans = self.trans_matrix
    self.stateseq = -1 * np.ones(self.T, dtype=np.int32)
    self.stateseq[0] = np.random.choice(self.num_states)

    for t in range(1, self.T):
        prev_state = self.stateseq[t - 1]
        row = trans[t - 1, prev_state, :].ravel()
        self.stateseq[t] = sample_discrete(row)
def sample(self, z, x, i, n):
    """
    Draw one outcome for time index ``i`` and write it one-hot into ``x``.

    :param z: buffer of particle states, read at ``z[i, n, :]``
        (described by the original author as TxNxD)
    :param x: output buffer for observations; row ``i`` is zeroed and a
        single entry is set to 1
    :param i: Time index to sample
    :param n: Particle index to sample
    """
    particle_state = z[i, n, :]
    psi = np.dot(self.C, particle_state) + self.mu
    probs = psi_to_pi(psi)

    from pybasicbayes.util.stats import sample_discrete
    outcome = sample_discrete(probs)

    x[i, :] = 0
    x[i, outcome] = 1
def rvs(self, customer_counts):
    """
    Simulate table occupancies by running a CRP once per customer count.

    Each customer joins table ``j`` or opens a new table according to an
    index drawn by ``sample_discrete`` from the current table counts
    followed by ``self.concentration``.

    :param customer_counts: an int (one restaurant) or a list of ints
        (one restaurant per entry)
    :return: a list of table counts when exactly one restaurant was
        requested (an int or a one-element list); otherwise a list with one
        such list per restaurant.  An empty list yields an empty list.
    :raises AssertionError: if ``customer_counts`` is neither a list nor
        an int
    """
    # could replace this with one of the faster C versions, but at least
    # the Python version is clearer
    assert isinstance(customer_counts, list) or isinstance(customer_counts, int)
    if isinstance(customer_counts, int):
        customer_counts = [customer_counts]

    restaurants = []
    for num in customer_counts:
        # a CRP with num customers
        tables = []
        for _ in range(num):
            newidx = sample_discrete(np.array(tables + [self.concentration]))
            if newidx == len(tables):
                # the last index stands for opening a new table
                tables.append(1)
            else:
                tables[newidx] += 1
        restaurants.append(tables)

    # Unwrap only when there is exactly one restaurant; an empty request
    # has nothing to unwrap and simply returns the empty list.
    if len(restaurants) == 1:
        return restaurants[0]
    return restaurants
def __init__(self, alpha_0=None, beta_0=None, alphas_0=None, betas_0=None,
             r_support=None, r_probs=None, r_discrete_distn=None,
             r=None, ps=None):
    """
    Set up the support over r, its log prior, and one fixed-r distribution
    per support value.

    Exactly one of the following must be supplied:
      * ``r_discrete_distn``: array whose nonzero entries define the support
        (index + 1) and the matching probabilities, or
      * ``r_support`` together with ``r_probs``.

    Likewise exactly one of (``alpha_0``, ``beta_0``) or
    (``alphas_0``, ``betas_0``) must be supplied; the scalar pair is
    repeated once per support value.

    ``ps`` optionally gives one ``p`` per support value (``None`` entries
    are used when it is omitted).  The ``r`` argument is accepted but is
    not read in this constructor.
    """
    distn_given = r_discrete_distn is not None
    support_given = r_support is not None and r_probs is not None
    assert distn_given ^ support_given

    if distn_given:
        # nonzero positions of the distribution, shifted to 1-based r values
        (r_support,) = np.where(r_discrete_distn)
        r_probs = r_discrete_distn[r_support]
        r_support += 1

    self.r_support = np.asarray(r_support)
    # both attributes start out referring to the same log-probability array
    log_r_probs = np.log(r_probs)
    self.rho_0 = log_r_probs
    self.rho_mf = log_r_probs

    shared_given = alpha_0 is not None and beta_0 is not None
    per_r_given = alphas_0 is not None and betas_0 is not None
    assert shared_given ^ per_r_given

    if alphas_0 is None:
        alphas_0 = [alpha_0] * len(r_support)
    if betas_0 is None:
        betas_0 = [beta_0] * len(r_support)
    if ps is None:
        ps = [None] * len(r_support)

    self._fixedr_distns = [
        self._fixedr_class(r=r_val, p=p_val, alpha_0=a_val, beta_0=b_val)
        for r_val, p_val, a_val, b_val
        in zip(r_support, ps, alphas_0, betas_0)
    ]

    # for init
    self.ridx = sample_discrete(r_probs)
    self.r = r_support[self.ridx]
def resample(self, data=[]):
    """
    Redraw ``self.r`` and ``self.p`` from the posterior given ``data``.

    ``self._posterior_hypparams`` (fed the unpacked statistics of ``data``)
    returns ``n``, ``alpha_n``, discrete weights, and the support those
    weights refer to.  ``self.r`` is the support entry at an index drawn by
    ``sample_discrete``; ``self.p`` is then a
    Beta(alpha_n - n*r, beta_0 + n*r) draw using the new ``self.r``.
    """
    stats = self._get_statistics(data)
    # NOTE: r_support is passed out because it is the feasible subset
    n, alpha_n, posterior_discrete, r_support = \
        self._posterior_hypparams(*stats)

    chosen = sample_discrete(posterior_discrete)
    self.r = r_support[chosen]
    self.p = np.random.beta(alpha_n - n * self.r, self.beta_0 + n * self.r)
def _resample_r(self, data):
    """
    Redraw the index ``self.ridx`` and the matching ``self.r``.

    The statistics of ``data`` are passed (as a single argument) to
    ``self._posterior_hypparams`` and the result is handed to
    ``sample_discrete``.  Returns ``self``.
    """
    stats = self._get_statistics(data)
    posterior = self._posterior_hypparams(stats)

    self.ridx = sample_discrete(posterior)
    self.r = self.r_support[self.ridx]
    return self
def _resample_r_from_mf(self):
    """
    Redraw ``self.ridx`` and ``self.r`` from the weights in ``self.rho_mf``.

    ``self.rho_mf`` is shifted by its ``logsumexp`` and exponentiated
    before being passed to ``sample_discrete``.
    """
    log_weights = self.rho_mf
    lognorm = logsumexp(log_weights)
    weights = np.exp(log_weights - lognorm)

    self.ridx = sample_discrete(weights)
    self.r = self.r_support[self.ridx]
def rvs(self, size=None):
    """
    Return the result of ``sample_discrete(self.weights, size)``.

    :param size: forwarded unchanged as the second argument
    """
    draws = sample_discrete(self.weights, size)
    return draws
def rvs(self, size=None):
    """
    Return the result of ``sample_discrete(self.weights, size)``.

    :param size: forwarded unchanged as the second argument
    """
    draws = sample_discrete(self.weights, size)
    return draws
def resample_Z_python(self):
    """
    Resample the parent of every spike and rebuild the sufficient
    statistics ``self.bkgd_ss``, ``self.weight_ss`` and ``self.imp_ss``.

    For spike n the candidates are the background rate
    (``lambda0[C[n]]``) and earlier spikes, each weighted by
    ``W[C[par], C[n]] * impulse(dt, C[par], C[n])``.  Earlier spikes are
    scanned from the most recent backwards and the scan stops at the first
    one more than ``dt_max`` in the past (this relies on the spike times
    in ``S`` being sorted, as the original author notes).  ``Z[n]`` is set
    to -1 for the background or to the index of the chosen parent spike;
    ``Z`` is updated in place.
    """
    from pybasicbayes.util.stats import sample_discrete

    # TODO: Call cython function to resample parents
    S, C, Z, dt_max = self.S, self.C, self.Z, self.dt_max
    lambda0 = self.model.bias_model.lambda0
    W = self.model.weight_model.W
    impulse = self.model.impulse_model.impulse

    # Counts of parents assigned to the background rate and to each
    # connection, plus the accumulated impulse statistic per connection
    self.bkgd_ss = np.zeros(self.K)
    self.weight_ss = np.zeros((self.K, self.K))
    self.imp_ss = np.zeros((self.K, self.K))

    for n in range(self.N):
        child = C[n]

        # The first spike has no possible parent spike
        if n == 0:
            Z[n] = -1
            self.bkgd_ss[child] += 1
            continue

        weights = np.zeros(n)
        p_bkgd = lambda0[child]

        # Walk backwards from the most recent earlier spike
        for par in range(n - 1, -1, -1):
            dt = S[n] - S[par]
            if dt > dt_max:
                weights[par] = 0
                break
            weights[par] = W[C[par], child] * impulse(dt, C[par], child)

        # Oldest spike reached by the scan; candidates start here
        min_par = par
        candidates = np.concatenate([[p_bkgd], weights[min_par:n]])

        i_par = sample_discrete(candidates)
        if i_par == 0:
            # Sampled the background rate
            Z[n] = -1
            self.bkgd_ss[child] += 1
            continue

        # Sampled one of the preceding spikes
        Z[n] = (i_par - 1) + min_par
        parent = Z[n]
        Cp = C[parent]
        dt = S[n] - S[parent]
        self.weight_ss[Cp, child] += 1
        self.imp_ss[Cp, child] += np.log(dt) - np.log(dt_max - dt)
def resample_Z_python(self):
    """
    Resample the parent of every spike and rebuild the sufficient
    statistics ``self.bkgd_ss``, ``self.weight_ss`` and ``self.imp_ss``.

    For spike n the candidates are the background rate
    (``lambda0[C[n]]``) and earlier spikes, each weighted by
    ``W_effective[C[par], C[n]] * impulse(dt, C[par], C[n])``.  Earlier
    spikes are scanned from the most recent backwards; spikes less than
    1e-8 before spike n are skipped and the scan stops at the first one
    within 1e-8 of ``dt_max`` or further in the past.  ``Z[n]`` is set to
    -1 for the background or to the index of the chosen parent spike, and
    ``Z`` is stored back on ``self.Z``.

    ``self.imp_ss`` has three layers per connection: the number of
    assigned parents, the sum of ``log(dt) - log(dt_max - dt)`` (with
    ``dt`` passed through ``translate_dt``), and the sum of squared
    deviations of that quantity from its per-connection mean.
    """
    from pybasicbayes.util.stats import sample_discrete

    # TODO: Call cython function to resample parents
    S, C, Z, dt_max = self.S, self.C, self.Z, self.dt_max
    lambda0 = self.model.bias_model.lambda0
    W = self.model.weight_model.W_effective
    impulse = self.model.impulse_model.impulse
    translate_dt = self.model.impulse_model.translate_dt

    # Counts of parents assigned to the background rate and to each
    # connection, plus the three impulse statistics per connection
    self.bkgd_ss = np.zeros(self.K)
    self.weight_ss = np.zeros((self.K, self.K))
    self.imp_ss = np.zeros((3, self.K, self.K))

    for n in range(self.N):
        child = C[n]

        # The first spike has no possible parent spike
        if n == 0:
            Z[n] = -1
            self.bkgd_ss[child] += 1
            continue

        weights = np.zeros(n)
        p_bkgd = lambda0[child]

        # Walk backwards from the most recent earlier spike
        for par in range(n - 1, -1, -1):
            dt = S[n] - S[par]
            if dt < 1e-8:
                continue
            if dt > dt_max - 1e-8:
                break
            weights[par] = W[C[par], child] * impulse(dt, C[par], child)

        # Oldest spike reached by the scan; candidates start here
        min_par = par
        candidates = np.concatenate([[p_bkgd], weights[min_par:n]])

        i_par = sample_discrete(candidates)
        if i_par == 0:
            # Sampled the background rate
            Z[n] = -1
            self.bkgd_ss[child] += 1
            continue

        # Sampled one of the preceding spikes
        Z[n] = (i_par - 1) + min_par
        parent = Z[n]
        Cp = C[parent]
        dt = translate_dt(S[n] - S[parent])
        self.weight_ss[Cp, child] += 1
        self.imp_ss[0, Cp, child] += 1
        self.imp_ss[1, Cp, child] += np.log(dt) - np.log(dt_max - dt)

    self.Z = Z

    # Second pass: sum of squared deviations from the per-connection mean
    mu = np.divide(self.imp_ss[1], self.imp_ss[0])
    for n in range(self.N):
        par = Z[n]
        if par > -1:
            dt = translate_dt(S[n] - S[par])
            sdt = np.log(dt) - np.log(dt_max - dt)
            deviation = sdt - mu[C[par], C[n]]
            self.imp_ss[2, C[par], C[n]] += deviation**2
def resample_Z_python(self):
    """
    Resample the parent of every spike and rebuild the sufficient
    statistics ``self.bkgd_ss``, ``self.weight_ss`` and ``self.imp_ss``.

    For spike n the candidates are the background rate
    (``lambda0[C[n]]``) and earlier spikes, each weighted by
    ``W[C[par], C[n]] * impulse(dt, C[par], C[n])``.  Earlier spikes are
    scanned from the most recent backwards and the scan stops at the first
    one more than ``dt_max`` in the past (this relies on the spike times
    in ``S`` being sorted, as the original author notes).  ``Z[n]`` is set
    to -1 for the background or to the index of the chosen parent spike;
    ``Z`` is updated in place.
    """
    from pybasicbayes.util.stats import sample_discrete

    # TODO: Call cython function to resample parents
    S, C, Z, dt_max = self.S, self.C, self.Z, self.dt_max
    lambda0 = self.model.bias_model.lambda0
    W = self.model.weight_model.W
    impulse = self.model.impulse_model.impulse

    # Counts of parents assigned to the background rate and to each
    # connection, plus the accumulated impulse statistic per connection
    self.bkgd_ss = np.zeros(self.K)
    self.weight_ss = np.zeros((self.K, self.K))
    self.imp_ss = np.zeros((self.K, self.K))

    for n in range(self.N):
        child = C[n]

        # The first spike has no possible parent spike
        if n == 0:
            Z[n] = -1
            self.bkgd_ss[child] += 1
            continue

        weights = np.zeros(n)
        p_bkgd = lambda0[child]

        # Walk backwards from the most recent earlier spike
        for par in range(n - 1, -1, -1):
            dt = S[n] - S[par]
            if dt > dt_max:
                weights[par] = 0
                break
            weights[par] = W[C[par], child] * impulse(dt, C[par], child)

        # Oldest spike reached by the scan; candidates start here
        min_par = par
        candidates = np.concatenate([[p_bkgd], weights[min_par:n]])

        i_par = sample_discrete(candidates)
        if i_par == 0:
            # Sampled the background rate
            Z[n] = -1
            self.bkgd_ss[child] += 1
            continue

        # Sampled one of the preceding spikes
        Z[n] = (i_par - 1) + min_par
        parent = Z[n]
        Cp = C[parent]
        dt = S[n] - S[parent]
        self.weight_ss[Cp, child] += 1
        self.imp_ss[Cp, child] += np.log(dt) - np.log(dt_max - dt)
def _generate(self, N):
    """
    Fill ``self.z`` with N assignments by running a CRP forwards.

    At step k the weights passed to ``sample_discrete`` are the counts of
    the labels already assigned (``np.bincount`` of the first k entries)
    followed by ``self.alpha_0`` as the final entry.
    """
    concentration = self.alpha_0
    self.z = np.zeros(N, dtype=np.int32)

    for k in range(N):
        label_counts = np.bincount(self.z[:k])
        weights = np.concatenate((label_counts, (concentration,)))
        self.z[k] = sample_discrete(weights)