import numpy as np
import numpy.random as npr


def regression_data(seed, data_count=500):
    """Generate train/test data from a noisy sine wave.

    :param seed: random number seed
    :param data_count: number of data points
    :return: standardized x_train, y_train, x_test, y_test, and a dict of
        training-set statistics (mean and std of y_train)
    """
    np.random.seed(seed)
    noise_var = 0.1
    x = np.linspace(-4, 4, data_count)
    y = np.sin(x) + np.sqrt(noise_var) * npr.randn(data_count)

    train_count = int(0.2 * data_count)
    idx = npr.permutation(data_count)
    x_train = x[idx[:train_count], np.newaxis]
    x_test = x[idx[train_count:], np.newaxis]
    y_train = y[idx[:train_count]]
    y_test = y[idx[train_count:]]

    # Standardize inputs using training-set statistics
    mu = np.mean(x_train, 0)
    std = np.std(x_train, 0)
    x_train = (x_train - mu) / std
    x_test = (x_test - mu) / std

    # Standardize training targets; return the statistics so predictions can
    # be mapped back (y_test is left on the original scale)
    mu = np.mean(y_train, 0)
    std = np.std(y_train, 0)
    y_train = (y_train - mu) / std
    train_stats = dict(mu=mu, sigma=std)

    return x_train, y_train, x_test, y_test, train_stats
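# Usage sketch: 20% of the points become standardized training data; use
# `train_stats` to map model predictions back to the original y scale.
x_train, y_train, x_test, y_test, stats = regression_data(seed=0)
print(x_train.shape, y_train.shape)   # (100, 1) (100,)
# y_pred_original = y_pred * stats['sigma'] + stats['mu']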
from functools import partial


def split_into_batches(data, seq_len, num_seqs=None, permute=True):
    # Chop every sequence in `data` into length-`seq_len` chunks,
    # optionally shuffling the chunks
    batches = np.array(flatmap(partial(split_array, length=seq_len), data))
    if permute:
        batches = npr.permutation(batches)
    if num_seqs is None:
        return batches, len(batches)

    # Group the chunks into minibatches of `num_seqs` sequences each
    chunks = (batches[i * num_seqs:(i + 1) * num_seqs]
              for i in range(len(batches) // num_seqs))
    return map(np.stack, chunks), len(batches) // num_seqs
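# `flatmap` and `split_array` are helpers assumed to be defined elsewhere in
# the codebase; minimal sketches consistent with how they are used above:
def split_array(arr, length):
    # Chop `arr` into consecutive chunks of `length`, dropping any remainder
    return [arr[i:i + length] for i in range(0, len(arr) - length + 1, length)]


def flatmap(f, xs):
    # Apply f to each element of xs and concatenate the resulting lists
    return [y for x in xs for y in f(x)]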
def make_batches(batch_size, data):
    # Yield shuffled minibatches; `data` is a list of arrays sharing axis 0
    N = data[0].shape[0]
    perm = npr.permutation(N)
    slices = [slice(i, min(i + batch_size, N)) for i in range(0, N, batch_size)]
    for sl in slices:
        yield [d[perm[sl]] for d in data]
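# Usage sketch: one epoch of shuffled minibatches over paired (x, y) arrays.
x, y = np.arange(10).reshape(10, 1), np.arange(10)
for xb, yb in make_batches(4, [x, y]):
    print(xb.shape, yb.shape)   # (4, 1) (4,) twice, then (2, 1) (2,)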
def make_pinwheel_data(radial_std, tangential_std, num_classes,
                       num_per_class, rate):
    # Sample "pinwheel" clusters: Gaussian blobs warped into spiral arms
    rads = np.linspace(0, 2 * np.pi, num_classes, endpoint=False)

    features = npr.randn(num_classes * num_per_class, 2) \
        * np.array([radial_std, tangential_std])
    features[:, 0] += 1.
    labels = np.repeat(np.arange(num_classes), num_per_class)

    # Rotate each point by an angle that grows with its radial coordinate
    angles = rads[labels] + rate * np.exp(features[:, 0])
    rotations = np.stack([np.cos(angles), -np.sin(angles),
                          np.sin(angles), np.cos(angles)])
    rotations = np.reshape(rotations.T, (-1, 2, 2))

    return 10 * npr.permutation(np.einsum('ti,tij->tj', features, rotations))
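# Usage sketch; the parameter values here are illustrative (similar settings
# appear in the autograd examples this function comes from):
pinwheel = make_pinwheel_data(radial_std=0.3, tangential_std=0.05,
                              num_classes=5, num_per_class=100, rate=0.25)
print(pinwheel.shape)   # (500, 2)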
def adadelta(paramvec, loss, batches, epochs=1, rho=0.95, epsilon=1e-6,
             callback=None):
    # ADADELTA (Zeiler, 2012): scale each gradient step by the ratio of the
    # running RMS update magnitude to the running RMS gradient magnitude
    sum_gsq = np.zeros_like(paramvec)   # running average of gradient**2
    sum_usq = np.zeros_like(paramvec)   # running average of update**2
    vals = []
    for epoch in range(epochs):
        permuted_batches = [batches[i] for i in npr.permutation(len(batches))]
        for im, angle in permuted_batches:
            # vgrad is a value-and-gradient function,
            # e.g. autograd's value_and_grad
            val, g = vgrad(loss)(paramvec, im, angle)
            sum_gsq = rho * sum_gsq + (1. - rho) * g**2
            ud = -np.sqrt(sum_usq + epsilon) / np.sqrt(sum_gsq + epsilon) * g
            sum_usq = rho * sum_usq + (1. - rho) * ud**2
            paramvec = paramvec + ud
            vals.append(val)
        if callback:
            callback(epoch, paramvec, vals, permuted_batches)
    return paramvec
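# Usage sketch on a toy least-squares problem, assuming `vgrad` is autograd's
# value_and_grad; each batch is an (im, angle) pair as expected above.
import autograd.numpy as anp
from autograd import value_and_grad as vgrad

def toy_loss(w, im, angle):
    # Squared error of a linear prediction on one minibatch
    return anp.sum((anp.dot(im, w) - angle) ** 2)

toy_batches = [(npr.randn(5, 3), npr.randn(5)) for _ in range(10)]
w_opt = adadelta(np.zeros(3), toy_loss, toy_batches, epochs=5)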
# Compare PSTHs of true data against simulations from the two fitted models.
# plot_trial, simulate_accumulator, plot_psths, compute_r2, and
# plot_multiple_psths are analysis helpers defined elsewhere.
import matplotlib.pyplot as plt

plot_trial(tr)
plot_trial_particles(tr)

sim_ys_1 = simulate_accumulator(test_acc, us, num_repeats=3)
sim_ys_2 = simulate_accumulator(test_acc_pem, us, num_repeats=3)
true_psths = plot_psths(ys, us, 1, N)
sim_psths_1 = plot_psths(sim_ys_1, us + us + us, 1, N)
sim_psths_2 = plot_psths(sim_ys_2, us + us + us, 1, N)
r2_lem = compute_r2(true_psths, sim_psths_1)
r2_mf = compute_r2(true_psths, sim_psths_2)

# Plot three randomly chosen neurons side by side
psth_list = [true_psths, sim_psths_1, sim_psths_2]
plot_neurons2 = npr.permutation(np.arange(N))[:3]
plot_multiple_psths(psth_list, plot_neurons2)

# Match y-limits within each column of the 3x3 grid of PSTH panels
fig = plt.gcf()
for col in range(3):
    ylim = fig.axes[col].get_ylim()
    fig.axes[col + 3].set_ylim(ylim)
    fig.axes[col + 6].set_ylim(ylim)

# compare z
z_dir = []
z_t = []
z_dir_bbvi = []
z_t_bbvi = []
true_z_dir = []
import time

from scipy.optimize import linear_sum_assignment


def run_naive_mcmc(Ys, A, Cs, etasq, sigmasq_W, W_true, Ps_true,
                   num_iters=500, num_mh_per_iter=1000,
                   W_init=None, Ps_init=None, do_update_W=True):
    # Alternate between sampling W | Ps and Ps | W
    M, T, N = Ys.shape
    assert A.shape == (N, N)
    W = W_init if W_init is not None else np.sqrt(sigmasq_W) * npr.randn(N, N)

    # Initialize permutations and ensure they satisfy the constraints
    Ps = Ps_init if Ps_init is not None else \
        np.array([perm_to_P(npr.permutation(N)) for _ in range(M)])
    for m, (P, C) in enumerate(zip(Ps, Cs)):
        P = round_to_perm(P - 1e8 * (1 - C))
        assert np.sum(P[C]) == N
        Ps[m] = P

    sigmasq_W = 10  # NOTE: overrides the sigmasq_W argument in the W update

    def _update_W(Ys, A, Ps, etasq):
        # Collect covariates: map each worm's data back to canonical order
        Xs = []
        for Y, P in zip(Ys, Ps):
            Xs.append(np.dot(Y, P.T))
        X = np.vstack(Xs)

        # Sample each row of W from its Gaussian conditional
        W = np.zeros((N, N))
        for n in range(N):
            if np.sum(A[n]) == 0:
                continue
            xn = X[1:, n]
            Xpn = X[:-1][:, A[n]]
            Jn = np.dot(Xpn.T, Xpn) / etasq + sigmasq_W * np.eye(A[n].sum())
            Sign = np.linalg.inv(Jn)
            hn = np.dot(Xpn.T, xn) / etasq
            W[n, A[n]] = npr.multivariate_normal(np.dot(Sign, hn), Sign)
        return W

    def _naive_mh_step(Pm, Ym, A, W, Cm, curr_ll=None):
        # Randomly choose two uncertain neurons (rows with more than one
        # allowed identity) and propose swapping their assignments
        unknowns = np.where(Cm.sum(axis=1) > 1)[0]
        n1, n2 = npr.choice(unknowns, 2, replace=False)
        v1 = np.where(Pm[n1])[0][0]
        v2 = np.where(Pm[n2])[0][0]
        if not Cm[n1, v2] or not Cm[n2, v1]:
            return Pm, curr_ll

        # Forward and backward proposal probabilities are equal, so the
        # MH acceptance ratio reduces to a likelihood ratio
        curr_ll = curr_ll if curr_ll is not None else \
            log_likelihood_single_worm(Ym, A, W, Pm, etasq)
        P_prop = Pm.copy()
        P_prop[n1] = Pm[n2]
        P_prop[n2] = Pm[n1]
        prop_ll = log_likelihood_single_worm(Ym, A, W, P_prop, etasq)

        # Randomly accept or reject
        if np.log(npr.rand()) < prop_ll - curr_ll:
            return P_prop, prop_ll
        else:
            return Pm.copy(), curr_ll

    def _update_Pm(Ym, A, W, Cm, m):
        # Sample Pm | W with Metropolis-Hastings
        Pm = Ps[m]
        curr_ll = None
        for _ in range(num_mh_per_iter):
            Pm, curr_ll = _naive_mh_step(Pm, Ym, A, W, Cm, curr_ll=curr_ll)
        assert Pm[Cm].sum() == N  # check validity
        return Pm

    lls, mses, num_corrects = [], [], []
    W_samples, Ps_samples, times = [], [], []

    def collect_stats(W, Ps):
        times.append(time.time())
        lls.append(log_likelihood(Ys, A, W, Ps, etasq) / (M * T * N))
        W_samples.append(W)
        Ps_samples.append(Ps)
        mses.append(np.mean((W * A - W_true * A)**2))

        # Round each P to the nearest constraint-respecting permutation
        # and count the correctly matched neurons
        num_correct = np.zeros(M)
        for m, P in enumerate(Ps):
            row, col = linear_sum_assignment(-P + 1e8 * (1 - Cs[m]))
            num_correct[m] = n_correct(perm_to_P(col), Ps_true[m])
        num_corrects.append(num_correct)

    def callback(W, Ps, t):
        collect_stats(W, Ps)
        print("MCMC Iteration {}. LL: {:.4f} MSE(W): {:.4f} Num Correct: {}"
              .format(t, lls[-1], mses[-1], num_corrects[-1]))

    # Run the MCMC algorithm
    callback(W, Ps, -1)
    for itr in range(num_iters):
        # Resample weights
        if do_update_W:
            W = _update_W(Ys, A, Ps, etasq)
        # Resample permutations
        for m in range(M):
            Ps[m] = _update_Pm(Ys[m], A, W, Cs[m], m)
        callback(W, Ps, itr)

    times = np.array(times)
    times -= times[0]
    return times, np.array(lls), np.array(mses), np.array(num_corrects)
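# `perm_to_P`, `round_to_perm`, `n_correct`, and the log-likelihood functions
# above are assumed to be defined elsewhere in this module. Minimal sketches
# of the two permutation helpers, consistent with how they are used:
def perm_to_P(perm):
    # Convert an index permutation into a permutation matrix
    N = len(perm)
    P = np.zeros((N, N))
    P[np.arange(N), perm] = 1
    return P


def round_to_perm(P):
    # Round a real-valued matrix to the nearest permutation matrix by
    # solving a linear assignment problem (maximizing the selected entries)
    _, col = linear_sum_assignment(-P)
    return perm_to_P(col)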
def simulate_celegans(A, posx, M, T, num_given, dthresh=0.01,
                      sigmasq_W=None, etasq=0.1, spectral_factor=1.0):
    N = A.shape[0]
    rho = np.mean(A.sum(0))

    # Set sigmasq_W for stability
    sigmasq_W = sigmasq_W if sigmasq_W is not None else 1. / (1.1 * N * rho)

    # Sample a skew-symmetric weight matrix and rescale its spectrum
    W = npr.randn(N, N) * A
    W = (W - W.T) / 2
    eigmax = np.max(abs(np.linalg.eig(W)[0]))
    W = W / (spectral_factor * eigmax)
    assert np.max(abs(np.linalg.eigvals(A * W))) <= 1.00001

    # Make a global constraint matrix based on x-position
    if type(dthresh) is not str:
        C = np.eye(N, dtype=bool)
        dpos = abs(posx[:, None] - posx[None, :])
        C[dpos < dthresh] = True
    else:
        C = np.ones((N, N), dtype=bool)

    # Sample permutations for each worm
    perms = []
    Ps = np.zeros((M, N, N))
    for m in range(M):
        # perm[i] = index of neuron i in worm m's neurons
        perm = npr.permutation(N)
        perms.append(perm)
        Ps[m, np.arange(N), perm] = 1

    # Make constraint matrices for each worm
    Cs = np.zeros((M, N, N), dtype=bool)
    for m, (Cm, Pm, permm) in enumerate(zip(Cs, Ps, perms)):
        # C is canonical x canonical; make it canonical x worm[m] order
        Cm = C.dot(Pm)

        # Randomly choose a handful of neurons whose identity is given
        given = npr.choice(N, replace=False, size=num_given)
        Cm[given, :] = 0
        Cm[:, permm[given]] = 0
        Cm[given, permm[given]] = 1
        Cs[m] = Cm
        assert np.sum(Pm * Cm) == N

    # Sample data from the permuted linear dynamical system
    Ys = np.zeros((M, T, N))
    for m in range(M):
        Ys[m, 0, :] = np.ones(N)
        Wm = Ps[m].T.dot((W * A).dot(Ps[m]))
        for t in range(1, T):
            mu_mt = np.dot(Wm, Ys[m, t - 1, :])
            Ys[m, t, :] = mu_mt + np.sqrt(etasq) * npr.randn(N)

    return Ys, A, W, Ps, Cs
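# Usage sketch with hypothetical sizes; a random symmetric adjacency matrix
# stands in for the real C. elegans connectome, and the likelihood helpers
# used by run_naive_mcmc are assumed to come from elsewhere in this module.
N, M, T = 50, 3, 200
A = npr.rand(N, N) < 0.2
A = A | A.T                    # symmetric support so A * W keeps W's spectrum
posx = npr.rand(N)
Ys, A, W, Ps, Cs = simulate_celegans(A, posx, M=M, T=T, num_given=10)
times, lls, mses, num_corrects = run_naive_mcmc(
    Ys, A, Cs, etasq=0.1, sigmasq_W=1.0, W_true=W, Ps_true=Ps, num_iters=50)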
# Discard the first half of each chain as burn-in and randomly permute
th_samples = th_samples[Nsamps // 2:, :]
ll_samps = ll_samps[Nsamps // 2:]
chain_perm = np.random.permutation(th_samples.shape[0])[0:2500]
chain_perm = np.arange(2500)  # NOTE: overrides the random permutation above

# Assemble a few thousand samples (B_chains accumulates across chains)
B0 = parser.get(th_samples[0], 'betas')
B_samps = np.zeros((len(chain_perm), B0.shape[0], B0.shape[1]))
for i, idx in enumerate(chain_perm):
    betas = K_chol.dot(parser.get(th_samples[idx, :], 'betas').T).T
    B_samp = np.exp(betas)
    B_samp /= np.sum(B_samp * lam0_delta, axis=1, keepdims=True)
    B_samps[i, :, :] = B_samp
B_chains.append(B_samps)

B_samps = np.vstack(B_chains)
B_samps = B_samps[npr.permutation(B_samps.shape[0]), :, :]
B_mle = load_basis(num_bases=NUM_BASES,
                   split_type=SPLIT_TYPE,
                   lam_subsample=LAM_SUBSAMPLE)
lam0, lam0_delta = ru.get_lam0(lam_subsample=LAM_SUBSAMPLE)


def get_basis_sample(idx, mle=False):
    """Return a basis sample to condition on (or the MLE if specified)."""
    if mle:
        return B_mle
    else:
        return B_samps[idx]


##########################################################################
## Load in spectroscopically measured quasars + fluxes
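# Usage sketch: draw one posterior basis sample, or the MLE basis instead.
B_post = get_basis_sample(idx=0)
B_point = get_basis_sample(idx=0, mle=True)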