def binary_ufunc_check(fun, lims_A=[-2, 2], lims_B=[-2, 2], test_complex=True):
    T_A = lambda x: transform(lims_A, x)
    T_B = lambda x: transform(lims_B, x)
    scalar_int = 1
    scalar = 0.6
    vector = npr.rand(2)
    mat = npr.rand(3, 2)
    mat2 = npr.rand(1, 2)
    combo_check(fun, (0, 1),
                [T_A(scalar), T_A(scalar_int), T_A(vector), T_A(mat), T_A(mat2)],
                [T_B(scalar), T_B(scalar_int), T_B(vector), T_B(mat), T_B(mat2)])
    if test_complex:
        comp = 0.6 + 0.3j
        matc = npr.rand(3, 2) + 0.1j * npr.rand(3, 2)
        combo_check(fun, (0, 1),
                    [T_A(scalar), T_A(comp), T_A(vector), T_A(matc), T_A(mat2)],
                    [T_B(scalar), T_B(comp), T_B(vector), T_B(matc), T_B(mat2)])
def unary_ufunc_check(fun, lims=[-2, 2]):
    scalar_int = transform(lims, 1)
    scalar = transform(lims, 0.4)
    vector = transform(lims, npr.rand(2))
    mat = transform(lims, npr.rand(3, 2))
    mat2 = transform(lims, npr.rand(1, 2))
    combo_check(fun, (0,), [scalar_int, scalar, vector, mat, mat2])
def binary_ufunc_check(fun, lims_A=[-2, 2], lims_B=[-2, 2]):
    T_A = lambda x: transform(lims_A, x)
    T_B = lambda x: transform(lims_B, x)
    scalar = 0.6
    vector = npr.rand(2)
    mat = npr.rand(3, 2)
    mat2 = npr.rand(1, 2)
    combo_check(fun, (0, 1),
                [T_A(scalar), T_A(vector), T_A(mat), T_A(mat2)],
                [T_B(scalar), T_B(vector), T_B(mat), T_B(mat2)])
def test_third_derivative():
    fun = lambda x: np.sin(np.sin(x) + np.sin(x))
    df = grad(fun)
    ddf = grad(df)    # second derivative: differentiate the gradient
    dddf = grad(ddf)  # third derivative
    check_grads(fun, npr.randn())
    check_grads(df, npr.rand())
    check_grads(ddf, npr.rand())
    check_grads(dddf, npr.rand())
def unary_ufunc_check(fun, lims=[-2, 2], test_complex=True):
    scalar_int = transform(lims, 1)
    scalar = transform(lims, 0.4)
    vector = transform(lims, npr.rand(2))
    mat = transform(lims, npr.rand(3, 2))
    mat2 = transform(lims, npr.rand(1, 2))
    combo_check(fun, (0,), [scalar_int, scalar, vector, mat, mat2])
    if test_complex:
        comp = transform(lims, 0.4) + 0.1j * transform(lims, 0.3)
        matc = transform(lims, npr.rand(3, 2)) + 0.1j * npr.rand(3, 2)
        combo_check(fun, (0,), [comp, matc])
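# `transform` is assumed by the checkers above but not defined in these
# snippets. A minimal sketch consistent with how it is called (an assumption,
# not the original helper): affinely map a draw from [0, 1] into
# [lims[0], lims[1]].
def transform(lims, x):
    return x * (lims[1] - lims[0]) + lims[0]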
def test_power_arg0():
    # the +1.'s here are to avoid regimes where numerical diffs fail
    make_fun = lambda y: lambda x: np.power(x, y)
    fun = make_fun(npr.randn()**2 + 1.)
    d_fun = grad(fun)
    check_grads(fun, npr.rand()**2 + 1.)
    check_grads(d_fun, npr.rand()**2 + 1.)

    # test y == 0. as a special case, c.f. #116
    fun = make_fun(0.)
    assert grad(fun)(0.) == 0.
    assert grad(grad(fun))(0.) == 0.
def binary_ufunc_check_no_same_args(fun, lims_A=[-2, 2], lims_B=[-2, 2],
                                    test_complex=True, **kwargs):
    T_A = lambda x: transform(lims_A, x)
    T_B = lambda x: transform(lims_B, x)
    scalar1 = 0.6;  scalar2 = 0.7
    vector1 = npr.rand(2);   vector2 = npr.rand(2)
    mat11 = npr.rand(3, 2);  mat12 = npr.rand(3, 2)
    mat21 = npr.rand(1, 2);  mat22 = npr.rand(1, 2)
    check = combo_check(fun, (0, 1), **kwargs)
    check([T_A(scalar1), T_A(vector1), T_A(mat11), T_A(mat21)],
          [T_B(scalar2), T_B(vector2), T_B(mat12), T_B(mat22)])
    if test_complex:
        comp1 = 0.6 + 0.3j;  comp2 = 0.1 + 0.2j
        matc1 = npr.rand(3, 2) + 0.1j * npr.rand(3, 2)
        matc2 = npr.rand(3, 2) + 0.1j * npr.rand(3, 2)
        check([T_A(scalar1), T_A(comp1), T_A(vector1), T_A(matc1), T_A(mat21)],
              [T_B(scalar2), T_B(comp2), T_B(vector2), T_B(matc2), T_B(mat22)])
def test_norm_logpdf():
    x = npr.randn()
    loc = npr.randn()
    scale = npr.rand()**2 + 1.1
    fun = autograd.scipy.stats.norm.logpdf
    d_fun = grad(fun)
    check_grads(fun, x, loc, scale)
    check_grads(d_fun, x, loc, scale)
def init_pgm_param(K, N, alpha, niw_conc=10., random_scale=0.):
    def init_niw_natparam(N):
        nu, S, m, kappa = N + niw_conc, (N + niw_conc) * np.eye(N), np.zeros(N), niw_conc
        m = m + random_scale * npr.randn(*m.shape)
        return niw.standard_to_natural(S, m, kappa, nu)

    dirichlet_natparam = alpha * (npr.rand(K) if random_scale else np.ones(K))
    niw_natparam = np.stack([init_niw_natparam(N) for _ in range(K)])
    return dirichlet_natparam, niw_natparam
def variational_objective(params):
    """Provides a stochastic estimate of the variational lower bound."""
    u1_mean, u1_cov_fac, h_mean, h_cov_fac = unpack_params(params, '1')
    u2_mean, u2_cov_fac, _, _ = unpack_params(params, '2')
    log_prob = 0
    for i in range(num_samples):
        # Reparameterization trick: scale standard-normal draws by the
        # covariance factors and shift by the means.
        sample_u1 = u1_cov_fac @ npr.randn(m, 1) + u1_mean
        sample_u2 = u2_cov_fac @ npr.randn(m, 1) + u2_mean
        sample_h = h_cov_fac * h_cov_fac * npr.randn(n, 1) + h_mean
        log_prob = log_prob + logprob(params, sample_u1, X, sample_h, '1')
        log_prob = log_prob + logprob(params, sample_u2, sample_h, Y, '2')
    log_prob = log_prob / num_samples
    # entropy = mvn.entropy(np.reshape(u1_mean, -1), u1_cov_fac @ u1_cov_fac.T)
    # entropy = entropy - mvn.entropy(np.reshape(u2_mean, -1), u2_cov_fac @ u2_cov_fac.T)
    # entropy = entropy - mvn.entropy(np.reshape(h_mean, -1), np.diag(h_mean) @ np.diag(h_mean))
    print(log_prob)  # + entropy
    return -(log_prob)  # + entropy
def reward_function(action_chosen, label):
    if action_chosen == 0 and label == 1:
        # we chose to eat a poisonous mushroom: with probability 1/2 we get really punished
        if npr.rand() > 0.5:
            reward = -35
        else:
            reward = 0
    elif action_chosen == 0 and label == 0:
        reward = 5
    else:
        # we chose not to eat, so we get no reward
        reward = 0

    if label == 1:
        oracle_reward = 0
    else:
        oracle_reward = 5
    return reward, oracle_reward
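# A quick Monte Carlo sanity check (illustrative; assumes npr is imported as
# in the surrounding snippets): eating a poisonous mushroom (action 0, label 1)
# pays -35 half the time, so the average should land near 0.5 * (-35) = -17.5.
rewards = [reward_function(0, 1)[0] for _ in range(10000)]
print(sum(rewards) / len(rewards))  # approximately -17.5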
def test_power_arg1_zero():
    fun = lambda y: np.power(0., y)
    d_fun = grad(fun)
    check_grads(fun, npr.rand()**2)
    check_grads(d_fun, npr.rand()**2)
def __init__(self, K, D, M=0):
    super(StationaryTransitions, self).__init__(K, D, M=M)
    # Start from a "sticky" row-stochastic transition matrix: mostly
    # self-transitions, plus a little random mass elsewhere.
    Ps = .95 * np.eye(K) + .05 * npr.rand(K, K)
    Ps /= Ps.sum(axis=1, keepdims=True)
    self.log_Ps = np.log(Ps)
def random_diag_nn_potentials(n, T):
    return -1. / 2 * npr.rand(T, n), npr.randn(T, n), npr.randn(T)
def sample(self, z, x, input=None, tag=None):
    T = z.shape[0]
    z = np.zeros_like(z, dtype=int) if self.single_subspace else z
    ps = self.mean(self.forward(x, input, tag))
    # Bernoulli draw: a uniform sample falls below p with probability p.
    return (npr.rand(T, self.N) < ps[np.arange(T), z, :]).astype(int)
# input is sum of u_r and u_l
u = 1.0 * np.array(u).T
z, x, y = latent_acc.sample(T, input=u)
us.append(u)
zs.append(z)
xs.append(x)
ys.append(y)

# initialize test model
test_acc = LatentAccumulation(N, K, D, M=M,
                              transitions="race",
                              emissions="poisson",
                              emission_kwargs={"bin_size": bin_size},
                              dynamics_kwargs={"learn_A": False})
betas0 = 0.02 + 0.08 * npr.rand() * np.ones((D,))
sigmas0 = np.log(4e-5 + 3.5e-3 * npr.rand()) * np.ones((D,))
test_acc.dynamics.params = (betas0, sigmas0)  # test_acc.dynamics.params[2])

# Initialize C, d
u_sum = np.array([np.sum(u[:, 0] - u[:, 1]) for u in us])
y_end = np.array([y[-10:] for y in ys])
y_U = y_end[np.where(u_sum >= 25)]
y_L = y_end[np.where(u_sum <= -25)]
d_init = (np.mean([y[:5] for y in ys], axis=(0, 1)) / bin_size).reshape((1, N))
C_init = np.hstack((np.mean(y_U, axis=(0, 1))[:, None],
                    np.mean(y_L, axis=(0, 1))[:, None])) / bin_size - d_init.T
test_acc.emissions.ds[0] = d_init
test_acc.emissions.Cs[0] = C_init
init_params = copy.deepcopy(test_acc.params)
test_acc_vlem = copy.deepcopy(test_acc)
def test_power_arg1():
    x = npr.randn()**2
    fun = lambda y: np.power(x, y)
    d_fun = grad(fun)
    check_grads(fun, npr.rand()**2)
    check_grads(d_fun, npr.rand()**2)
def initialize_meanfield(label_global, node_potentials):
    T, K = node_potentials.shape[0], label_global.shape[0]
    return normalize(npr.rand(T, K))
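# `normalize` is assumed by several initializers here but not shown. A minimal
# sketch consistent with its uses (a guess, not the original helper): rescale
# along the last axis so each row forms a valid probability distribution.
def normalize(a, axis=-1):
    return a / a.sum(axis=axis, keepdims=True)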
def test_radians():
    fun = lambda x: 3.0 * np.radians(x)
    d_fun = grad(fun)
    check_grads(fun, 10.0 * npr.rand())
    check_grads(d_fun, 10.0 * npr.rand())
def test_degrees():
    fun = lambda x: 3.0 * np.degrees(x)
    d_fun = grad(fun)
    check_grads(fun, 10.0 * npr.rand())
    check_grads(d_fun, 10.0 * npr.rand())
def test_reciprocal():
    fun = lambda x: np.reciprocal(x)
    d_fun = grad(fun)
    check_grads(fun, npr.rand())
    check_grads(d_fun, npr.rand())
def test_negative():
    fun = lambda x: np.negative(x)
    d_fun = grad(fun)
    check_grads(fun, npr.rand())
    check_grads(d_fun, npr.rand())
def test_true_divide_arg1():
    fun = lambda x, y: np.true_divide(x, y)
    d_fun = grad(fun, 1)
    check_grads(fun, npr.rand(), npr.rand())
    check_grads(d_fun, npr.rand(), npr.rand())
def test_multiply_arg1():
    fun = lambda x, y: np.multiply(x, y)
    d_fun = grad(fun, 1)
    check_grads(fun, npr.rand(), npr.rand())
    check_grads(d_fun, npr.rand(), npr.rand())
def test_divide_arg0():
    fun = lambda x, y: np.divide(x, y)
    d_fun = grad(fun)
    check_grads(fun, npr.rand(), npr.rand())
    check_grads(d_fun, npr.rand(), npr.rand())
def initialize_hmm_parameters(num_states, num_outputs):
    init_pi = normalize(npr.rand(num_states))
    init_A = normalize(npr.rand(num_states, num_states))
    init_B = normalize(npr.rand(num_states, num_outputs))
    return init_pi, init_A, init_B
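# Example (illustrative): random starting parameters for a 3-state HMM over
# 5 output symbols. pi is the initial distribution, A the transition matrix,
# B the emission matrix, each normalized along its last axis.
pi, A, B = initialize_hmm_parameters(num_states=3, num_outputs=5)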
def test_sinc():
    fun = lambda x: 3.0 * np.sinc(x)
    d_fun = grad(fun)
    check_grads(fun, 10.0 * npr.rand())
    check_grads(d_fun, 10.0 * npr.rand())
def initialize_local_meanfield(label_global, node_potentials):
    K = label_global.shape[0]
    T = node_potentials[0].shape[0]
    return normalize(npr.rand(T, K))
def make_dir_natparam(num_states):
    return alpha * np.ones(num_states) if not random else alpha + npr.rand(num_states)
def test_mod_arg1():
    fun = lambda x, y: np.mod(x, y)
    d_fun = grad(fun, 1)
    check_grads(fun, npr.rand(), npr.rand())
    check_grads(d_fun, npr.rand(), npr.rand())
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad
import sklearn.metrics
import pylab

# Generate the example data
ejemplos = 1000
caracteristicas = 100
D = (npr.randn(ejemplos, caracteristicas), npr.randn(ejemplos))

# Specify the network
units_capa1 = 10
units_capa2 = 1
w1 = npr.rand(caracteristicas, units_capa1)
b1 = npr.rand(units_capa1)
w2 = npr.rand(units_capa1, units_capa2)
b2 = 0.0
theta = (w1, b1, w2, b2)

# Loss function
def costo_cuadratico(y, y_barra):
    return np.dot((y - y_barra), (y - y_barra))

# Output layer
def entropia_cruzada_binaria(y, y_barra):
    return np.sum(-((y * np.log(y_barra)) + ((1 - y) * np.log(1 - y_barra))))

# Wrapper around the neural network (body completed to match the identical
# two-layer definition later in this collection)
def red_neuronal(x, theta):
    w1, b1, w2, b2 = theta
    return np.tanh(np.dot(np.tanh(np.dot(x, w1) + b1), w2) + b2)
def make_parameters(T, K):
    pi0 = np.ones(K) / K
    Ps = npr.rand(T-1, K, K)
    Ps /= Ps.sum(axis=2, keepdims=True)
    ll = npr.randn(T, K)
    return pi0, Ps, ll
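# Example (illustrative): random inputs for an HMM message-passing routine.
# pi0 is a uniform initial distribution, Ps holds one row-stochastic K x K
# transition matrix per step, and ll stands in for per-state log-likelihoods.
pi0, Ps, ll = make_parameters(T=100, K=3)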
# splitting the data into training and testing data sets
# X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=50,
#                                                     random_state=0)
X_train = X
Y_train = Y

# some stats on data
examples = Y_train.shape[0]
features = X_train.shape[1]
D = (X_train, Y_train)

# Specify the network
layer1_units = 20
layer2_units = 3
w1 = npr.rand(features, layer1_units)
b1 = npr.rand(layer1_units)
w2 = npr.rand(layer1_units, layer2_units)
b2 = npr.rand(layer2_units)
theta = (w1, b1, w2, b2)

# Define the loss function (cross entropy)
def cross_entropy(y, y_hat):
    return np.sum(-y * np.log(y_hat))

def sigmoid(x):
    return 1 / (1 + np.exp(-x))
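# This snippet is truncated before the network definition. A plausible forward
# pass for the architecture above (a sketch, not the original code; assumes the
# script's autograd.numpy import): a tanh hidden layer followed by a softmax
# output, which pairs with the multiclass cross-entropy loss.
def neural_network(x, theta):
    w1, b1, w2, b2 = theta
    h = np.tanh(np.dot(x, w1) + b1)
    logits = np.dot(h, w2) + b2
    # numerically stable softmax over the last axis
    e = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)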
                emission_kwargs=dict(link="softplus"))

# Set rotational dynamics
for k in range(K):
    true_slds.dynamics.As[k] = .95 * random_rotation(D, theta=(k + 1) * np.pi / 20)
    true_slds.dynamics.bs[k] = 3 * npr.randn(D)

# Set an offset to make the counts larger
# true_slds.emissions.ds += 10

# Sample data
z, x, y = true_slds.sample(T)

# Mask off some data
mask = npr.rand(T, N) < 0.95
y_masked = y * mask

# In[4]:

plt.imshow(y.T, aspect="auto", interpolation="none")
plt.xlabel("time")
plt.ylabel("neuron")
plt.colorbar()

# In[5]:

print("Fitting SLDS with SVI")
slds = SLDS(N, K, D,
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad
import sklearn.metrics
import pylab

# Generate Dataset
examples = 1000
features = 100
D = (npr.randn(examples, features), npr.randn(examples))

# Specify the network
layer1_units = 10
layer2_units = 1
w1 = npr.rand(features, layer1_units)
b1 = npr.rand(layer1_units)
w2 = npr.rand(layer1_units, layer2_units)
b2 = 0.0
theta = (w1, b1, w2, b2)

# Define the loss function
def squared_loss(y, y_hat):
    return np.dot((y - y_hat), (y - y_hat))

# Output Layer
def binary_cross_entropy(y, y_hat):
    return np.sum(-((y * np.log(y_hat)) + ((1 - y) * np.log(1 - y_hat))))

# Wrapper around the Neural Network (body completed to match the identical
# two-layer definition later in this collection)
def neural_network(x, theta):
    w1, b1, w2, b2 = theta
    return np.tanh(np.dot(np.tanh(np.dot(x, w1) + b1), w2) + b2)
plt.tight_layout()

# fit SLDS model to ys
# initialize
test_acc = LatentAccumulation(N, K, D, M=M,
                              transitions="ddmnlncollapsing",
                              dynamics_kwargs={"learn_V": False,
                                               "learn_A": False},
                              emissions="poisson",
                              emission_kwargs={"bin_size": bin_size})
betas = np.array([0.0 + 0.08 * npr.rand()])
sigmas = np.log(5e-4 + 2.5e-3 * npr.rand()) * np.ones((D,))
test_acc.dynamics.params = (betas, sigmas) + test_acc.dynamics.params[2:]
test_acc.initialize(ys, inputs=us)
init_params = copy.deepcopy(test_acc.params)

# fit
q_elbos, q_lem = test_acc.fit(ys, inputs=us,
                              method="laplace_em",
                              variational_posterior="structured_meanfield",
                              num_iters=50, alpha=0.5, initialize=False)

plt.ion()
def WMR(dt=0.001, theta=float('nan'), delay_time=[], delay_max=2, delay_min=1,
        showplots=0, prior='uniform', algorithm=[]):
    '''
    Generates time series for the Working Memory Ring task. Network must
    remember a stimulus that lies on a ring, and reproduce it at a
    specified time.

    Inputs:
        dt: Time step (default 0.001)
        theta: Angle of stimulus. Should be between -pi and pi. Randomly
            generated if not specified.
        delay_time: How long to hold the stimulus in memory. If not
            specified, randomly generated.
        delay_max: Used to randomly generate a delay.
        delay_min: Same.
        showplots: If 1, creates a plot showing target, inputs, and hints.
        prior: Specifies the distribution from which theta is drawn.
            'uniform': Default, uniform from -pi to pi
            'four': Mixture of 4 Gaussians between -pi and pi, periodic BCs
            'six': Same as above, but with 6 slightly narrower Gaussians
        algorithm: Specify which algorithm you need inputs for, as the time
            series may vary: 'grad' or 'full-FORCE'.

    Outputs:
        inp: For input into network. Three columns (x, y, and trigger).
        targ: Target output. Two columns (x and y).
        hints: Hints for full-FORCE training. Two columns (x and y).
        theta: Stimulus angle
        targ_idx: Indices where target is specified
        response_idx: Index where response should be recorded
        trigger_idx: Start of response trigger
        stim_idx: Where the stimulus begins
        delay_idx: Where the delay begins
    '''

    def TTS(T, dt):
        # Convert Time to Steps
        return int(round(T / dt))

    # Check that the algorithm is valid
    if algorithm != 'full-FORCE' and algorithm != 'grad':
        raise ValueError('Please choose a valid training algorithm setting. '
                         'See documentation for details')

    # If theta isn't specified, choose it from some distribution
    if np.isnan(theta):
        if prior == 'uniform':
            theta = np.pi * (np.random.rand() * 2 - 1)
        # Biased:
        elif prior == 'four':
            q = np.random.choice(np.arange(0, 1, 1 / 4))
            theta = np.pi * (2 * np.mod(np.random.normal(q, 0.06), 1) - 1)
        elif prior == 'six':
            q = np.random.choice(np.arange(0, 1, 1 / 6))
            theta = np.pi * (2 * np.mod(np.random.normal(q, 0.04), 1) - 1)

    # Pick a delay time, if it's not specified
    if not delay_time:
        delay_time = npr.rand() * (delay_max - delay_min) + delay_min

    x = np.cos(theta)
    y = np.sin(theta)

    fix_time = 0.3
    sample_time = 0.2
    trigger_time = 0.1
    reaction_time = 0.2
    response_time = 0.2
    if algorithm == 'full-FORCE':
        iti_time = 0.3
    else:
        iti_time = 0

    fix_steps = TTS(fix_time, dt)
    sample_steps = TTS(sample_time, dt)
    delay_steps = TTS(delay_time, dt)
    trigger_steps = TTS(trigger_time, dt)
    reaction_steps = TTS(reaction_time, dt)
    response_steps = TTS(response_time, dt)
    iti_steps = TTS(iti_time, dt)

    total_steps = (fix_steps + sample_steps + delay_steps + trigger_steps +
                   reaction_steps + response_steps + iti_steps)
    show_stim = fix_steps
    show_trigger = show_stim + sample_steps + delay_steps
    show_response = show_trigger + trigger_steps + reaction_steps

    x_input = np.zeros((total_steps, 1))
    y_input = np.zeros((total_steps, 1))
    trigger = np.zeros((total_steps, 1))
    x_targ = np.zeros((total_steps, 1))
    y_targ = np.zeros((total_steps, 1))
    x_hint = np.zeros((total_steps, 1))
    y_hint = np.zeros((total_steps, 1))

    x_input[show_stim:show_stim + sample_steps, 0] = x
    y_input[show_stim:show_stim + sample_steps, 0] = y
    trigger[show_trigger:show_trigger + trigger_steps, 0] = 1
    x_targ[show_trigger + trigger_steps:show_response, 0] = np.linspace(0, x, reaction_steps)
    x_targ[show_response:show_response + response_steps, 0] = np.linspace(x, 0, response_steps)
    y_targ[show_trigger + trigger_steps:show_response, 0] = np.linspace(0, y, reaction_steps)
    y_targ[show_response:show_response + response_steps, 0] = np.linspace(y, 0, response_steps)
    x_hint[show_stim:show_trigger, 0] = x / 2
    y_hint[show_stim:show_trigger, 0] = y / 2

    inputs = np.hstack((x_input, y_input, trigger))
    targets = np.hstack((x_targ, y_targ))
    hints = np.hstack((x_hint, y_hint))
    targ_idx = np.arange(show_trigger + trigger_steps,
                         show_response + response_steps)

    if showplots:
        plt.figure()
        plt.plot(inputs, 'b')
        plt.plot(targets, 'r--')
        plt.plot(hints, 'g--')
        plt.show()

    inps_and_targs = {
        'inps': inputs,
        'targs': targets,
        'hints': hints,
        'targ_idx': targ_idx,
        'theta': theta,
        'response_idx': show_response,
        'trigger_idx': show_trigger,
        'stim_idx': show_stim,
        'delay_idx': show_stim + sample_steps
    }
    return inps_and_targs
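# Example call (illustrative): generate one trial for gradient-based training
# with a fixed stimulus angle and delay.
trial = WMR(dt=0.001, theta=np.pi / 4, delay_time=1.5, algorithm='grad')
inputs, targets = trial['inps'], trial['targs']  # shapes (T, 3) and (T, 2)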
def test_power_arg0():
    y = npr.randn()**2 + 1.0
    fun = lambda x: np.power(x, y)
    d_fun = grad(fun)
    check_grads(fun, npr.rand()**2)
    check_grads(d_fun, npr.rand()**2)
def make_label_global_natparam(k, random):
    return alpha * np.ones(k) if not random else alpha + npr.rand(k)
def sample_y(self, z, x, input=None, tag=None):
    T = z.shape[0]
    z = np.zeros_like(z, dtype=int) if self.single_subspace else z
    ps = self.mean(self.compute_mus(x))
    return npr.rand(T, self.N) < ps[np.arange(T), z, :]
def test_grad_fanout():
    fun = lambda x: np.sin(np.sin(x) + np.sin(x))
    df = grad(fun)
    check_grads(fun, npr.randn())
    check_grads(df, npr.rand())
# Import the data
with open('Bias_correction_ucl.csv', mode='r', encoding='utf8') as DS:
    lector = csv.reader(DS, delimiter=',')
    DataSet = []
    for datos in lector:
        DataSet.append(datos)

DataSet = DataSet[1:]  # drop the header row
DataFrameY = np.array([dato[22] for dato in DataSet])
DataFrameX = np.array([dato[:21] for dato in DataSet])
DataSet = (DataFrameX, DataFrameY)

# Network parameters
units_capa1 = 22
units_capa2 = 1
w1 = npr.rand(DataSet[0].shape[1], units_capa1)  # one row per input feature
b1 = npr.rand(units_capa1)
w2 = npr.rand(units_capa1, units_capa2)
b2 = 0.0
theta = (w1, b1, w2, b2)

# Loss function
def costo_cuadratico(y, y_barra):
    return np.dot((y - y_barra), (y - y_barra))

# Wiring up the network
def red_neuronal(x, theta):
    w1, b1, w2, b2 = theta
    return np.tanh(np.dot(np.tanh(np.dot(x, w1) + b1), w2) + b2)
def sample_x(self, z, xhist, input=None, tag=None, with_noise=True):
    ps = 1 / (1 + np.exp(self.logit_ps))
    return npr.rand(self.D) < ps[z]
def gen_redshift_samples_tempering(Nchains, Nsamps, INIT_REDSHIFT,
                                   lnpdf, dlnpdf, USE_MLE):
    """ Generate posterior samples of red-shift using HMC + Parallel Tempering """
    print("=== PARALLEL TEMPERING WITH %d CHAINS === " % Nchains)

    # grab basis for dimensions
    B = get_basis_sample(0, USE_MLE)

    # set up tempering parameters
    z_inits = np.linspace(.5, 3.0, Nchains)
    temps = np.linspace(.2, 1., Nchains)

    # set up a list of Nchains markov chains
    chains_samps = [np.zeros((Nsamps, B.shape[0] + 2)) for c in range(Nchains)]
    chains_lls = [np.zeros(Nsamps) for c in range(Nchains)]
    for ci, chs in enumerate(chains_samps):
        chs[0, :] = .001 * npr.randn(B.shape[0] + 2)
        chs[0, 0] = z_inits[ci]
        chs[0, -1] = np.log(INIT_MAG)
        chains_lls[ci][0] = temps[ci] * lnpdf(chs[0, :], B)

    ## sanity check gradient
    ru.check_grad(fun=lambda x: temps[1] * lnpdf(x, B),
                  jac=lambda x: temps[1] * dlnpdf(x, B),
                  th=chains_samps[1][0, :])

    ## sample
    Naccepts = np.zeros(Nchains)
    Nswaps = 0
    step_sizes = .005 * np.ones(Nchains)
    avg_rates = .9 * np.ones(Nchains)
    adapt_step = True
    print("{0:10}|{1:10}|{2:10}|{3:10}|{4:15}|{5:15}".format(
        " iter ", " ll ", " step_sz ", " Nswaps ", " Naccepts", " z (z_spec)"))
    for s in np.arange(1, Nsamps):
        # stop adapting after warmup
        if s > Nsamps / 2:
            adapt_step = False

        # Nchains HMC draws
        for ci in range(Nchains):
            B = get_basis_sample(s, USE_MLE)
            chains_samps[ci][s, :], P, step_sizes[ci], avg_rates[ci] = hmc(
                x_curr           = chains_samps[ci][s - 1, :],
                llhfunc          = lambda x: temps[ci] * lnpdf(x, B),
                grad_llhfunc     = lambda x: temps[ci] * dlnpdf(x, B),
                eps              = step_sizes[ci],
                num_steps        = STEPS_PER_SAMPLE,
                mass             = 1.,
                adaptive_step_sz = adapt_step,
                min_step_sz      = 0.00005,
                avg_accept_rate  = avg_rates[ci],
                tgt_accept_rate  = .85)
            chains_lls[ci][s] = temps[ci] * lnpdf(chains_samps[ci][s, :], B)
            if chains_lls[ci][s] != chains_lls[ci][s - 1]:
                Naccepts[ci] += 1

        # propose swaps cascading down from first
        for ci in range(Nchains - 1):
            # cache raw ll's for each (already computed)
            ll_ci = chains_lls[ci][s] / temps[ci]
            ll_ci_plus = chains_lls[ci + 1][s] / temps[ci + 1]

            # propose swap between chain index ci and ci + 1
            ll_prop = ll_ci_plus * temps[ci] + ll_ci * temps[ci + 1]
            ll_curr = chains_lls[ci][s] + chains_lls[ci + 1][s]
            if np.log(npr.rand()) < ll_prop - ll_curr:
                ci_samp = chains_samps[ci][s, :].copy()

                # move chain sample ci+1 into ci
                chains_samps[ci][s, :] = chains_samps[ci + 1][s, :]
                chains_lls[ci][s] = ll_ci_plus * temps[ci]

                # move chain sample ci into ci + 1
                chains_samps[ci + 1][s, :] = ci_samp
                chains_lls[ci + 1][s] = ll_ci * temps[ci + 1]
                if ci + 1 == Nchains - 1:
                    Nswaps += 1

        if s % 20 == 0:
            print("{0:10}|{1:10}|{2:10}|{3:10}|{4:15}|{5:15}".format(
                "%d/%d" % (s, Nsamps),
                " %2.4f" % chains_lls[-1][s],
                " %2.5f" % step_sizes[-1],
                " %d (%2.2f)" % (Nswaps, avg_rates[-1]),
                " (%d) (%d) (%d)" % (Naccepts[0], Naccepts[-2], Naccepts[-1]),
                " (hot: %2.2f), (cold: %2.2f), (pi: %2.2f) (true: %2.2f)" % (
                    chains_samps[0][s, 0], chains_samps[-2][s, 0],
                    chains_samps[-1][s, 0], z_n)))
        # checkpoint every 200 iterations
        if s % 200 == 0:
            save_redshift_samples(chains_samps[-1], chains_lls[-1], q_idx=n,
                                  chain_idx="temper", use_mle=USE_MLE,
                                  K=B.shape[0], V=B.shape[1],
                                  qso_info=qso_n_info)

    ### save samples
    save_redshift_samples(chains_samps[-1], chains_lls[-1], q_idx=n,
                          chain_idx="temper", use_mle=USE_MLE,
                          K=B.shape[0], V=B.shape[1], qso_info=qso_n_info)

    # only return the chain we care about
    return chains_samps[-1], chains_lls[-1]
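# Minimal standalone sketch of the replica-swap rule used above (assumed
# notation, not part of the original module): with tempered log-likelihoods
# L_i = temp_i * ll_i, a swap between adjacent chains is accepted with
# probability min(1, exp(L_swapped - L_current)).
def accept_swap(ll_cold, ll_hot, temp_cold, temp_hot):
    ll_curr = temp_cold * ll_cold + temp_hot * ll_hot
    ll_prop = temp_cold * ll_hot + temp_hot * ll_cold
    return np.log(npr.rand()) < ll_prop - ll_curr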