def test_planar_flow(input_cols, n_samples):
    params = dict()
    params["w"] = npr.normal(size=(input_cols, 1))
    params["b"] = npr.normal(size=(1, n_samples))
    params["u"] = npr.normal(size=(input_cols, 1))
    z = npr.normal(size=(n_samples, input_cols))
    out = planar_flow(params, z)
    assert out.shape == (n_samples, input_cols)
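# The test above assumes a planar_flow function that is not shown here. A
# minimal sketch consistent with the expected shapes, following the planar
# flow of Rezende & Mohamed (2015), f(z) = z + u * tanh(w^T z + b). The
# transpose on b is an assumption needed for broadcasting, not taken from
# the original code:
def planar_flow(params, z):
    w, b, u = params["w"], params["b"], params["u"]
    # z: (n_samples, input_cols), w and u: (input_cols, 1), b.T: (n_samples, 1)
    return z + np.dot(np.tanh(np.dot(z, w) + b.T), u.T)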
def spirals(N, start, end, a, b, noise):
    # Generate two interleaved 2-D spirals with Gaussian observation noise.
    interval = np.linspace(start, end, N)
    d1 = []
    d2 = []
    d3 = []
    d4 = []
    for element in interval:
        d1.append((a * element - b) * np.sin(element) + normal(0, noise))
        d2.append((a * element - b) * np.cos(element) + normal(0, noise))
        d3.append((-a * element + b) * np.sin(element) + normal(0, noise))
        d4.append((-a * element + b) * np.cos(element) + normal(0, noise))
    return np.array([d1, d2]).T, np.array([d3, d4]).T
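# Example usage of spirals (a sketch; the parameter values are illustrative,
# not from the original source):
import matplotlib.pyplot as plt

s1, s2 = spirals(N=500, start=0.5, end=4 * np.pi, a=1.0, b=0.5, noise=0.1)
plt.scatter(s1[:, 0], s1[:, 1], s=4, label="spiral 1")
plt.scatter(s2[:, 0], s2[:, 1], s=4, label="spiral 2")
plt.legend()
plt.show()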
def sample(natparams, num_samples=None):
    'Takes a list of natural parameter tuples and produces an array of samples'
    mus, sigmasqs = unpack_params(natparams)
    T, n = mus.shape
    if num_samples:
        rand = npr.normal(size=(T, num_samples, n))
        return mus[:, None, :] + np.sqrt(sigmasqs)[:, None, :] * rand
    # without num_samples, draw a single sample per time step; indexing mus
    # with [:, None, :] here would broadcast to the wrong shape (T, T, n)
    return mus + np.sqrt(sigmasqs) * npr.normal(size=(T, n))
def cycsgld_step(self, iters=1, cycles=10, total=1e6):
    # cosine stepsize schedule that restarts every total/cycles iterations
    sub_total = total / cycles
    self.r_remainder = (iters % sub_total) * 1.0 / sub_total
    cyc_lr = self.lr * 5 / 2 * (cos(pi * self.r_remainder) + 1)
    # SGLD proposal with the cyclical stepsize
    proposal = self.cycsgld_beta - cyc_lr * self.stochastic_grad(
        self.cycsgld_beta) + sqrt(2 * cyc_lr * self.T) * normal(size=self.dim)
    if self.in_domain(proposal):
        self.cycsgld_beta = proposal
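# A standalone sketch of the cyclical stepsize schedule used above, plotted
# over two cycles (lr = 0.01 is an illustrative value, not from the original):
import numpy as np
import matplotlib.pyplot as plt

lr, total, cycles = 0.01, 1e6, 10
sub_total = total / cycles
its = np.arange(0, int(2 * sub_total), 1000)
cyc_lr = lr * 5 / 2 * (np.cos(np.pi * (its % sub_total) / sub_total) + 1)
plt.plot(its, cyc_lr)
plt.xlabel("iteration")
plt.ylabel("cyclical stepsize")
plt.show()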
def main(argv):
    del argv
    n_clusters = FLAGS.num_clusters
    n_dimensions = FLAGS.num_dimensions
    n_observations = FLAGS.num_observations

    alpha = 3.3 * np.ones(n_clusters)
    a = 1.
    b = 1.
    kappa = 0.1

    npr.seed(10001)

    # generate true latents and data
    pi = npr.gamma(alpha)
    pi /= pi.sum()
    mu = npr.normal(0, 1.5, [n_clusters, n_dimensions])
    z = npr.choice(np.arange(n_clusters), size=n_observations, p=pi)
    x = npr.normal(mu[z, :], 0.5**2)

    # points used for initialization
    pi_est = np.ones(n_clusters) / n_clusters
    z_est = npr.choice(np.arange(n_clusters), size=n_observations, p=pi_est)
    mu_est = npr.normal(0., 0.01, [n_clusters, n_dimensions])
    tau_est = 1.
    init_vals = pi_est, z_est, mu_est, tau_est

    # instantiate the model log joint
    log_joint = make_log_joint(x, alpha, a, b, kappa)

    # run mean field on variational mean parameters
    # (defined for plotting but unused: a no-op callback is passed to cavi
    # below, and `itr` is unresolved in this snippet)
    def callback(meanparams):
        fig = plot(meanparams, x)
        plt.savefig('/tmp/gmm_{:04d}.png'.format(itr))
        plt.close(fig.number)

    start = time.time()
    cavi(log_joint, init_vals, (SIMPLEX, INTEGER, REAL, NONNEGATIVE),
         FLAGS.num_iterations, callback=lambda *args: None)
    runtime = time.time() - start
    print("CAVI Runtime (s): ", runtime)
def add(self, name, shape):
    """
    Add a randomly initialized set of weights/biases to the WB class.
    It is initialized with mean 0 and standard deviation 0.001.

    Parameters:
    ===========
    - name: (string) self_weights, nbr_weights, biases, or some other name.
    - shape: (tuple) the dimensions of the layer.
    """
    self[name] = npr.normal(0, 0.001, shape)
def resgld_step(self, T_multiply=3, var=0.1):
    # Langevin update for the low-temperature chain
    proposal_low = self.resgld_beta_low - self.lr * self.stochastic_grad(
        self.resgld_beta_low) + sqrt(2 * self.lr * self.T) * normal(size=self.dim)
    if self.in_domain(proposal_low):
        self.resgld_beta_low = proposal_low

    # Langevin update for the high-temperature chain
    proposal_high = self.resgld_beta_high - self.lr * self.stochastic_grad(
        self.resgld_beta_high) + sqrt(
            2 * self.lr * self.T * T_multiply) * normal(size=self.dim)
    if self.in_domain(proposal_high):
        self.resgld_beta_high = proposal_high

    # replica-exchange swap; the dT * var term corrects for the bias
    # introduced by the noisy energy estimates
    dT = 1 / self.T - 1 / (self.T * T_multiply)
    swap_rate = np.exp(dT * (self.stochastic_f(self.resgld_beta_low)
                             - self.stochastic_f(self.resgld_beta_high)
                             - dT * var))
    intensity_r = 0.1
    if np.random.uniform(0, 1) < intensity_r * swap_rate:
        self.resgld_beta_high, self.resgld_beta_low = \
            self.resgld_beta_low, self.resgld_beta_high
        self.swaps += 1
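# A self-contained toy version of the same replica-exchange scheme on a
# one-dimensional double well (all names and constants here are illustrative,
# not from the original class):
import numpy as np

f = lambda x: 0.25 * (x**2 - 1.0)**2     # energy with minima at +/-1
grad_f = lambda x: x * (x**2 - 1.0)

lr, T, T_multiply, var = 1e-3, 0.1, 3, 0.1
low, high = -1.0, 1.0
for _ in range(5000):
    low = low - lr * grad_f(low) + np.sqrt(2 * lr * T) * np.random.normal()
    high = high - lr * grad_f(high) + np.sqrt(
        2 * lr * T * T_multiply) * np.random.normal()
    dT = 1 / T - 1 / (T * T_multiply)
    swap_rate = np.exp(dT * (f(low) - f(high) - dT * var))
    if np.random.uniform() < 0.1 * swap_rate:
        low, high = high, low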
def generate_regression_curve_data(x_min=0, x_max=0.5, num_samples=5000):
    x = np.linspace(x_min, x_max, num=num_samples)
    eps = npr.normal(0.0, 0.02, size=num_samples)
    #y = 2*(x+eps) + 5
    #y_true = 0
    y = (x + 0.3 * np.sin(2 * np.pi * (x + eps))
         + 0.3 * np.sin(4 * np.pi * (x + eps)) + eps)
    y_true = x + 0.3 * np.sin(2 * np.pi * x) + 0.3 * np.sin(4 * np.pi * x)
    #y = (x+eps)**2 / 2
    #y_true = x**2 / 2
    return x, y, y_true
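# Quick visual check of the generated data (a sketch; assumes matplotlib):
import matplotlib.pyplot as plt

x, y, y_true = generate_regression_curve_data()
plt.plot(x, y, '.', markersize=2, label='noisy samples')
plt.plot(x, y_true, 'r-', label='true curve')
plt.legend()
plt.show()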
def csgld_step(self, iters):
    # gradient multiplier from the estimated density of states; it can go
    # negative, producing the "bouncy" moves counted below
    self.grad_mul = 1 + self.zeta * self.T * (
        np.log(self.Gcum[self.J]) - np.log(self.Gcum[self.J - 1])) / self.div_f
    proposal = self.csgld_beta - self.lr * self.grad_mul * self.stochastic_grad(
        self.csgld_beta) + sqrt(2. * self.lr * self.T) * normal(size=self.dim)
    if self.in_domain(proposal):
        self.csgld_beta = proposal

    # stochastic approximation update of the density-of-states weights Gcum
    self.J = self.find_idx(self.csgld_beta)
    step_size = min(self.decay_lr, 10. / (iters**0.8 + 100))
    self.Gcum[:self.J] = self.Gcum[:self.J] + step_size * self.Gcum[
        self.J]**self.zeta * (-self.Gcum[:self.J])
    self.Gcum[self.J] = self.Gcum[self.J] + step_size * self.Gcum[
        self.J]**self.zeta * (1 - self.Gcum[self.J])
    self.Gcum[(self.J + 1):] = self.Gcum[(self.J + 1):] + step_size * self.Gcum[
        self.J]**self.zeta * (-self.Gcum[(self.J + 1):])

    if self.grad_mul < 0:
        self.bouncy_move = self.bouncy_move + 1
def stochastic_f(self, beta):
    # noisy energy evaluation: exact f plus Gaussian noise of scale 0.32
    return self.f(beta.tolist()) + 0.32 * normal(size=1)
def rand_instance(leading_dims, nrhs, ndim, lower):
    tri = np.tril if lower else np.triu
    # random triangular factor plus a dominant diagonal for good conditioning
    L = tri(npr.normal(size=leading_dims + (ndim, ndim))) + 10. * np.eye(ndim)
    x = npr.normal(size=leading_dims + (ndim,) + nrhs)
    return L, x
def sgld_step(self):
    proposal = self.sgld_beta - self.lr * self.stochastic_grad(
        self.sgld_beta) + sqrt(2 * self.lr * self.T) * normal(size=self.dim)
    if self.in_domain(proposal):
        self.sgld_beta = proposal
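# A standalone SGLD sketch on a standard normal target, using the same
# update rule as above (constants are illustrative):
import numpy as np

grad_U = lambda x: x          # gradient of the energy U(x) = x^2 / 2
lr, T = 1e-2, 1.0
x, samples = 0.0, []
for _ in range(20000):
    x = x - lr * grad_U(x) + np.sqrt(2 * lr * T) * np.random.normal()
    samples.append(x)
print(np.mean(samples), np.var(samples))  # should approach 0 and 1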
def neg_log_like(theta, inputs):
    return np.mean((inputs - theta[0])**2 / (2. * np.exp(theta[1]) + 1e-10)
                   + theta[1] / 2.)

def joint_neg_log_like(theta, inputs):
    global Xsize
    prior = np.sum(np.square(theta)) * 1
    nll = Xsize * neg_log_like(theta, inputs)
    return nll + prior

Xsize = 1000  # ASSUMED dataset size; the value is not given in this snippet
epsilon = 0.01
A = 1
theta = npr.normal(size=(2, ))
sampler = SGLDSampler(precondition=True, noise_correction=False)
updates = sampler.prepare_updates(joint_neg_log_like, theta, epsilon, A=A)

# draw data
std = 0.8
mean = 1.4
x = np.random.normal(size=(Xsize, 1)) * std + mean

bsize = 20
n_samples = 8 * 10**4
samples = []
for i in range(n_samples):
    start = (i * bsize) % (x.shape[0] - bsize)
    xmb = np.copy(x[start:start + bsize])
    # (the loop body is truncated here in the source; the sampler update on
    # the minibatch xmb would follow)
def hess_nll(theta, i):
    # Hessian of the quartic NLL (theta+4)(theta+1)(theta-1)(theta-3)/14,
    # expanded as the sum over ordered pairs of the remaining two linear
    # factors (each unordered pair appears twice).
    h = (theta + 1) * (theta - 1) / 14 + (theta + 1) * (theta - 3) / 14 + (theta - 1) * (theta - 3) / 14 \
        + (theta + 4) * (theta - 1) / 14 + (theta + 4) * (theta - 3) / 14 + (theta - 1) * (theta - 3) / 14 \
        + (theta + 4) * (theta + 1) / 14 + (theta + 4) * (theta - 3) / 14 + (theta + 1) * (theta - 3) / 14 \
        + (theta + 4) * (theta + 1) / 14 + (theta + 4) * (theta - 1) / 14 + (theta + 1) * (theta - 1) / 14
    return h

def print_xi_callback(i, s):
    Exi = s.xi_acc / s.count
    print("{} : xi = {} E[xi] = {}".format(i, s.xi, Exi))

epsilon = 0.1
A = 1
theta = npr.normal(size=(1, ))
#theta = np.array([-2.94])

# use the thermostat
#sampler = SGNHTSampler(resample_momentum=0)
# use sgld with or without preconditioning
sampler = SGLDSampler(precondition=False, resample_momentum=0)
#sampler = SGLDSampler(precondition=True, noise_correction=False)

# using custom gradient function
updates = sampler.prepare_updates(neg_log_like, theta, epsilon, grad=grad_nll,
                                  A=A, callbacks=[print_xi_callback],
                                  callback_every=1000, fd_hess=True)
def main(argv):
    del argv
    n_clusters = FLAGS.num_clusters
    n_dimensions = FLAGS.num_dimensions
    n_observations = FLAGS.num_observations

    alpha = 3.3 * np.ones(n_clusters)
    a = 1.
    b = 1.
    kappa = 0.1

    npr.seed(10001)

    # generate true latents and data
    pi = npr.gamma(alpha)
    pi /= pi.sum()
    mu = npr.normal(0, 1.5, [n_clusters, n_dimensions])
    z = npr.choice(np.arange(n_clusters), size=n_observations, p=pi)
    x = npr.normal(mu[z, :], 0.5**2)

    # points used for initialization
    pi_est = np.ones(n_clusters) / n_clusters
    z_est = npr.choice(np.arange(n_clusters), size=n_observations, p=pi_est)
    mu_est = npr.normal(0., 0.01, [n_clusters, n_dimensions])
    tau_est = 1.
    init_vals = pi_est, z_est, mu_est, tau_est

    # instantiate the model log joint
    log_joint = make_log_joint(x, alpha, a, b, kappa)

    # run mean field on variational mean parameters
    # (defined for plotting but never invoked in this snippet)
    def callback(meanparams):
        fig = plot(meanparams, x)
        plt.savefig('/tmp/gmm_{:04d}.png'.format(itr))
        plt.close(fig.number)

    supports = (SIMPLEX, INTEGER, REAL, NONNEGATIVE)
    neg_energy, normalizers, _, initializers, _, _ = \
        multilinear_representation(log_joint, init_vals, supports)
    np_natparams = [initializer(10.) for initializer in initializers]
    np_meanparams = [
        grad(normalizer)(natparam)
        for normalizer, natparam in zip(normalizers, np_natparams)
    ]

    # TODO(trandustin) try using feed_dict's to debug
    def tf_get_variable(inputs):
        return tf.get_variable(str(id(inputs)),
                               initializer=tf.constant_initializer(inputs),
                               dtype=tf.float32,
                               shape=inputs.shape)

    tf_meanparams = container_fmap(tf_get_variable, np_meanparams)
    tf_natparams = container_fmap(tf_get_variable, np_natparams)

    # Represent the set of natural/mean parameters for each coordinate update.
    # Note: use comprehensions rather than [[]] * len(normalizers), which
    # would make every entry alias the same list.
    all_tf_natparams = [None] * len(normalizers)
    all_tf_natparams_assign_ops = [[] for _ in normalizers]
    # all_tf_meanparams = [None] * len(normalizers)
    all_tf_meanparams_assign_ops = [[] for _ in normalizers]
    for i in range(len(normalizers)):
        cast = lambda inputs: tf.cast(inputs, dtype=tf.float32)
        tf_update = make_tffun(grad(neg_energy, i), *np_meanparams)
        values = container_fmap(cast, tf_update(*tf_meanparams))
        for variable, value in zip(tf_meanparams[i], values):
            assign_op = variable.assign(value)
            all_tf_natparams_assign_ops[i].append(assign_op)
        all_tf_natparams[i] = values

        tf_update = make_tffun(grad(normalizers[i]), np_natparams[i])
        values = container_fmap(cast, tf_update(all_tf_natparams[i]))
        # values = container_fmap(cast, tf_update(tf_natparams))
        for variable, value in zip(tf_natparams[i], values):
            assign_op = variable.assign(value)
            all_tf_meanparams_assign_ops[i].append(assign_op)
        # all_tf_meanparams[i] = values

    # Set config for 1 CPU, 1 core, 1 thread(?).
    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1,
                            device_count={'CPU': 1})
    # Find out device placement.
    # config = tf.ConfigProto(log_device_placement=True)
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    natparams = sess.run(tf_natparams)
    print("ELBO ", elbo_fn(neg_energy, normalizers, natparams))

    start = time.time()
    for _ in range(FLAGS.num_iterations):
        for i in range(len(normalizers)):
            _ = sess.run(all_tf_natparams_assign_ops[i])
            _ = sess.run(all_tf_meanparams_assign_ops[i])
    runtime = time.time() - start
    print("CAVI Runtime (s): ", runtime)

    natparams = sess.run(tf_natparams)
    print("ELBO ", elbo_fn(neg_energy, normalizers, natparams))
def sample_mn(M, U, V):
    """Draw one sample from the matrix normal MN(M, U, V)."""
    G = npr.normal(size=M.shape)
    return M + np.dot(np.dot(np.linalg.cholesky(U), G), np.linalg.cholesky(V).T)
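# Usage sketch: M is the mean, U the row covariance, V the column covariance
# (both must be symmetric positive definite); the values are illustrative:
M = np.zeros((3, 2))
U = np.eye(3)
V = np.array([[2.0, 0.5], [0.5, 1.0]])
X = sample_mn(M, U, V)  # X ~ MN(M, U, V), shape (3, 2)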
def stochastic_grad(self, beta):
    # noisy gradient: exact gradient of f plus Gaussian noise of scale 0.32,
    # mimicking a minibatch gradient estimate
    return grad(self.f)(beta) + 0.32 * normal(size=self.dim)
def sample(self, num):
    return npr.normal(loc=self.mean, scale=self.sd, size=(num, 1))
        assert shape(a) == shape(b)
        return binop(a, b)
    return wrapped

make_binop = (lambda make_binop: lambda *args:
              add_binop_size_check(make_binop(*args)))(make_binop)

add = make_binop(operator.add, tuple)
sub = make_binop(operator.sub, tuple)
mul = make_binop(operator.mul, tuple)
div = make_binop(operator.truediv, tuple)
allclose = make_binop(np.allclose, all)
contract = make_binop(inner, sum)

shape = make_unop(np.shape, tuple)
unbox = make_unop(getval, tuple)
sqrt = make_unop(np.sqrt, tuple)
square = make_unop(lambda a: a**2, tuple)
randn_like = make_unop(lambda a: npr.normal(size=np.shape(a)), tuple)
zeros_like = make_unop(lambda a: np.zeros(np.shape(a)), tuple)
flatten = make_unop(lambda a: np.ravel(a), np.concatenate)

scale = make_scalar_op(operator.mul, tuple)
add_scalar = make_scalar_op(operator.add, tuple)

norm = lambda x: np.sqrt(contract(x, x))
rand_dir_like = lambda x: scale(1. / norm(x), randn_like(x))

isobjarray = lambda x: isinstance(x, np.ndarray) and x.dtype == np.object
tuplify = Y(lambda f: lambda a: a if not istuple(a) and not isobjarray(a)
            else tuple(map(f, a)))
depth = Y(lambda f: lambda a: np.ndim(a) if not istuple(a)
          else 1 + (min(map(f, a)) if len(a) else 1))
    ind = params[:, 0] < trunc_shape
    params[ind, 0] = trunc_shape
    ind = params[:, 1] < trunc_mean
    params[ind, 1] = trunc_mean
    return params

# Initialize
num_seed = 123
npr.seed(num_seed)

params_R = np.zeros((n_latent, 2))
steps = np.ones((n_latent, 2))
sCur_R = np.zeros((n_latent, 2))
ELBO_R = np.zeros(n_iter)

params_R[:, 0] = 0.5 + sigma * npr.normal(size=n_latent)
params_R[:, 1] = sigma * npr.normal(size=n_latent)
transformVar = np.log(1. + np.exp(params_R))  # softplus keeps the parameters positive
ELBO_R[0] = estimate_elbo(transformVar[:, 0], transformVar[:, 1], K, x, alphaz)

for n in range(1, n_iter):
    print(n)
    sGrad = reparam_gradient(transformVar[:, 0], transformVar[:, 1], x, K,
                             alphaz, corr=correction, B=B) / (1. + np.exp(-params_R))
    steps, sCur_R = stepSize(n + 1, sCur_R, sGrad, eta)
#
# grad_linear_with_fm_cost = grad(linear_with_fm_cost)
# params = {'x': xfm, 'w': w, 't': t}
# num_epoch = 10000
# alpha = 0.001
# for i in range(num_epoch):
#     cost = grad_linear_with_fm_cost(params)
#     w -= alpha * cost['w']
# print(w)
#
# plt.plot(x, t, 'r.')
# plt.plot(x, np.dot(xfm, w), 'b-')
# plt.show()

# cubic target with Gaussian observation noise
x = np.linspace(-5, 5, 1000)
t = x**3 - 20 * x + 10 + npr.normal(0, 4, x.shape[0])
inputs = x.reshape(x.shape[-1], 1)

# randomly initialized weights for a 1-4-4-1 network
W1 = npr.randn(1, 4)
b1 = npr.randn(4)
W2 = npr.randn(4, 4)
b2 = npr.randn(4)
W3 = npr.randn(4, 1)
b3 = npr.randn(1)
params = {'W1': W1, 'W2': W2, 'W3': W3, 'b1': b1, 'b2': b2, 'b3': b3}

def relu(x):
    return np.maximum(0, x)
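# A minimal forward pass matching the 1-4-4-1 parameter shapes above (a
# sketch; the original file's prediction and loss functions are not shown):
def predict(params, x):
    h1 = relu(np.dot(x, params['W1']) + params['b1'])
    h2 = relu(np.dot(h1, params['W2']) + params['b2'])
    return np.dot(h2, params['W3']) + params['b3']

preds = predict(params, inputs)  # shape (1000, 1)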
def y_noise(n, sd):
    return npr.normal(loc=0, scale=sd, size=(n, 1))
    # (fragment: tail of a gradient method from the surrounding class)
    grad_means = dp_log_prob
    grad_log_sigmas = dp_log_prob * eps * np.exp(log_sigmas) + 1
    return np.concatenate([grad_means, grad_log_sigmas])

def calc_eps(self, means, log_sigma, z):
    eps = (z - means) / np.exp(log_sigma)
    return eps

# Parameters
N = 50000
K = 500
D = 1

# synthetic linear-regression data: y = x w + noise
x = 5 * npr.randn(N * K).reshape([N, K])
alpha = np.ones(K)
w = npr.normal(0, 1. / np.sqrt(alpha))
y = np.matmul(x, w) + npr.randn(N)

data = {}
data['x'] = x
data['y'] = y

batch_size = 5000
seed = 1234
learning_rate = 0.1
samples = 1
epochs = 50

model = LinearRegression(data, N / batch_size)
sns.set_style(style='white')