def test_sampler_grads():
    def compare_sampler_grads(lds, num_samples, seed):
        init_params, pair_params, node_params = lds

        messages, _ = natural_filter_forward_general(
            init_params, pair_params, node_params)

        def fun1(messages):
            npr.seed(seed)
            samples = natural_sample_backward_general(messages, pair_params, num_samples)
            return np.sum(np.sin(samples))

        grads1 = grad(fun1)(messages)

        messages, _ = _natural_filter_forward_general(
            init_params, pair_params, node_params)

        def fun2(messages):
            npr.seed(seed)
            samples = _natural_sample_backward(messages, pair_params, num_samples)
            return np.sum(np.sin(samples))

        grads2 = grad(fun2)(messages)

        unpack_dense_grads = lambda x: interleave(*map(lambda y: zip(*y), x))

        assert allclose(grads1, unpack_dense_grads(grads2))

    npr.seed(0)
    for i in xrange(25):
        n, T = npr.randint(1, 5), npr.randint(10, 50)
        num_samples = npr.randint(1, 10)
        yield compare_sampler_grads, rand_lds(n, T), num_samples, i
def load_csv_test_split(filename, test_size, rand_seed, input_name,
                        target_name, conditions):
    data = ([], [])
    with open(filename) as file:
        reader = csv.DictReader(file)
        reader.next()  # DictReader already consumed the header, so this skips the first data row
        for row in reader:
            add_row = True
            for key in conditions.keys():
                if row[key] != conditions[key]:
                    add_row = False
            if add_row:
                data[0].append(row[input_name])
                data[1].append(float(row[target_name]))
    data = np.array(data)
    rand.seed(rand_seed)
    sequence = rand.choice(data[0].size, data[0].size, replace=False)
    num_test = int(data[0].size * test_size)
    testset = (data[0][sequence[:num_test]], data[1][sequence[:num_test]])
    trainset = (data[0][sequence[num_test:]], data[1][sequence[num_test:]])
    rand.seed()
    print 'Loaded', trainset[0].size, 'training points;', testset[0].size, 'test points.'
    return trainset, testset
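# Usage sketch for load_csv_test_split (hedged: the file and column names below
# are hypothetical, chosen only to illustrate the call signature):
#
#   trainset, testset = load_csv_test_split(
#       'measurements.csv', test_size=0.2, rand_seed=0,
#       input_name='smiles', target_name='solubility',
#       conditions={'assay': 'A'})
#
# Rows are kept only if every conditions[key] matches, then shuffled with the
# given seed so the same train/test split is reproduced across runs.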
def test_filter_grad():
    def compare_grads(lds):
        init_params, pair_params, node_params = lds

        dotter = randn_like(natural_filter_forward_general(
            init_params, pair_params, node_params)[0])

        def messages_to_scalar(messages):
            return contract(dotter, messages)

        def py_fun(node_params):
            messages, lognorm = natural_filter_forward_general(
                init_params, pair_params, node_params)
            return np.cos(lognorm) + messages_to_scalar(messages)

        def cy_fun(node_params):
            dense_messages, lognorm = _natural_filter_forward_general(
                init_params, pair_params, node_params)
            messages = unpack_dense_messages(dense_messages)
            return np.cos(lognorm) + messages_to_scalar(messages)

        g_py = grad(py_fun)(node_params)
        g_cy = grad(cy_fun)(node_params)

        assert allclose(g_py, g_cy)

    npr.seed(0)
    for _ in xrange(25):
        n, T = npr.randint(1, 5), npr.randint(10, 50)
        yield compare_grads, rand_lds(n, T)
def test_rts_backward_step():
    npr.seed(0)
    n = 3

    Jns = rand_psd(n)
    hns = npr.randn(n)
    mun = npr.randn(n)

    Jnp = rand_psd(n)
    hnp = npr.randn(n)

    Jf = rand_psd(n) + 10*np.eye(n)
    hf = npr.randn(n)

    bigJ = rand_psd(2*n)
    J11, J12, J22 = -1./2*bigJ[:n,:n], -bigJ[:n,n:], -1./2*bigJ[n:,n:]

    next_smooth = -1./2*Jns, hns, mun
    next_pred = -1./2*Jnp, hnp
    filtered = -1./2*Jf, hf
    pair_param = J11, J12, J22, 0.

    Js1, hs1, (mu1, ExxT1, ExxnT1) = natural_rts_backward_step(
        next_smooth, next_pred, filtered, pair_param)
    Js2, hs2, (mu2, ExxT2, ExnxT2) = rts_backward_step(
        next_smooth, next_pred, filtered, pair_param)

    assert np.allclose(Js1, Js2)
    assert np.allclose(hs1, hs2)
    assert np.allclose(mu1, mu2)
    assert np.allclose(ExxT1, ExxT2)
    assert np.allclose(ExxnT1, ExnxT2)
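# These tests rely on a rand_psd helper that is not shown. A minimal sketch,
# assuming it returns a random symmetric positive definite matrix:
def _rand_psd_sketch(n):
    A = npr.randn(n, n)
    return np.dot(A, A.T) + 1e-6 * np.eye(n)  # A A^T is PSD; the jitter makes it PD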
def train_nn(pred_fun, loss_fun, num_weights, train_smiles, train_raw_targets,
             train_params, validation_smiles=None, validation_raw_targets=None):
    """loss_fun has inputs (weights, smiles, targets)"""
    print "Total number of weights in the network:", num_weights
    npr.seed(0)
    init_weights = npr.randn(num_weights) * train_params['param_scale']

    train_targets, undo_norm = normalize_array(train_raw_targets)
    training_curve = []

    def callback(weights, iter):
        if iter % 10 == 0:
            print "max of weights", np.max(np.abs(weights))
            train_preds = undo_norm(pred_fun(weights, train_smiles))
            cur_loss = loss_fun(weights, train_smiles, train_targets)
            training_curve.append(cur_loss)
            print "Iteration", iter, "loss", cur_loss, "train RMSE", \
                np.sqrt(np.mean((train_preds - train_raw_targets)**2)),
            if validation_smiles is not None:
                validation_preds = undo_norm(pred_fun(weights, validation_smiles))
                print "Validation RMSE", iter, ":", \
                    np.sqrt(np.mean((validation_preds - validation_raw_targets)**2)),

    grad_fun = grad(loss_fun)
    grad_fun_with_data = build_batched_grad(grad_fun, train_params['batch_size'],
                                            train_smiles, train_targets)

    num_iters = train_params['num_epochs'] * len(train_smiles) / train_params['batch_size']
    trained_weights = adam(grad_fun_with_data, init_weights, callback=callback,
                           num_iters=num_iters, step_size=train_params['learn_rate'],
                           b1=train_params['b1'], b2=train_params['b2'])

    def predict_func(new_smiles):
        """Returns to the original units that the raw targets were in."""
        return undo_norm(pred_fun(trained_weights, new_smiles))

    return predict_func, trained_weights, training_curve
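# normalize_array is assumed to standardize the targets and return an inverse
# transform. A minimal sketch of that contract (not necessarily the original):
def _normalize_array_sketch(A):
    mean, std = np.mean(A), np.std(A)
    return (A - mean) / std, lambda z: z * std + mean  # (normalized, undo_norm)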
def train(self, X_train, F_train, y_train, iters_retrain=25, num_iters=1000,
          batch_size=32, lr=1e-3, param_scale=0.01, log_every=10):
    npr.seed(42)
    num_retrains = num_iters // iters_retrain
    for i in range(num_retrains):
        self.gru.objective = self.objective
        # carry over weights from last training
        init_weights = self.gru.weights if i > 0 else None
        print('training deep net... [%d/%d], learning rate: %.4f' % (i + 1, num_retrains, lr))
        self.gru.train(X_train, F_train, y_train, num_iters=iters_retrain,
                       batch_size=batch_size, lr=lr, param_scale=param_scale,
                       log_every=log_every, init_weights=init_weights)
        print('building surrogate dataset...')
        W_train = deepcopy(self.gru.saved_weights.T)
        APL_train = self.average_path_length_batch(W_train, X_train, F_train, y_train)
        print('training surrogate net... [%d/%d]' % (i + 1, num_retrains))
        self.mlp.train(W_train[:self.gru.num_weights, :], APL_train,
                       num_iters=3000, lr=1e-3, param_scale=0.1, log_every=250)
    self.pred_fun = self.gru.pred_fun
    self.weights = self.gru.weights
    # save final decision tree
    self.tree = self.gru.fit_tree(self.weights, X_train, F_train, y_train)
    return self.weights
def test_grad_arg1():
    npr.seed(0)
    for leading_dims, nrhs, lower, trans in options:
        L, x = rand_instance(leading_dims, nrhs, ndim, lower)

        def fun(x):
            return to_scalar(solve_triangular(L, x, trans=trans, lower=lower))

        yield check_grads, fun, x
def test_smoother_grads():
    def compare_smoother_grads(lds):
        init_params, pair_params, node_params = lds

        symmetrize = make_unop(lambda x: (x + x.T)/2. if np.ndim(x) == 2 else x, tuple)

        messages, _ = natural_filter_forward_general(*lds)
        dotter = randn_like(natural_smoother_general(messages, *lds))

        def py_fun(messages):
            result = natural_smoother_general(messages, *lds)
            assert shape(result) == shape(dotter)
            return contract(dotter, result)

        dense_messages, _ = _natural_filter_forward_general(
            init_params, pair_params, node_params)

        def cy_fun(messages):
            result = _natural_smoother_general(messages, pair_params)
            result = result[0][:3], result[1], result[2]
            assert shape(result) == shape(dotter)
            return contract(dotter, result)

        result_py = py_fun(messages)
        result_cy = cy_fun(dense_messages)
        assert np.isclose(result_py, result_cy)

        g_py = grad(py_fun)(messages)
        g_cy = unpack_dense_messages(grad(cy_fun)(dense_messages))
        assert allclose(g_py, g_cy)

    npr.seed(0)
    for _ in xrange(50):
        n, T = npr.randint(1, 5), npr.randint(10, 50)
        yield compare_smoother_grads, rand_lds(n, T)
def test_natural_predict_grad():
    npr.seed(0)
    n = 3

    J = rand_psd(n)
    h = npr.randn(n)

    bigJ = rand_psd(2*n)
    J11, J12, J22 = bigJ[:n,:n], bigJ[:n,n:], bigJ[n:,n:]
    logZ = npr.randn()

    J, J11, J12, J22 = -1./2*J, -1./2*J11, -J12, -1./2*J22

    ans = natural_predict((J, h), J11, J12, J22, logZ)
    dotter = (randn_like(J), randn_like(h)), randn_like(1.)

    def foo(*args):
        (J, h), logZ = natural_predict(*args)
        (a, b), c = dotter
        return np.sum(a*J) + np.sum(b*h) + c*logZ

    result1 = grad(foo)((J, h), J11, J12, J22, logZ)
    result2 = natural_predict_grad(dotter, ans, (J, h), J11, J12, J22, logZ)

    L, v, v2, temp, _, _ = natural_predict_forward_temps(J, J11, J12, h)
    result3 = _natural_predict_grad(dotter[0][0], dotter[0][1], dotter[1],
                                    -J12, L, v, v2, temp)

    for a, b in zip(result1, result2):
        check(a, b)

    for a, b in zip(result2, result3):
        check(a, b)
def classification_data(seed=0):
    """
    Load 2D data. 2 Classes. Class labels generated from a 2-2-1 network.
    :param seed: random number seed
    :return:
    """
    npr.seed(seed)
    data = np.load("./data/2D_toy_data_linear.npz")
    x = data['x']
    y = data['y']
    ids = np.arange(x.shape[0])
    npr.shuffle(ids)

    # train on 1% of the data, test on the rest
    num_train = int(np.round(0.01 * x.shape[0]))

    x_train = x[ids[:num_train]]
    y_train = y[ids[:num_train]]
    x_test = x[ids[num_train:]]
    y_test = y[ids[num_train:]]

    # standardize using training-set statistics
    mu = np.mean(x_train, axis=0)
    std = np.std(x_train, axis=0)
    x_train = (x_train - mu) / std
    x_test = (x_test - mu) / std

    train_stats = dict()
    train_stats['mu'] = mu
    train_stats['sigma'] = std

    return x_train, y_train, x_test, y_test, train_stats
def main(unused_argv):
    npr.seed(10001)

    def make_model(alpha, beta):
        """Generates matrix of shape [num_examples, num_features]."""
        def sample_model():
            epsilon = ph.norm.rvs(0, 1, size=[num_examples, num_latents])
            w = ph.norm.rvs(0, 1, size=[num_features, num_latents])
            tau = ph.gamma.rvs(alpha, beta)
            x = ph.norm.rvs(np.dot(epsilon, w.T), 1. / np.sqrt(tau))
            return [epsilon, w, tau, x]
        return sample_model

    num_examples = 50
    num_features = 10
    num_latents = 5
    alpha = 2.
    beta = 8.

    sampler = make_model(alpha, beta)
    _, _, _, x = sampler()
    epsilon, w, tau, _ = sampler()  # initialization

    log_joint_fn_ = ph.make_log_joint_fn(sampler)
    # partially apply the observed data x as the last argument
    log_joint_fn = lambda *args: log_joint_fn_(*(args + (x,)))

    cavi(log_joint_fn, (epsilon, w, tau), (REAL, REAL, NONNEGATIVE), 50)
def gen_rand_A(n, rho=None, seed=None, round_places=None):
    npr.seed(seed)
    A = npr.randn(n, n)
    if rho is not None:
        A = A * (rho / specrad(A))
    if round_places is not None:
        A = A.round(round_places)
    return A
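# specrad is assumed to compute the spectral radius (largest eigenvalue
# magnitude), so that A * (rho / specrad(A)) has spectral radius exactly rho.
# A minimal sketch:
def _specrad_sketch(A):
    return np.max(np.abs(np.linalg.eigvals(A)))

# e.g. gen_rand_A(4, rho=0.9, seed=0) then gives a 4x4 matrix with spectral
# radius 0.9, i.e. a stable linear dynamical system matrix.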
def train_nn(self, pred_fun, loss_fun, num_weights, train_smiles, train_raw_targets,
             train_params, validation_smiles=None, validation_raw_targets=None):
    """loss_fun has inputs (weights, smiles, targets)"""
    print "Total number of weights in the network:", num_weights
    npr.seed(0)
    init_weights = npr.randn(num_weights) * train_params['param_scale']
    init_weights[-1] = self.other_param_dict['init_bias']

    #train_targets, undo_norm = normalize_array(train_raw_targets)
    training_curve = []

    def callback(weights, iter):
        if iter % 20 == 0:
            print "max of weights", np.max(np.abs(weights))
            #train_preds = undo_norm(pred_fun(weights, train_smiles))
            cur_loss = loss_fun(weights, train_smiles, train_raw_targets)
            training_curve.append(cur_loss)
            print "Iteration", iter, "loss", cur_loss,

    grad_fun = grad(loss_fun)
    grad_fun_with_data = build_batched_grad(grad_fun, train_params['batch_size'],
                                            train_smiles, train_raw_targets)

    num_iters = train_params['num_epochs'] * np.shape(train_smiles)[0] / train_params['batch_size']
    trained_weights = adam(grad_fun_with_data, init_weights, callback=callback,
                           num_iters=num_iters, step_size=train_params['learn_rate'])
                           #b1=train_params['b1'], b2=train_params['b2'])

    def predict_func(new_smiles):
        """Returns predictions in the original units of the raw targets."""
        return pred_fun(trained_weights, new_smiles)

    def predict_func_fps(new_fps):
        """Same as predict_func, but for fingerprint inputs."""
        return pred_fun(trained_weights, new_fps)

    return predict_func, trained_weights, training_curve
def test_rts_backward_step_grad():
    npr.seed(0)
    n = 5

    Jns = rand_psd(n) + 10*np.eye(n)
    hns = npr.randn(n)
    mun = npr.randn(n)

    Jnp = rand_psd(n)
    hnp = npr.randn(n)

    Jf = rand_psd(n) + 10*np.eye(n)
    hf = npr.randn(n)

    bigJ = rand_psd(2*n)
    J11, J12, J22 = bigJ[:n,:n], bigJ[:n,n:], bigJ[n:,n:]

    next_smooth = Jns, hns, mun
    next_pred = Jnp, hnp
    filtered = Jf, hf
    pair_param = J11, J12, J22, 0.

    dotter = g_Js, g_hs, (g_Ex, g_ExxT, g_ExnxT) = \
        npr.randn(n,n), npr.randn(n), (npr.randn(n), npr.randn(n,n), npr.randn(n,n))

    # this function wraps natural_rts_backward_step to take care of factors of 2
    def fun(next_smooth, next_pred, filtered, pair_param):
        (Jns, hns, mun), (Jnp, hnp), (Jf, hf) = next_smooth, next_pred, filtered
        next_smooth, next_pred, filtered = (-1./2*Jns, hns, mun), (-1./2*Jnp, hnp), (-1./2*Jf, hf)
        J11, J12, J22, logZ_pair = pair_param
        pair_param = -1./2*J11, -J12, -1./2*J22, logZ_pair

        neghalfJs, hs, (Ex, ExxT, ExnxT) = natural_rts_backward_step(
            next_smooth, next_pred, filtered, pair_param)
        Js = -2*neghalfJs

        return Js, hs, (Ex, ExxT, ExnxT)

    # ans
    Js, hs, (Ex, ExxT, ExnxT) = fun(next_smooth, next_pred, filtered, pair_param)

    def gfun(next_smooth, next_pred, filtered):
        vals = fun(next_smooth, next_pred, filtered, pair_param)
        assert shape(vals) == shape(dotter)
        return contract(dotter, vals)

    g1 = grad(lambda x: gfun(*x))((next_smooth, next_pred, filtered))
    g2 = rts_backward_step_grad(
        g_Js, g_hs, g_Ex, g_ExxT, g_ExnxT,
        next_smooth, next_pred, filtered, pair_param,
        Js, hs, (Ex, ExxT, ExnxT))

    assert allclose(g1, g2)
def test_param_conversion():
    npr.seed(0)

    def check_params(natparam):
        natparam2 = standard_to_natural(*natural_to_standard(natparam))
        assert np.allclose(natparam, natparam2)

    for _ in xrange(5):
        n, k = npr.randint(1, 5), npr.randint(1, 3)
        yield check_params, rand_natparam(n, k)
def test_pack_dense():
    npr.seed(0)

    def check_params(natparam):
        natparam2 = pack_dense(*unpack_dense(natparam))
        assert np.allclose(natparam, natparam2)

    for _ in xrange(5):
        n, k = npr.randint(1, 5), npr.randint(1, 3)
        yield check_params, rand_natparam(n, k)
def test_forward():
    npr.seed(0)

    def check_forward(L, x, trans, lower):
        ans1 = solve(T(L) if trans in (1, 'T') else L, x)
        ans2 = solve_triangular(L, x, lower=lower, trans=trans)
        assert np.allclose(ans1, ans2)

    for leading_dims, nrhs, lower, trans in options:
        L, x = rand_instance(leading_dims, nrhs, ndim, lower)
        yield check_forward, L, x, trans, lower
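# T is assumed to be a transpose helper acting on the last two axes, so the
# reference solve above also covers batched (leading-dim) triangular systems.
# A minimal sketch:
_T_sketch = lambda X: np.swapaxes(X, -1, -2)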
def test_lognorm_grad():
    def compare_lognorm_grads(hmm):
        dotter = npr.randn()
        py_grad = grad(lambda x: dotter * python_hmm_logZ(x))(hmm)
        cy_grad = grad(lambda x: dotter * cython_hmm_logZ(x))(hmm)
        assert allclose(py_grad, cy_grad)

    npr.seed(0)
    for _ in xrange(25):
        n, T = npr.randint(1, 10), npr.randint(10, 50)
        yield compare_lognorm_grads, rand_hmm(n, T)
def test_expectedstats_autograd():
    npr.seed(0)

    def check_expectedstats(natparam):
        E_stats1 = expectedstats(natparam)
        E_stats2 = grad(logZ)(natparam)
        assert np.allclose(E_stats1, E_stats2)

    for _ in xrange(20):
        n, k = npr.randint(1, 5), npr.randint(1, 3)
        yield check_expectedstats, rand_natparam(n, k)
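# The identity being tested: for an exponential family with log-partition
# logZ(eta), the expected sufficient statistics are its gradient,
#   E[t(x)] = d/d(eta) logZ(eta),
# so expectedstats(natparam) should agree with grad(logZ)(natparam).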
def test_lognorm():
    def compare_lognorms(hmm):
        py_logZ = python_hmm_logZ(hmm)
        cy_logZ = cython_hmm_logZ(hmm)
        cy_logZ2 = cython_hmm_logZ_normalized(hmm)[0]
        assert np.isclose(py_logZ, cy_logZ)
        assert np.isclose(py_logZ, cy_logZ2)

    npr.seed(0)
    for _ in xrange(25):
        n, T = npr.randint(1, 10), npr.randint(10, 50)
        yield compare_lognorms, rand_hmm(n, T)
def val_split(data, val_fraction, seed=np.array([])):
    if seed.any():
        npr.seed(seed)
    sequence = npr.choice(data[0].size, data[0].size, replace=False)
    val_lim = int(val_fraction * data[0].size)
    val_inputs = data[0][sequence[:val_lim]]
    val_targets = data[1][sequence[:val_lim]].astype('double')
    train_inputs = data[0][sequence[val_lim:]]
    train_targets = data[1][sequence[val_lim:]].astype('double')
    if seed.any():
        npr.seed()
    return train_inputs, train_targets, val_inputs, val_targets
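# Usage sketch (hedged): data is a pair of equal-length arrays (inputs, targets).
# Passing a nonempty array as seed makes the shuffle reproducible, and the
# trailing npr.seed() re-randomizes global state afterwards, e.g.
#   tr_x, tr_y, va_x, va_y = val_split((inputs, targets), 0.2, seed=np.array([7]))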
def test_hess_vector_prod():
    npr.seed(1)
    randv = npr.randn(10)

    def fun(x):
        return np.sin(np.dot(x, randv))
    df = grad(fun)

    def vector_product(x, v):
        return np.sin(np.dot(v, df(x)))
    ddf = grad(vector_product)

    A = npr.randn(10)
    B = npr.randn(10)
    check_grads(fun, A)
    check_grads(vector_product, A, B)
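# For reference, a plain Hessian-vector product in autograd is the gradient of
# the gradient-vector inner product (the test above wraps that inner product in
# sin only to exercise higher-order derivatives). A minimal sketch:
def _hvp_sketch(fun):
    return grad(lambda x, v: np.dot(grad(fun)(x), v))  # call as _hvp_sketch(f)(x, v)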
def test_filters():
    def compare_filters(lds):
        init_params, pair_params, node_params = lds

        messages1, lognorm1 = natural_filter_forward_general(
            init_params, pair_params, node_params)

        dense_messages2, lognorm2 = _natural_filter_forward_general(
            init_params, pair_params, node_params)
        messages2 = unpack_dense_messages(dense_messages2)

        assert allclose(messages1, messages2)
        assert np.isclose(lognorm1, lognorm2)

    npr.seed(0)
    for _ in xrange(25):
        n, T = npr.randint(1, 5), npr.randint(10, 50)
        yield compare_filters, rand_lds(n, T)
def test_rts_3():
    npr.seed(0)
    n = 3

    # inputs
    L = np.linalg.cholesky(rand_psd(n))
    Sigma = rand_psd(n)
    mu = npr.randn(n)
    mun = npr.randn(n)

    # constants
    J12 = rand_psd(2*n)[:n,n:]

    # outgrads
    g_ExnxT = npr.randn(n,n)
    g_ExxT = npr.randn(n,n)
    g_Ex = npr.randn(n)

    def step3(L, Sigma, mu, mun):
        temp2 = np.dot(-J12.T, Sigma)
        Sigma_21 = solve_posdef_from_cholesky(L, temp2)
        ExnxT = Sigma_21 + np.outer(mun, mu)
        ExxT = Sigma + np.outer(mu, mu)
        return mu, ExxT, ExnxT

    # ans
    Ex, ExxT, ExnxT = step3(L, Sigma, mu, mun)

    # compare grads
    def fun(args):
        Ex, ExxT, ExnxT = step3(*args)
        return np.sum(ExnxT * g_ExnxT) + np.sum(ExxT * g_ExxT) + np.sum(Ex * g_Ex)

    g_L1, g_Sigma1, g_mu1, g_mun1 = grad(fun)((L, Sigma, mu, mun))
    g_L2, g_Sigma2, g_mu2, g_mun2 = rts_3_grad(
        g_Ex, g_ExxT, g_ExnxT, Ex, ExxT, ExnxT, L, Sigma, mu, mun, J12)

    assert np.allclose(g_L1, g_L2)
    assert np.allclose(g_Sigma1, g_Sigma2)
    assert np.allclose(g_mu1, g_mu2)
    assert np.allclose(g_mun1, g_mun2)
def test_rts_1():
    npr.seed(0)
    n = 3

    # inputs
    Jns = rand_psd(n) + 10*np.eye(n)
    hns = npr.randn(n)
    Jnp = rand_psd(n)
    hnp = npr.randn(n)
    Jf = rand_psd(n)
    hf = npr.randn(n)

    # constants
    bigJ = rand_psd(2*n)
    J11, J12, J22 = bigJ[:n,:n], bigJ[:n,n:], bigJ[n:,n:]
    L = np.linalg.cholesky(Jns - Jnp + J22)

    # outgrads
    g_Js = npr.randn(n,n)
    g_hs = npr.randn(n)

    def step1(L, hns, hnp, Jf, hf):
        temp = solve_triangular(L, J12.T)
        Js = Jf + J11 - np.dot(temp.T, temp)
        hs = hf - np.dot(temp.T, solve_triangular(L, hns - hnp))
        return Js, hs

    # ans
    Js, hs = step1(L, hns, hnp, Jf, hf)

    def fun(args):
        Js, hs = step1(*args)
        return np.sum(g_Js * Js) + np.sum(g_hs * hs)

    g_L1, g_hns1, g_hnp1, g_Jf1, g_hf1 = grad(fun)((L, hns, hnp, Jf, hf))
    g_L2, g_hns2, g_hnp2, g_Jf2, g_hf2 = rts_1_grad(
        g_Js, g_hs, Js, hs, L, hns, hnp, Jf, hf, J11, J12)

    assert np.allclose(g_hns1, g_hns2)
    assert np.allclose(g_hnp1, g_hnp2)
    assert np.allclose(g_Jf1, g_Jf2)
    assert np.allclose(g_hf1, g_hf2)
    assert np.allclose(g_L1, g_L2)
def test_natural_predict():
    npr.seed(0)
    n = 3

    J = rand_psd(n)
    h = npr.randn(n)

    bigJ = rand_psd(2*n)
    J11, J12, J22 = bigJ[:n,:n], bigJ[:n,n:], bigJ[n:,n:]
    logZ = npr.randn()

    J, J11, J12, J22 = -1./2*J, -1./2*J11, -J12, -1./2*J22

    (J_pred_1, h_pred_1), lognorm1 = _natural_predict(J, h, J11, J12, J22, logZ)
    (J_pred_2, h_pred_2), lognorm2 = __natural_predict(J, h, J11, J12, J22, logZ)

    assert np.allclose(J_pred_1, J_pred_2)
    assert np.allclose(h_pred_1, h_pred_2)
    assert np.isclose(lognorm1, lognorm2)
def test_lognorm_grads():
    npr.seed(0)
    n = 3

    L = np.linalg.cholesky(rand_psd(n))
    v = npr.randn(n)

    foo = lambda L, v: 1./2*np.dot(v, v) - np.sum(np.log(np.diag(L)))
    ans = foo(L, v)

    a = grad(foo, 0)(L, v)
    b = lognorm_grad_arg0(1., ans, L, v)
    check(a, b)

    a = grad(foo, 1)(L, v)
    b = lognorm_grad_arg1(1., ans, L, v)
    check(a, b)
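# Identity behind foo: if J = L L^T and v = solve_triangular(L, h), then
#   1/2 v.v - sum(log(diag(L))) = 1/2 h^T J^{-1} h - 1/2 log|J|,
# the Gaussian log-normalizer in information form (up to the 2*pi constant),
# since v^T v = h^T (L L^T)^{-1} h and log|J| = 2*sum(log(diag(L))).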
def test_solve_triangular_grads():
    npr.seed(0)
    n = 3

    foo = lambda L, v, trans: to_scalar(solve_triangular(L, v, trans))
    L = np.linalg.cholesky(rand_psd(n))

    for v in [npr.randn(n), npr.randn(n,n)]:
        for trans in ['N', 'T']:
            ans = solve_triangular(L, v, trans)

            a = grad(foo, 0)(L, v, trans)
            b = solve_triangular_grad_arg0(randn_like(ans), ans, L, v, trans)
            check(a, b)

            a = grad(foo, 1)(L, v, trans)
            b = solve_triangular_grad_arg1(randn_like(ans), ans, L, v, trans)
            check(a, b)
def main(argv):
    del argv

    n_clusters = FLAGS.num_clusters
    n_dimensions = FLAGS.num_dimensions
    n_observations = FLAGS.num_observations

    alpha = 3.3 * np.ones(n_clusters)
    a = 1.
    b = 1.
    kappa = 0.1

    npr.seed(10001)

    # generate true latents and data
    pi = npr.gamma(alpha)
    pi /= pi.sum()
    mu = npr.normal(0, 1.5, [n_clusters, n_dimensions])
    z = npr.choice(np.arange(n_clusters), size=n_observations, p=pi)
    x = npr.normal(mu[z, :], 0.5**2)

    # points used for initialization
    pi_est = np.ones(n_clusters) / n_clusters
    z_est = npr.choice(np.arange(n_clusters), size=n_observations, p=pi_est)
    mu_est = npr.normal(0., 0.01, [n_clusters, n_dimensions])
    tau_est = 1.
    init_vals = pi_est, z_est, mu_est, tau_est

    # instantiate the model log joint
    log_joint = make_log_joint(x, alpha, a, b, kappa)

    # run mean field on variational mean parameters
    # (note: this plotting callback is defined but unused; a no-op lambda is
    # passed to cavi below, and it also references an undefined itr)
    def callback(meanparams):
        fig = plot(meanparams, x)
        plt.savefig('/tmp/gmm_{:04d}.png'.format(itr))
        plt.close(fig.number)

    start = time.time()
    cavi(log_joint, init_vals, (SIMPLEX, INTEGER, REAL, NONNEGATIVE),
         FLAGS.num_iterations, callback=lambda *args: None)
    runtime = time.time() - start
    print("CAVI Runtime (s): ", runtime)
def test_natural_sample():
    npr.seed(0)
    n = 3
    s = 5

    J = rand_psd(n)
    h = npr.randn(n, s)
    eps = npr.randn(n, s)

    def natural_sample(J, h, eps):
        mu = np.linalg.solve(J, h)
        L = np.linalg.cholesky(J)
        noise = solve_triangular(L, eps, 'T')
        return mu + noise

    sample1 = natural_sample(J, h, eps)
    sample2 = _natural_sample(J, h, eps)

    assert np.allclose(sample1, sample2)
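# Why this samples correctly: with J = L L^T, the noise L^{-T} eps has
# covariance L^{-T} I L^{-1} = (L L^T)^{-1} = J^{-1}, so mu + L^{-T} eps is a
# draw from N(J^{-1} h, J^{-1}), the Gaussian in information form.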
def test_natural_lognorm_grad():
    npr.seed(0)
    n = 3

    J = rand_psd(n)
    h = npr.randn(n)

    def natural_lognorm((J, h)):
        L = np.linalg.cholesky(J)
        v = solve_triangular(L, h)
        return 1./2*np.dot(v, v) - np.sum(np.log(np.diag(L)))

    g_J_1, g_h_1 = grad(lambda x: np.pi*natural_lognorm(x))((J, h))

    L = np.linalg.cholesky(J)
    v = solve_triangular(L, h)
    g_J_2, g_h_2 = natural_lognorm_grad(np.pi, L, v)

    assert np.allclose(g_J_1, g_J_2)
    assert np.allclose(g_h_1, g_h_2)
def test_dpotrs_grad():
    npr.seed(0)
    n = 3
    s = 5

    J = rand_psd(n)
    h = npr.randn(n, s)
    L = np.linalg.cholesky(J)

    dpotrs = lambda (L, h): solve_triangular(L, solve_triangular(L, h), 'T')
    ans = dpotrs((L, h))
    dotter = npr.randn(*ans.shape)
    assert np.allclose(ans, np.linalg.solve(J, h))

    g_L_1, g_h_1 = grad(lambda x: np.sum(dotter * dpotrs(x)))((L, h))
    g_L_2, g_h_2 = dpotrs_grad(dotter, ans, L, h)

    assert np.allclose(g_L_1, g_L_2)
    assert np.allclose(g_h_1, g_h_2)
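# dpotrs mirrors the LAPACK routine of the same name: given the Cholesky factor
# L of J, it solves J x = h with two triangular solves (L y = h, then L^T x = y),
# which is what the np.allclose check against np.linalg.solve(J, h) confirms.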
def test_samplers():
    def compare_samplers(lds, num_samples, seed):
        init_params, pair_params, node_params = lds

        npr.seed(seed)
        messages1, _ = natural_filter_forward_general(
            init_params, pair_params, node_params)
        samples1 = natural_sample_backward_general(messages1, pair_params, num_samples)

        npr.seed(seed)
        dense_messages2, _ = _natural_filter_forward_general(
            init_params, pair_params, node_params)
        samples2 = _natural_sample_backward(dense_messages2, pair_params, num_samples)

        assert np.allclose(samples1, samples2)

    npr.seed(0)
    for i in xrange(25):
        n, T = npr.randint(1, 5), npr.randint(10, 50)
        num_samples = npr.randint(1, 10)
        yield compare_samplers, rand_lds(n, T), num_samples, i
def test_smoothers():
    def compare_smoothers(lds):
        init_params, pair_params, node_params = lds

        messages1, _ = natural_filter_forward_general(
            init_params, pair_params, node_params)
        E_init_stats1, E_pair_stats1, E_node_stats1 = \
            natural_smoother_general(messages1, *lds)

        dense_messages2, _ = _natural_filter_forward_general(
            init_params, pair_params, node_params)
        E_init_stats2, E_pair_stats2, E_node_stats2 = \
            _natural_smoother_general(dense_messages2, pair_params)

        assert allclose(E_init_stats1[:3], E_init_stats2[:3])
        assert allclose(E_pair_stats1, E_pair_stats2)
        assert allclose(E_node_stats1, E_node_stats2)

    npr.seed(0)
    for _ in xrange(25):
        n, T = npr.randint(1, 5), npr.randint(10, 50)
        yield compare_smoothers, rand_lds(n, T)
def test_natural_sample_grad():
    npr.seed(0)
    n = 3
    s = 5

    J = rand_psd(n)
    h = npr.randn(n, s)
    eps = npr.randn(n, s)
    dotter = npr.randn(*eps.shape)

    def natural_sample(J, h, eps):
        L = np.linalg.cholesky(J)
        mu = solve_posdef_from_cholesky(L, h)
        noise = solve_triangular(L, eps, 'T')
        return mu + noise

    g_J_1, g_h_1 = grad(
        lambda (J, h): np.sum(dotter * natural_sample(J, h, eps)))((J, h))
    g_J_2, g_h_2 = natural_sample_grad(dotter, natural_sample(J, h, eps), J, h, eps)

    assert np.allclose(g_J_1, g_J_2)
    assert np.allclose(g_h_1, g_h_2)
def test_info_to_mean_grad():
    npr.seed(0)
    n = 3

    g_mu = npr.randn(n)
    g_Sigma = npr.randn(n, n)

    J = rand_psd(n)
    h = npr.randn(n)

    def info_to_mean((J, h)):
        Sigma = np.linalg.inv(J)
        mu = np.dot(Sigma, h)
        return mu, Sigma

    def fun1((J, h)):
        mu, Sigma = info_to_mean((J, h))
        return np.sum(g_mu * mu) + np.sum(g_Sigma * Sigma)

    g_J_1, g_h_1 = grad(fun1)((J, h))
    g_J_2, g_h_2 = info_to_mean_grad(g_mu, g_Sigma, J, h)

    assert np.allclose(g_h_1, g_h_2)
    assert np.allclose(g_J_1, g_J_2)
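# Hedged sketch of the closed-form gradients that info_to_mean_grad should
# implement (derived by hand here, not taken from the original): with
# Sigma = J^{-1} and mu = Sigma h,
#   g_h = Sigma g_mu,    g_J = -Sigma (g_mu mu^T + g_Sigma Sigma).
def _info_to_mean_grad_sketch(g_mu, g_Sigma, J, h):
    Sigma = np.linalg.inv(J)
    mu = np.dot(Sigma, h)
    g_h = np.dot(Sigma, g_mu)
    g_J = -np.dot(Sigma, np.outer(g_mu, mu) + np.dot(g_Sigma, Sigma))
    return g_J, g_h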
from __future__ import absolute_import
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd.util import *
from autograd import grad

npr.seed(1)

def test_abs():
    fun = lambda x: 3.0 * np.abs(x)
    d_fun = grad(fun)
    check_grads(fun, 1.1)
    check_grads(fun, -1.1)
    check_grads(fun, 0.)
    check_grads(d_fun, 1.1)
    check_grads(d_fun, -1.1)
    # check_grads(d_fun, 0.)  # higher-order numerical check doesn't work at non-diffable point

def test_sin():
    fun = lambda x: 3.0 * np.sin(x)
    d_fun = grad(fun)
    check_grads(fun, npr.randn())
    check_grads(d_fun, npr.randn())

def test_sign():
    fun = lambda x: 3.0 * np.sign(x)
    d_fun = grad(fun)
    check_grads(fun, 1.1)
    check_grads(fun, -1.1)
    check_grads(d_fun, 1.1)
    check_grads(d_fun, -1.1)
from __future__ import absolute_import
import autograd.numpy.random as npr
import autograd.numpy as np
import operator as op
from numpy_utils import (combo_check, stat_check, unary_ufunc_check,
                         binary_ufunc_check, binary_ufunc_check_no_same_args)

npr.seed(0)

# Array statistics functions
def test_max():  stat_check(np.max)
def test_all():  stat_check(np.all)
def test_any():  stat_check(np.any)
def test_mean(): stat_check(np.mean)
def test_min():  stat_check(np.min)
def test_sum():  stat_check(np.sum)
def test_prod(): stat_check(np.prod)
def test_var():  stat_check(np.var)
def test_std():  stat_check(np.std)

# Unary ufunc tests
def test_sin():      unary_ufunc_check(np.sin)
def test_abs():      unary_ufunc_check(np.abs, lims=[0.1, 4.0])
def test_absolute(): unary_ufunc_check(np.absolute, lims=[0.1, 4.0])
def test_arccosh():  unary_ufunc_check(np.arccosh, lims=[1.1, 4.0])
def test_arcsinh():  unary_ufunc_check(np.arcsinh, lims=[-0.9, 0.9])
def test_arctanh():  unary_ufunc_check(np.arctanh, lims=[-0.9, 0.9])
def test_ceil():     unary_ufunc_check(np.ceil, lims=[-1.5, 1.5], test_complex=False)
def test_cos():      unary_ufunc_check(np.cos)
def test_cosh():     unary_ufunc_check(np.cosh)
beta_kern = GPy.kern.Matern52(input_dim=1, variance=1., lengthscale=length_scale)
K_beta = beta_kern.K(lam0.reshape((-1, 1)))
K_chol = np.linalg.cholesky(K_beta)
K_inv = np.linalg.inv(K_beta)

##########################################################################
## set up the likelihood and prior functions and generate a sample
##########################################################################
parser, loss_fun, loss_grad, prior_loss, prior_grad = \
    make_functions(X, Lam, lam0, lam0_delta, K,
                   Kinv_beta=K_inv,
                   K_chol=K_chol,
                   sig2_omega=sig2_omega,
                   sig2_mu=sig2_mu)

# sample from prior
npr.seed(chain_idx + 42)  # different initialization per chain
th = np.zeros(parser.N)
parser.set(th, 'betas', .001 * np.random.randn(K, len(lam0)))
parser.set(th, 'omegas', .01 * npr.randn(N, K))
parser.set(th, 'mus', .01 * npr.randn(N))

#print "initial loss", loss_fun(th)
check_grad(fun=lambda th: loss_fun(th) + prior_loss(th),
           jac=lambda th: loss_grad(th) + prior_grad(th),
           th=th)

###########################################################################
## optimize for about 350 iterations to get to some meaty part of the dist
###########################################################################
cache_fname = 'cache/basis_samples_K-4_V-1364_chain_%d.npy' % chain_idx
if True and os.path.exists(cache_fname):