def _test_linear_regression(self, default, dtype):
  def build_toy_dataset(N, w, noise_std=0.1):
    D = len(w)
    x = np.random.randn(N, D)
    y = np.dot(x, w) + np.random.normal(0, noise_std, size=N)
    return x, y

  with self.test_session() as sess:
    N = 40  # number of data points
    D = 10  # number of features
    w_true = np.random.randn(D)
    X_train, y_train = build_toy_dataset(N, w_true)
    X_test, y_test = build_toy_dataset(N, w_true)

    X = tf.placeholder(dtype, [N, D])
    w = Normal(loc=tf.zeros(D, dtype=dtype), scale=tf.ones(D, dtype=dtype))
    b = Normal(loc=tf.zeros(1, dtype=dtype), scale=tf.ones(1, dtype=dtype))
    y = Normal(loc=ed.dot(X, w) + b, scale=0.1 * tf.ones(N, dtype=dtype))

    n_samples = 2000
    if not default:
      qw = Empirical(tf.Variable(tf.zeros([n_samples, D], dtype=dtype)))
      qb = Empirical(tf.Variable(tf.zeros([n_samples, 1], dtype=dtype)))
      inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
    else:
      inference = ed.SGHMC([w, b], data={X: X_train, y: y_train})
      qw = inference.latent_vars[w]
      qb = inference.latent_vars[b]
    inference.run(step_size=0.0001)

    self.assertAllClose(qw.mean().eval(), w_true, rtol=5e-1, atol=5e-1)
    self.assertAllClose(qb.mean().eval(), [0.0], rtol=5e-1, atol=5e-1)

    old_t, old_n_accept = sess.run([inference.t, inference.n_accept])
    if not default:
      self.assertEqual(old_t, n_samples)
    else:
      self.assertEqual(old_t, 1e4)
    self.assertGreater(old_n_accept, 0.1)
    sess.run(inference.reset)
    new_t, new_n_accept = sess.run([inference.t, inference.n_accept])
    self.assertEqual(new_t, 0)
    self.assertEqual(new_n_accept, 0)
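# For context: ed.SGHMC implements the stochastic-gradient HMC transition of
# Chen et al. (2014). A minimal numpy sketch of that update (the
# SGD-with-momentum form, taking beta_hat = 0) -- illustrative only, not
# Edward's exact internals:
import numpy as np

def sghmc_step(theta, v, grad_log_p, eta=1e-4, alpha=0.1, rng=np.random):
  # theta: state; v: momentum; eta: per-step learning rate; alpha: friction.
  noise = rng.normal(0.0, np.sqrt(2.0 * alpha * eta), size=np.shape(theta))
  v = (1.0 - alpha) * v + eta * grad_log_p(theta) + noise
  return theta + v, v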
def ed_graph_2(disc=1):
  # Priors
  if str(sys.argv[4]) == 'laplace':
    W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                  scale=(std**2 / D) * tf.ones([D, n_hidden]))
    W_1 = Laplace(loc=tf.zeros([n_hidden, K]),
                  scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = Laplace(loc=tf.zeros(n_hidden), scale=(std**2 / D) * tf.ones(n_hidden))
    b_1 = Laplace(loc=tf.zeros(K), scale=(std**2 / n_hidden) * tf.ones(K))
  if str(sys.argv[4]) == 'normal':
    W_0 = Normal(loc=tf.zeros([D, n_hidden]),
                 scale=std * D**(-.5) * tf.ones([D, n_hidden]))
    W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K]))
    b_0 = Normal(loc=tf.zeros(n_hidden), scale=std * D**(-.5) * tf.ones(n_hidden))
    b_1 = Normal(loc=tf.zeros(K), scale=std * n_hidden**(-.5) * tf.ones(K))
  if str(sys.argv[4]) == 'T':
    W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                   loc=tf.zeros([D, n_hidden]),
                   scale=(std**2 / D) * tf.ones([D, n_hidden]))
    W_1 = StudentT(df=df * tf.ones([n_hidden, K]),
                   loc=tf.zeros([n_hidden, K]),
                   scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = StudentT(df=df * tf.ones([n_hidden]), loc=tf.zeros(n_hidden),
                   scale=(std**2 / D) * tf.ones(n_hidden))
    b_1 = StudentT(df=df * tf.ones([K]), loc=tf.zeros(K),
                   scale=(std**2 / n_hidden) * tf.ones(K))

  x = tf.placeholder(tf.float32, [None, None])
  y = Categorical(logits=nn(x, W_0, b_0, W_1, b_1))
  # We use a placeholder for the labels in anticipation of the training data.
  y_ph = tf.placeholder(tf.int32, [None])

  # Use placeholders for the pre-trained posteriors
  w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
  w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
  b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
  b1 = tf.placeholder(tf.float32, [n_samp, K])
  # Empirical distributions
  qW_0 = Empirical(params=tf.Variable(w0))
  qW_1 = Empirical(params=tf.Variable(w1))
  qb_0 = Empirical(params=tf.Variable(b0))
  qb_1 = Empirical(params=tf.Variable(b1))

  if str(sys.argv[3]) == 'hmc':
    inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                       data={y: y_ph})
  if str(sys.argv[3]) == 'sghmc':
    inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                         data={y: y_ph})

  # Initialize the inference variables
  if str(sys.argv[3]) == 'hmc':
    inference.initialize(step_size=disc * leap_size, n_steps=step_no, n_print=100)
  if str(sys.argv[3]) == 'sghmc':
    inference.initialize(step_size=disc * leap_size, friction=disc**2 * 0.1,
                         n_print=100)

  return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1,
          inference, w0, w1, b0, b1)
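# How the returned placeholders get used (a sketch, not from the original
# script): Variables built from placeholders, e.g. tf.Variable(w0), must be
# initialized with the pre-trained sample arrays fed in. `samples` is a
# hypothetical dict of numpy arrays restored elsewhere.
((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1,
 inference, w0, w1, b0, b1) = ed_graph_2()
ed.get_session().run(tf.global_variables_initializer(),
                     feed_dict={w0: samples['w0'], w1: samples['w1'],
                                b0: samples['b0'], b1: samples['b1']})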
def _test_normal_normal(self, default, dtype):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=tf.constant(0.0, dtype=dtype),
                scale=tf.constant(1.0, dtype=dtype))
    x = Normal(loc=mu, scale=tf.constant(1.0, dtype=dtype), sample_shape=50)

    n_samples = 2000
    # analytic solution: N(loc=0.0, scale=sqrt(1/51) ~= 0.140)
    if not default:
      qmu = Empirical(params=tf.Variable(tf.ones(n_samples, dtype=dtype)))
      inference = ed.SGHMC({mu: qmu}, data={x: x_data})
    else:
      inference = ed.SGHMC([mu], data={x: x_data})
      qmu = inference.latent_vars[mu]
    inference.run(step_size=0.025)

    self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1)
    self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                        rtol=1e-1, atol=1e-1)

    old_t, old_n_accept = sess.run([inference.t, inference.n_accept])
    if not default:
      self.assertEqual(old_t, n_samples)
    else:
      self.assertEqual(old_t, 1e4)
    self.assertGreater(old_n_accept, 0.1)
    sess.run(inference.reset)
    new_t, new_n_accept = sess.run([inference.t, inference.n_accept])
    self.assertEqual(new_t, 0)
    self.assertEqual(new_n_accept, 0)
def test_normalnormal_float32(self):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=0.0, scale=1.0)
    x = Normal(loc=mu, scale=1.0, sample_shape=50)

    qmu = Empirical(params=tf.Variable(tf.ones(5000)))

    # analytic solution: N(loc=0.0, scale=sqrt(1/51) ~= 0.140)
    inference = ed.SGHMC({mu: qmu}, data={x: x_data})
    inference.run(step_size=0.025)

    self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-2, atol=1.5e-2)
    self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                        rtol=5e-2, atol=5e-2)
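# Where sqrt(1/51) comes from: with prior mu ~ N(0, 1), likelihood
# x_i ~ N(mu, 1), and n i.i.d. observations, conjugacy gives the posterior
#   mu | x ~ N( n * xbar / (n + 1), 1 / (n + 1) ).
# A quick check of the numbers used in the tests above:
import numpy as np
n, xbar = 50, 0.0
post_mean = n * xbar / (n + 1)     # 0.0
post_std = np.sqrt(1.0 / (n + 1))  # ~0.140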
def ed_graph_init():
  # Graph for prior distributions
  if str(sys.argv[4]) == 'laplace':
    W_0 = Laplace(loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
    W_1 = Laplace(loc=tf.zeros([n_hidden, K]),
                  scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = Laplace(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
    b_1 = Laplace(loc=tf.zeros(K), scale=(std**2 / n_hidden) * tf.ones(K))
  if str(sys.argv[4]) == 'normal':
    W_0 = Normal(loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
    W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K]))
    b_0 = Normal(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
    b_1 = Normal(loc=tf.zeros(K), scale=std * n_hidden**(-.5) * tf.ones(K))
  if str(sys.argv[4]) == 'T':
    W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                   loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
    W_1 = StudentT(df=df * tf.ones([n_hidden, K]),
                   loc=tf.zeros([n_hidden, K]),
                   scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = StudentT(df=df * tf.ones([n_hidden]), loc=tf.zeros(n_hidden),
                   scale=tf.ones(n_hidden))
    b_1 = StudentT(df=df * tf.ones([K]), loc=tf.zeros(K),
                   scale=(std**2 / n_hidden) * tf.ones(K))

  # Inputs
  x = tf.placeholder(tf.float32, [None, D])
  # Regression likelihood
  y = Normal(loc=nn(x, W_0, b_0, W_1, b_1),
             scale=std_out * tf.ones([tf.shape(x)[0]]))
  # We use a placeholder for the labels in anticipation of the training data.
  y_ph = tf.placeholder(tf.float32, [None])

  # Graph for posterior distribution
  if str(sys.argv[4]) == 'normal':
    qW_0 = Empirical(params=tf.Variable(tf.random_normal([n_samp, D, n_hidden])))
    qW_1 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, n_hidden, K], stddev=std * (n_hidden**-.5))))
    qb_0 = Empirical(params=tf.Variable(tf.random_normal([n_samp, n_hidden])))
    qb_1 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, K], stddev=std * (n_hidden**-.5))))
  if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
    # Use a placeholder, otherwise we cannot assign a tensor > 2GB
    w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
    w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
    b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
    b1 = tf.placeholder(tf.float32, [n_samp, K])
    # Empirical distribution
    qW_0 = Empirical(params=tf.Variable(w0))
    qW_1 = Empirical(params=tf.Variable(w1))
    qb_0 = Empirical(params=tf.Variable(b0))
    qb_1 = Empirical(params=tf.Variable(b1))

  # Build inference graph
  if str(sys.argv[3]) == 'hmc':
    inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                       data={y: y_ph})
  if str(sys.argv[3]) == 'sghmc':
    inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                         data={y: y_ph})

  # Initialize the inference variables
  if str(sys.argv[3]) == 'hmc':
    inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100)
  if str(sys.argv[3]) == 'sghmc':
    inference.initialize(step_size=leap_size, friction=0.4, n_print=100)

  if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
    return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1,
            inference, w0, w1, b0, b1)
  else:
    return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1,
            inference)
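# A hypothetical driver for ed_graph_init (shown for the 'normal'-prior path,
# which needs no placeholder feeds at initialization); `next_batch` stands in
# for whatever mini-batching the surrounding script uses:
(x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1, inference = \
    ed_graph_init()
sess = ed.get_session()
sess.run(tf.global_variables_initializer())
for _ in range(inference.n_iter):
  X_batch, Y_batch = next_batch()  # hypothetical mini-batch helper
  info_dict = inference.update(feed_dict={x: X_batch, y_ph: Y_batch})
  inference.print_progress(info_dict)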
# sigma2 = Normal(loc=tf.zeros([1]), scale=tf.ones([1]) * 100)

# The Normal scale parameter must be positive, so take the square root of the
# variance (the original's tf.log(sigma2) can be zero or negative).
y = Normal(loc=ed.dot(x_train, Wf) + ed.dot(z_train, Wb) + Ib,
           scale=tf.sqrt(sigma2))

# INFERENCE
sess = ed.get_session()
T = 10000
qi = Empirical(params=tf.Variable(tf.zeros([T, 1])))
qw = Empirical(params=tf.Variable(tf.zeros([T, D])))
qb = Empirical(params=tf.Variable(tf.zeros([T, Db])))
qsigma2 = Empirical(params=tf.Variable(tf.ones([T, 1])))

inference = ed.SGHMC({Wf: qw, Wb: qb, Ib: qi, sigma2: qsigma2},
                     data={y: y_train})
inference.run(step_size=.0005)

# Trace plots of the sampled chains.
f, (ax1, ax2, ax3, ax4) = plt.subplots(4, sharex=True)
ax1.plot(qi.get_variables()[0].eval())
ax2.plot(qw.get_variables()[0].eval())
ax3.plot(qb.get_variables()[0].eval())
ax4.plot(qsigma2.get_variables()[0].eval())

# Posterior means after discarding the first half of the chain as burn-in.
burnin = int(T / 2)
qi_post = qi.get_variables()[0].eval()[burnin:].mean(axis=0)
qw_post = qw.get_variables()[0].eval()[burnin:].mean(axis=0)
qb_post = qb.get_variables()[0].eval()[burnin:].mean(axis=0)
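# A follow-up sketch (not in the original): point predictions from the
# burned-in posterior means. x_new and z_new are hypothetical design matrices
# with the same column layout as x_train and z_train; assumes numpy as np.
y_pred = np.dot(x_new, qw_post) + np.dot(z_new, qb_post) + qi_post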
y = tf.identity(Categorical(Softmax(X, dic)), name="y")

with tf.name_scope("posterior"):
  Nsamples = 1000
  with tf.name_scope("qweights"):
    qweights = Empirical(params=tf.Variable(
        tf.random_normal([Nsamples, dim, nb_classes])))
  with tf.name_scope("qbias"):
    qbias = Empirical(params=tf.Variable(
        tf.random_normal([Nsamples, nb_classes])))

N = 100
x = tf.placeholder(tf.float32, shape=[None, dim])
y_ph = tf.placeholder(tf.int32, shape=[None])

inference = ed.SGHMC({weights: qweights, bias: qbias}, data={y: y_ph})
inference.initialize(n_iter=1000, n_print=100, step_size=1e-1, friction=1.0)

# (The original also opened an InteractiveSession here; it is redundant,
# since the session below re-runs the initializer.)
print("Session")
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  num_examples = len(X_train)
  new_y_train = y_train.values.flatten()
  print("Training...")
  print(inference.n_iter)
  for i in range(inference.n_iter):
    info_dict = inference.update(feed_dict={x: X_train, y_ph: new_y_train})
    inference.print_progress(info_dict)
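  # A hedged evaluation sketch (not in the original, still inside the session
  # above): score training accuracy with posterior-mean parameters. Assumes
  # numpy is imported as np; argmax of the logits equals argmax of the
  # softmax, so no normalization is needed.
  w_hat = qweights.params.eval(session=sess).mean(axis=0)
  b_hat = qbias.params.eval(session=sess).mean(axis=0)
  preds = (np.asarray(X_train).dot(w_hat) + b_hat).argmax(axis=1)
  print("Training accuracy:", (preds == new_y_train).mean())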
def ed_graph_2(disc=1):
  # Priors
  if str(sys.argv[4]) == 'laplace':
    W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                  scale=(std**2 / D) * tf.ones([D, n_hidden]))
    W_1 = Laplace(loc=tf.zeros([n_hidden, n_hidden]),
                  scale=(std**2 / n_hidden) * tf.ones([n_hidden, n_hidden]))
    W_2 = Laplace(loc=tf.zeros([n_hidden, K]),
                  scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = Laplace(loc=tf.zeros(n_hidden), scale=(std**2 / D) * tf.ones(n_hidden))
    b_1 = Laplace(loc=tf.zeros(n_hidden),
                  scale=(std**2 / n_hidden) * tf.ones(n_hidden))
    b_2 = Laplace(loc=tf.zeros(K), scale=(std**2 / n_hidden) * tf.ones(K))
  if str(sys.argv[4]) == 'normal':
    W_0 = Normal(loc=tf.zeros([D, n_hidden]),
                 scale=std * D**-.5 * tf.ones([D, n_hidden]))
    # Hidden-to-hidden layer. The original declared shape [n_hidden, K] here,
    # which conflicts with the [n_samp, n_hidden, n_hidden] posterior below.
    W_1 = Normal(loc=tf.zeros([n_hidden, n_hidden]),
                 scale=std * n_hidden**-.5 * tf.ones([n_hidden, n_hidden]))
    W_2 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * n_hidden**-.5 * tf.ones([n_hidden, K]))
    b_0 = Normal(loc=tf.zeros(n_hidden), scale=std * D**-.5 * tf.ones(n_hidden))
    b_1 = Normal(loc=tf.zeros(n_hidden),
                 scale=10 * n_hidden**(-.5) * tf.ones(n_hidden))
    b_2 = Normal(loc=tf.zeros(K), scale=10 * n_hidden**(-.5) * tf.ones(K))
  if str(sys.argv[4]) == 'T':
    W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                   loc=tf.zeros([D, n_hidden]),
                   scale=(std**2 / D) * tf.ones([D, n_hidden]))
    W_1 = StudentT(df=df * tf.ones([n_hidden, n_hidden]),
                   loc=tf.zeros([n_hidden, n_hidden]),
                   scale=(std**2 / n_hidden) * tf.ones([n_hidden, n_hidden]))
    W_2 = StudentT(df=df * tf.ones([n_hidden, K]),
                   loc=tf.zeros([n_hidden, K]),
                   scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = StudentT(df=df * tf.ones([n_hidden]), loc=tf.zeros(n_hidden),
                   scale=(std**2 / D) * tf.ones(n_hidden))
    b_1 = StudentT(df=df * tf.ones([n_hidden]), loc=tf.zeros(n_hidden),
                   scale=(std**2 / n_hidden) * tf.ones(n_hidden))
    b_2 = StudentT(df=df * tf.ones([K]), loc=tf.zeros(K),
                   scale=(std**2 / n_hidden) * tf.ones(K))

  x = tf.placeholder(tf.float32, [None, None])
  y = Categorical(logits=nn(x, W_0, b_0, W_1, b_1, W_2, b_2))
  # We use a placeholder for the labels in anticipation of the training data.
  y_ph = tf.placeholder(tf.int32, [N])

  # Use placeholders for the pre-trained posteriors
  p0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
  p1 = tf.placeholder(tf.float32, [n_samp, n_hidden, n_hidden])
  p2 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
  pp0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
  pp1 = tf.placeholder(tf.float32, [n_samp, n_hidden])
  pp2 = tf.placeholder(tf.float32, [n_samp, K])
  w0 = tf.Variable(p0)
  w1 = tf.Variable(p1)
  w2 = tf.Variable(p2)
  b0 = tf.Variable(pp0)
  b1 = tf.Variable(pp1)
  b2 = tf.Variable(pp2)
  # Empirical distributions
  qW_0 = Empirical(params=w0)
  qW_1 = Empirical(params=w1)
  qW_2 = Empirical(params=w2)
  qb_0 = Empirical(params=b0)
  qb_1 = Empirical(params=b1)
  qb_2 = Empirical(params=b2)

  if str(sys.argv[3]) == 'hmc':
    inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1,
                        W_2: qW_2, b_2: qb_2}, data={y: y_ph})
  if str(sys.argv[3]) == 'sghmc':
    inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1,
                          W_2: qW_2, b_2: qb_2}, data={y: y_ph})

  # Initialize the inference variables
  if str(sys.argv[3]) == 'hmc':
    inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100,
                         scale={y: float(mnist.train.num_examples) / N})
  if str(sys.argv[3]) == 'sghmc':
    inference.initialize(step_size=leap_size, friction=0.4, n_print=100,
                         scale={y: float(mnist.train.num_examples) / N})

  return ((x, y), y_ph, W_0, b_0, W_1, b_1, W_2, b_2,
          qW_0, qb_0, qW_1, qb_1, qW_2, qb_2, inference,
          p0, p1, p2, pp0, pp1, pp2, w0, w1, w2, b0, b1, b2)
# qconv_b2 = Normal(loc=tf.Variable(tf.random_normal([num_filters])),
#                   scale=tf.nn.softplus(tf.Variable(tf.random_normal([num_filters]))))
# qw = Normal(loc=tf.Variable(tf.random_normal([num_filters * (D / 16), K])),
#             scale=tf.nn.softplus(tf.Variable(tf.random_normal([num_filters * (D / 16), K]))))
# qb = Normal(loc=tf.Variable(tf.random_normal([K])),
#             scale=tf.nn.softplus(tf.Variable(tf.random_normal([K]))))

y_ph = tf.placeholder(tf.int32, [N])

# inference = ed.KLqp({conv1: qconv1, conv_b1: qconv_b1, conv2: qconv2,
#                      conv_b2: qconv_b2, w: qw, b: qb}, data={y: y_ph})
inference = ed.SGHMC({conv1: qconv1, conv_b1: qconv_b1,
                      conv2: qconv2, conv_b2: qconv_b2,
                      w: qw, b: qb}, data={y: y_ph})
inference.initialize(n_iter=10000, n_print=100,
                     scale={y: float(mnist.train.num_examples) / N})

# Use an interactive session.
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
def ed_graph_2(disc=1):
  # Priors
  if str(sys.argv[4]) == 'laplace':
    W_0 = Laplace(loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
    W_1 = Laplace(loc=tf.zeros([n_hidden, n_hidden]),
                  scale=std**2 * (n_hidden**-1) * tf.ones([n_hidden, n_hidden]))
    W_2 = Laplace(loc=tf.zeros([n_hidden, K]),
                  scale=std**2 * (n_hidden**-1) * tf.ones([n_hidden, K]))
    b_0 = Laplace(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
    b_1 = Laplace(loc=tf.zeros(n_hidden),
                  scale=std**2 * (n_hidden**-1) * tf.ones(n_hidden))
    b_2 = Laplace(loc=tf.zeros(K), scale=std**2 * (n_hidden**-1) * tf.ones(K))
  if str(sys.argv[4]) == 'normal':
    W_0 = Normal(loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
    W_1 = Normal(loc=tf.zeros([n_hidden, n_hidden]),
                 scale=std * (n_hidden**-.5) * tf.ones([n_hidden, n_hidden]))
    W_2 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * (n_hidden**-.5) * tf.ones([n_hidden, K]))
    b_0 = Normal(loc=tf.zeros(n_hidden), scale=tf.ones(n_hidden))
    b_1 = Normal(loc=tf.zeros(n_hidden),
                 scale=std * (n_hidden**-.5) * tf.ones(n_hidden))
    b_2 = Normal(loc=tf.zeros(K), scale=std * (n_hidden**-.5) * tf.ones(K))
  if str(sys.argv[4]) == 'T':
    W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                   loc=tf.zeros([D, n_hidden]), scale=tf.ones([D, n_hidden]))
    W_1 = StudentT(df=df * tf.ones([n_hidden, n_hidden]),
                   loc=tf.zeros([n_hidden, n_hidden]),
                   scale=(std**2 / n_hidden) * tf.ones([n_hidden, n_hidden]))
    W_2 = StudentT(df=df * tf.ones([n_hidden, K]),
                   loc=tf.zeros([n_hidden, K]),
                   scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = StudentT(df=df * tf.ones([n_hidden]), loc=tf.zeros(n_hidden),
                   scale=tf.ones(n_hidden))
    b_1 = StudentT(df=df * tf.ones([n_hidden]), loc=tf.zeros(n_hidden),
                   scale=(std**2 / n_hidden) * tf.ones(n_hidden))
    b_2 = StudentT(df=df * tf.ones([K]), loc=tf.zeros(K),
                   scale=(std**2 / n_hidden) * tf.ones(K))

  # Inputs
  x = tf.placeholder(tf.float32, [None, None])
  # Regression output
  y = Normal(loc=nn(x, W_0, b_0, W_1, b_1, W_2, b_2),
             scale=std_out * tf.ones([tf.shape(x)[0]]))
  # We use a placeholder for the labels in anticipation of the training data.
  y_ph = tf.placeholder(tf.float32, [None])

  # Use placeholders for the pre-trained posteriors
  w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
  w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, n_hidden])
  w2 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
  b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
  b1 = tf.placeholder(tf.float32, [n_samp, n_hidden])
  b2 = tf.placeholder(tf.float32, [n_samp, K])
  # Empirical distributions
  qW_0 = Empirical(params=tf.Variable(w0))
  qW_1 = Empirical(params=tf.Variable(w1))
  qW_2 = Empirical(params=tf.Variable(w2))
  qb_0 = Empirical(params=tf.Variable(b0))
  qb_1 = Empirical(params=tf.Variable(b1))
  qb_2 = Empirical(params=tf.Variable(b2))

  if str(sys.argv[3]) == 'hmc':
    inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1,
                        W_2: qW_2, b_2: qb_2}, data={y: y_ph})
  if str(sys.argv[3]) == 'sghmc':
    inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1,
                          W_2: qW_2, b_2: qb_2}, data={y: y_ph})

  # Initialize the inference variables
  if str(sys.argv[3]) == 'hmc':
    inference.initialize(step_size=disc * leap_size, n_steps=step_no, n_print=100)
  if str(sys.argv[3]) == 'sghmc':
    inference.initialize(step_size=disc * leap_size, friction=0.4, n_print=100)

  return ((x, y), y_ph, W_0, b_0, W_1, b_1, W_2, b_2,
          qW_0, qb_0, qW_1, qb_1, qW_2, qb_2, inference,
          w0, w1, w2, b0, b1, b2)
# 1e-3 avoids silent integer division (1 / 1000 == 0) under Python 2.
qb_fc1 = Empirical(params=tf.Variable(1e-3 * tf.random_normal([T, 1024])))
qW_fc2 = Empirical(params=tf.Variable(1e-3 * tf.random_normal([T, 1024, 10])))
qb_fc2 = Empirical(params=tf.Variable(1e-3 * tf.random_normal([T, 10])))

dropout = 0.9
inference = ed.SGHMC({W_conv1: qW_conv1, b_conv1: qb_conv1,
                      W_conv2: qW_conv2, b_conv2: qb_conv2,
                      W_fc1: qW_fc1, b_fc1: qb_fc1,
                      W_fc2: qW_fc2, b_fc2: qb_fc2}, data={y: y_})
inference.initialize(n_iter=5000, n_print=100,
                     scale={y: float(mnist.train.num_examples) / N})

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
  X_batch, Y_batch = mnist.train.next_batch(N)
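  # (Sketch continuation; the loop body is truncated in the original. It
  # assumes the model's image placeholder is named `x`, which this snippet
  # does not show, while `y_` is the label placeholder bound above.)
  info_dict = inference.update(feed_dict={x: X_batch, y_: Y_batch})
  inference.print_progress(info_dict)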
qb_0 = Empirical(params=b0)
qb_1 = Empirical(params=b1)

if str(sys.argv[2]) == 'hmc':
  inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                     data={y: y_ph})
if str(sys.argv[2]) == 'sghmc':
  inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                       data={y: y_ph})

# Initialize the inference variables
if str(sys.argv[2]) == 'hmc':
  inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100)
if str(sys.argv[2]) == 'sghmc':
  inference.initialize(step_size=leap_size, friction=0.4, n_print=100)
if str(sys.argv[2]) == 'kl':
  inference.initialize(n_iter=inf_iter, n_print=100)

sess = ed.get_session()

if str(sys.argv[3]) == 'laplace' or str(sys.argv[3]) == 'T':
def ed_graph_init():
  # Priors
  if str(sys.argv[4]) == 'laplace':
    W_0 = Laplace(loc=tf.zeros([F, F, 1, C]), scale=tf.ones([F, F, 1, C]))
    W_1 = Laplace(loc=tf.zeros([F, F, C, C]), scale=tf.ones([F, F, C, C]))
    W_2 = Laplace(loc=tf.zeros([7 * 7 * C, n_hidden]),
                  scale=std**2 / (7 * 7 * C) * tf.ones([7 * 7 * C, n_hidden]))
    W_3 = Laplace(loc=tf.zeros([n_hidden, K]),
                  scale=std**2 / n_hidden * tf.ones([n_hidden, K]))
    b_0 = Laplace(loc=tf.zeros(C), scale=tf.ones(C))
    b_1 = Laplace(loc=tf.zeros(C), scale=tf.ones(C))
    b_2 = Laplace(loc=tf.zeros(n_hidden),
                  scale=std**2 / (7 * 7 * C) * tf.ones(n_hidden))
    b_3 = Laplace(loc=tf.zeros(K), scale=std**2 / n_hidden * tf.ones(K))
  if str(sys.argv[4]) == 'normal':
    W_0 = Normal(loc=tf.zeros([F, F, 1, C]), scale=tf.ones([F, F, 1, C]))
    W_1 = Normal(loc=tf.zeros([F, F, C, C]), scale=tf.ones([F, F, C, C]))
    W_2 = Normal(loc=tf.zeros([7 * 7 * C, n_hidden]),
                 scale=std * (7 * 7 * C)**-.5 * tf.ones([7 * 7 * C, n_hidden]))
    W_3 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * n_hidden**-.5 * tf.ones([n_hidden, K]))
    b_0 = Normal(loc=tf.zeros(C), scale=tf.ones(C))
    b_1 = Normal(loc=tf.zeros(C), scale=tf.ones(C))
    b_2 = Normal(loc=tf.zeros(n_hidden),
                 scale=std * (7 * 7 * C)**-.5 * tf.ones(n_hidden))
    b_3 = Normal(loc=tf.zeros(K), scale=std * n_hidden**-.5 * tf.ones(K))
  if str(sys.argv[4]) == 'T':
    W_0 = StudentT(df=df * tf.ones([F, F, 1, C]),
                   loc=tf.zeros([F, F, 1, C]), scale=tf.ones([F, F, 1, C]))
    W_1 = StudentT(df=df * tf.ones([F, F, C, C]),
                   loc=tf.zeros([F, F, C, C]), scale=tf.ones([F, F, C, C]))
    W_2 = StudentT(df=df * tf.ones([7 * 7 * C, n_hidden]),
                   loc=tf.zeros([7 * 7 * C, n_hidden]),
                   scale=std**2 / (7 * 7 * C) * tf.ones([7 * 7 * C, n_hidden]))
    W_3 = StudentT(df=df * tf.ones([n_hidden, K]),
                   loc=tf.zeros([n_hidden, K]),
                   scale=std**2 / n_hidden * tf.ones([n_hidden, K]))
    b_0 = StudentT(df=df * tf.ones(C), loc=tf.zeros(C), scale=tf.ones(C))
    b_1 = StudentT(df=df * tf.ones(C), loc=tf.zeros(C), scale=tf.ones(C))
    b_2 = StudentT(df=df * tf.ones(n_hidden), loc=tf.zeros(n_hidden),
                   scale=std**2 / (7 * 7 * C) * tf.ones(n_hidden))
    b_3 = StudentT(df=df * tf.ones(K), loc=tf.zeros(K),
                   scale=std**2 / n_hidden * tf.ones(K))

  x = tf.placeholder(tf.float32, [None, None])
  # Categorical likelihood
  y = Categorical(logits=nn(x, W_0, b_0, W_1, b_1, W_2, b_2, W_3, b_3))
  # We use a placeholder for the labels in anticipation of the training data.
  y_ph = tf.placeholder(tf.int32, [None])

  # Posteriors
  if str(sys.argv[4]) == 'normal':
    qW_0 = Empirical(params=tf.Variable(tf.random_normal([n_samp, F, F, 1, C])))
    qW_1 = Empirical(params=tf.Variable(tf.random_normal([n_samp, F, F, C, C])))
    qW_2 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, 7 * 7 * C, n_hidden],
                         stddev=std * (7 * 7 * C)**-.5)))
    qW_3 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, n_hidden, K], stddev=std * n_hidden**-.5)))
    qb_0 = Empirical(params=tf.Variable(tf.random_normal([n_samp, C])))
    qb_1 = Empirical(params=tf.Variable(tf.random_normal([n_samp, C])))
    qb_2 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, n_hidden], stddev=std * (7 * 7 * C)**-.5)))
    qb_3 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, K], stddev=std * n_hidden**-.5)))
  if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
    # Use a placeholder, otherwise we cannot assign a tensor > 2GB
    p0 = tf.placeholder(tf.float32, [n_samp, F, F, 1, C])
    p1 = tf.placeholder(tf.float32, [n_samp, F, F, C, C])
    p2 = tf.placeholder(tf.float32, [n_samp, 7 * 7 * C, n_hidden])
    p3 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
    pp0 = tf.placeholder(tf.float32, [n_samp, C])
    pp1 = tf.placeholder(tf.float32, [n_samp, C])
    pp2 = tf.placeholder(tf.float32, [n_samp, n_hidden])
    pp3 = tf.placeholder(tf.float32, [n_samp, K])
    w0 = tf.Variable(p0)
    w1 = tf.Variable(p1)
    w2 = tf.Variable(p2)
    w3 = tf.Variable(p3)
    b0 = tf.Variable(pp0)
    b1 = tf.Variable(pp1)
    b2 = tf.Variable(pp2)
    b3 = tf.Variable(pp3)
    # Empirical distribution
    qW_0 = Empirical(params=w0)
    qW_1 = Empirical(params=w1)
    qW_2 = Empirical(params=w2)
    qW_3 = Empirical(params=w3)
    qb_0 = Empirical(params=b0)
    qb_1 = Empirical(params=b1)
    qb_2 = Empirical(params=b2)
    qb_3 = Empirical(params=b3)

  if str(sys.argv[3]) == 'hmc':
    inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1,
                        W_2: qW_2, b_2: qb_2, W_3: qW_3, b_3: qb_3},
                       data={y: y_ph})
  if str(sys.argv[3]) == 'sghmc':
    inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1,
                          W_2: qW_2, b_2: qb_2, W_3: qW_3, b_3: qb_3},
                         data={y: y_ph})

  # Initialize the inference variables
  if str(sys.argv[3]) == 'hmc':
    inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100,
                         scale={y: float(mnist.train.num_examples) / N})
  if str(sys.argv[3]) == 'sghmc':
    inference.initialize(step_size=leap_size, friction=0.4, n_print=100,
                         scale={y: float(mnist.train.num_examples) / N})

  if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
    return ((x, y), y_ph, W_0, b_0, W_1, b_1, W_2, b_2,
            qW_0, qb_0, qW_1, qb_1, qW_2, qb_2, qW_3, qb_3, inference,
            p0, p1, p2, p3, pp0, pp1, pp2, pp3,
            w0, w1, w2, w3, b0, b1, b2, b3)
  else:
    return ((x, y), y_ph, W_0, b_0, W_1, b_1, W_2, b_2,
            qW_0, qb_0, qW_1, qb_1, qW_2, qb_2, qW_3, qb_3, inference)
def main(_):
  ed.set_seed(42)

  # DATA
  X_train, y_train = build_toy_dataset(FLAGS.N)
  X_test, y_test = build_toy_dataset(FLAGS.N)

  # MODEL
  X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D])
  w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D))
  b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
  y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(FLAGS.N))

  # INFERENCE
  qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D]))
  qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T, 1]))

  inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
  inference.run(step_size=1e-3)

  # CRITICISM

  # Plot posterior samples.
  sns.jointplot(qb.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride],
                qw.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride])
  plt.show()

  # Posterior predictive checks.
  y_post = ed.copy(y, {w: qw, b: qb})
  # This is equivalent to
  # y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(FLAGS.N))

  print("Mean squared error on test data:")
  print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

  print("Displaying prior predictive samples.")
  n_prior_samples = 10

  w_prior = w.sample(n_prior_samples).eval()
  b_prior = b.sample(n_prior_samples).eval()

  plt.scatter(X_train, y_train)

  inputs = np.linspace(-1, 10, num=400)
  for ns in range(n_prior_samples):
    output = inputs * w_prior[ns] + b_prior[ns]
    plt.plot(inputs, output)

  plt.show()

  print("Displaying posterior predictive samples.")
  n_posterior_samples = 10

  w_post = qw.sample(n_posterior_samples).eval()
  b_post = qb.sample(n_posterior_samples).eval()

  plt.scatter(X_train, y_train)

  inputs = np.linspace(-1, 10, num=400)
  for ns in range(n_posterior_samples):
    output = inputs * w_post[ns] + b_post[ns]
    plt.plot(inputs, output)

  plt.show()
def ed_graph_init():
  # Priors
  if str(sys.argv[4]) == 'laplace':
    W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                  scale=(std**2 / D) * tf.ones([D, n_hidden]))
    W_1 = Laplace(loc=tf.zeros([n_hidden, K]),
                  scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = Laplace(loc=tf.zeros(n_hidden), scale=(std**2 / D) * tf.ones(n_hidden))
    b_1 = Laplace(loc=tf.zeros(K), scale=(std**2 / n_hidden) * tf.ones(K))
  if str(sys.argv[4]) == 'normal':
    W_0 = Normal(loc=tf.zeros([D, n_hidden]),
                 scale=std * D**(-.5) * tf.ones([D, n_hidden]))
    W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K]))
    b_0 = Normal(loc=tf.zeros(n_hidden), scale=std * D**(-.5) * tf.ones(n_hidden))
    b_1 = Normal(loc=tf.zeros(K), scale=std * n_hidden**(-.5) * tf.ones(K))
  if str(sys.argv[4]) == 'T':
    # (The original wrote `std2 * 2 / D` here, an apparent typo for std**2 / D,
    # matching b_0 below and the other prior branches.)
    W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                   loc=tf.zeros([D, n_hidden]),
                   scale=(std**2 / D) * tf.ones([D, n_hidden]))
    W_1 = StudentT(df=df * tf.ones([n_hidden, K]),
                   loc=tf.zeros([n_hidden, K]),
                   scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
    b_0 = StudentT(df=df * tf.ones([n_hidden]), loc=tf.zeros(n_hidden),
                   scale=(std**2 / D) * tf.ones(n_hidden))
    b_1 = StudentT(df=df * tf.ones([K]), loc=tf.zeros(K),
                   scale=(std**2 / n_hidden) * tf.ones(K))

  x = tf.placeholder(tf.float32, [None, None])
  # Categorical likelihood
  y = Categorical(logits=nn(x, W_0, b_0, W_1, b_1))
  # We use a placeholder for the labels in anticipation of the training data.
  y_ph = tf.placeholder(tf.int32, [None])

  # Posteriors
  if str(sys.argv[4]) == 'normal':
    qW_0 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, D, n_hidden], stddev=std * (D**-.5))))
    qW_1 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, n_hidden, K], stddev=std * (n_hidden**-.5))))
    qb_0 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, n_hidden], stddev=std * (D**-.5))))
    qb_1 = Empirical(params=tf.Variable(
        tf.random_normal([n_samp, K], stddev=std * (n_hidden**-.5))))
  if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
    # Use a placeholder, otherwise we cannot assign a tensor > 2GB
    w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
    w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
    b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
    b1 = tf.placeholder(tf.float32, [n_samp, K])
    # Empirical distributions (params are fed via the placeholders above)
    qW_0 = Empirical(params=tf.Variable(w0))
    qW_1 = Empirical(params=tf.Variable(w1))
    qb_0 = Empirical(params=tf.Variable(b0))
    qb_1 = Empirical(params=tf.Variable(b1))

  if str(sys.argv[3]) == 'hmc':
    inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                       data={y: y_ph})
  if str(sys.argv[3]) == 'sghmc':
    inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                         data={y: y_ph})

  # Initialize the inference variables
  if str(sys.argv[3]) == 'hmc':
    inference.initialize(step_size=leap_size, n_steps=step_no, n_print=100,
                         scale={y: float(mnist.train.num_examples) / N})
  if str(sys.argv[3]) == 'sghmc':
    inference.initialize(step_size=leap_size, friction=0.4, n_print=100,
                         scale={y: float(mnist.train.num_examples) / N})

  if str(sys.argv[4]) == 'laplace' or str(sys.argv[4]) == 'T':
    return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1,
            inference, w0, w1, b0, b1)
  else:
    return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1,
            inference)
X_test, y_test = build_toy_dataset(N)

# MODEL (older Edward API: Normal takes mu/sigma rather than loc/scale)
X = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# INFERENCE
T = 5000     # Number of samples.
nburn = 100  # Number of burn-in samples.
stride = 10  # Frequency with which to plot samples.
qw = Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = Empirical(params=tf.Variable(tf.random_normal([T, 1])))

inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run(step_size=1e-3)

# CRITICISM

# Plot posterior samples.
sns.jointplot(qb.params.eval()[nburn:T:stride],
              qw.params.eval()[nburn:T:stride])
plt.show()

# Posterior predictive checks.
y_post = ed.copy(y, {w: qw, b: qb})
# This is equivalent to
# y_post = Normal(mu=ed.dot(X, qw) + qb, sigma=tf.ones(N))
fig, ax = plt.subplots()
cs = ax.contour(X, Y, Z)
if label:
  plt.clabel(cs, inline=1, fontsize=10)

ed.set_seed(42)

# MODEL (older Edward API: mu/sigma rather than loc/scale)
z = MultivariateNormalFull(mu=tf.ones(2),
                           sigma=tf.constant([[1.0, 0.8], [0.8, 1.0]]))

# INFERENCE
qz = Empirical(params=tf.Variable(tf.random_normal([5000, 2])))

inference = ed.SGHMC({z: qz})
inference.run(step_size=0.02)

# CRITICISM
sess = ed.get_session()
mean, std = sess.run([qz.mean(), qz.std()])
print("Inferred posterior mean:")
print(mean)
print("Inferred posterior std:")
print(std)

fig, ax = plt.subplots()
trace = sess.run(qz.params)
ax.scatter(trace[:, 0], trace[:, 1], marker=".")
mvn_plot_contours(z, ax=ax)
plt.show()
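# Sanity check (a sketch, not in the original): with no data bound, SGHMC
# targets the model itself, so the sample mean should approach (1, 1), the
# marginal stddevs should approach 1, and the sample correlation 0.8.
import numpy as np
print("Sample correlation:", np.corrcoef(trace.T)[0, 1])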
n_samples = 1000

x = tf.placeholder(tf.float32, [None, D])
w = Normal(loc=tf.zeros([D, K]), scale=tf.ones([D, K]))
b = Normal(loc=tf.zeros(K), scale=tf.ones(K))
y = Categorical(logits=tf.matmul(tf.cast(x, tf.float32), w) + b)

# qw = Normal(loc=tf.Variable(tf.random_normal([D, K])),
#             scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, K]))))
# qb = Normal(loc=tf.Variable(tf.random_normal([K])),
#             scale=tf.nn.softplus(tf.Variable(tf.random_normal([K]))))
qw = Empirical(params=tf.Variable(tf.random_normal([n_samples, D, K])))
qb = Empirical(params=tf.Variable(tf.random_normal([n_samples, K])))

y_ph = tf.placeholder(tf.int32, [N])

# inference = ed.KLqp({w: qw, b: qb}, data={y: y_ph})
inference = ed.SGHMC({w: qw, b: qb}, data={y: y_ph})
inference.initialize(n_iter=n_samples, n_print=50, scale={y: float(rows) / N},
                     step_size=0.1, friction=1.0)
# inference.initialize(n_iter=n_samples, n_print=50, step_size=0.1, friction=2.0)

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# X_train = mnist.train.images
def next_batch(seq, size):
  return (seq[pos:pos + size] for pos in xrange(0, len(seq), size))
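# A hypothetical driver loop for the snippet above (X_train and y_train come
# from the surrounding script; mini-batches of size N are fed each step,
# cycling through the training set):
n_batches = len(X_train) // N
for i in range(inference.n_iter):
  start = (i % n_batches) * N
  info_dict = inference.update(feed_dict={x: X_train[start:start + N],
                                          y_ph: y_train[start:start + N]})
  inference.print_progress(info_dict)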