def train_model(self, games, results, num_train_steps=10000):
    """Fit the variational posteriors with KLqp and print metrics before and after.

    Args:
        games: observations fed to an int32 placeholder of shape [None, 3].
        results: observed outcomes bound to the model output ``self.predict(x)``.
        num_train_steps: number of KLqp iterations to run.
    """
    metric_names = ['accuracy', 'log_likelihood']

    # Pair every prior variable with its variational posterior.
    params_post = dict(zip(self.prior, self.var_post))

    x = tf.placeholder(tf.int32, shape=[None, 3])
    y = self.predict(x)

    # Metrics with the output still depending on the priors.
    print('accuracy, log_likelihood',
          ed.evaluate(metric_names, data={y: results, x: games}))

    inference = ed.KLqp(params_post, data={y: results, x: games})
    inference.run(n_samples=32, n_iter=num_train_steps)

    # Get output object dependent on variational posteriors rather than priors.
    # The copy must be taken of the output variable itself: the previous
    # `y.d2` referenced an attribute that nothing in this method defines.
    out_post = ed.copy(y, params_post)

    # Re-evaluate metrics
    print('accuracy, log_likelihood',
          ed.evaluate(metric_names, data={out_post: results, x: games}))
def main():
    """Fit a Bayesian linear regression on SCUT-FBP5500 features with KLqp.

    Prints the loaded splits, runs inference, then prints the mean squared and
    mean absolute error obtained with the test split.
    """
    (X_train, y_train, X_test, y_test,
     train_filenames, test_filenames) = prepare_scutfbp5500(
         feat_layers=["conv4_1", "conv5_1"])

    # NOTE(review): the messages say "Shape of" but the values themselves are
    # formatted into them; kept as-is to preserve the output.
    for template, value in (('Shape of X_train: {0}', X_train),
                            ('Shape of X_test: {0}', X_test),
                            ('Shape of y_train: {0}', y_train),
                            ('Shape of y_test: {0}', y_test)):
        print(template.format(value))

    n_rows = 3300
    n_features = len(X_train[0])

    # Model: Normal priors on weights and bias, unit-scale Normal likelihood.
    X = tf.placeholder(tf.float32, [n_rows, n_features])
    w = Normal(loc=tf.zeros(n_features), scale=tf.ones(n_features))
    b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
    y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(n_rows))

    # Variational family: Normals with softplus-constrained scales.
    qw_loc = tf.get_variable("qw/loc", [n_features])
    qw_scale = tf.nn.softplus(tf.get_variable("qw/scale", [n_features]))
    qw = Normal(loc=qw_loc, scale=qw_scale)
    qb_loc = tf.get_variable("qb/loc", [1])
    qb_scale = tf.nn.softplus(tf.get_variable("qb/scale", [1]))
    qb = Normal(loc=qb_loc, scale=qb_scale)

    posterior = {w: qw, b: qb}
    inference = ed.KLqp(posterior, data={X: X_train, y: y_train})
    inference.run(n_samples=3300, n_iter=250)

    # NOTE(review): X is sized to 3300 rows, so X_test presumably has to match.
    y_post = ed.copy(y, {w: qw, b: qb})

    print("Mean squared error on test data:")
    print(ed.evaluate('mean_squared_error',
                      data={X: X_test, y_post: y_test}))
    print("Mean absolute error on test data:")
    print(ed.evaluate('mean_absolute_error',
                      data={X: X_test, y_post: y_test}))
def test_n_samples(self):
    """`ed.evaluate` accepts integer `n_samples` and raises TypeError for a string."""
    with self.test_session():
        rv = Normal(loc=0.0, scale=1.0)
        observed = tf.constant(0.0)

        for count in (1, 5):
            ed.evaluate('mean_squared_error', {rv: observed}, n_samples=count)

        with self.assertRaises(TypeError):
            ed.evaluate('mean_squared_error', {rv: observed}, n_samples='1')
def build_net(x_train, y_train, num_train_steps=10000, x_test=None,
              y_test=None):
    """Train a Bayesian dense network with KLqp and print metrics before/after.

    Args:
        x_train: training inputs, fed to a float32 placeholder of width 47.
        y_train: training labels bound to the Bernoulli output.
        num_train_steps: number of KLqp iterations.
        x_test: evaluation inputs; falls back to ``x_train`` when None.
        y_test: evaluation labels; falls back to ``y_train`` when None.
    """
    # Number of stats currently used to predict outcome:
    # 23 per team + variable for side
    inputs = 47
    outputs = 1
    metric_names = ['accuracy', 'log_likelihood', 'crossentropy']

    if x_test is None:
        x_test = x_train
    if y_test is None:
        y_test = y_train

    # Input data goes here (via feed_dict or equiv)
    x = tf.placeholder(tf.float32, shape=[None, inputs])

    # Widths of fully-connected layers in NN; the last layer is linear.
    layer_widths = [16, 16, 16, 16, 16, 16]
    activations = [Nets.selu for _ in layer_widths] + [tf.identity]
    layer_widths += [outputs]
    net = Nets.SuperDenseNet(inputs, layer_widths, activations)

    # Construct all parameters of NN, set to independent gaussian priors
    params = [Nets.gauss_prior(shape) for shape in net.param_space()]
    out = ed.models.Bernoulli(logits=net.apply(x, params))

    # Variational posteriors for NN params
    qparams = [Nets.gauss_var_post(w.shape) for w in params]

    # Map from random variables to their variational posterior objects
    params_post = dict(zip(params, qparams))

    # Evaluate the metrics over the dataset before training
    print('accuracy, log_likelihood, crossentropy',
          ed.evaluate(metric_names, data={out: y_test, x: x_test}))

    # Run variational inference, minimizing KL(q, p) using stochastic
    # gradient descent over variational params
    inference = ed.KLqp(params_post, data={out: y_train, x: x_train})
    inference.run(n_samples=32, n_iter=num_train_steps)

    # Get output object dependent on variational posteriors rather than priors
    out_post = ed.copy(out, params_post)

    # Re-evaluate metrics
    print('accuracy, log_likelihood, crossentropy',
          ed.evaluate(metric_names, data={out_post: y_test, x: x_test}))
def test_metrics_classification(self):
    """Check the accuracy metrics on Bernoulli, Categorical and Multinomial cases."""

    def score(metric, rv, observed):
        # Every case evaluates with n_samples=1.
        return ed.evaluate(metric, {rv: observed}, n_samples=1)

    with self.test_session():
        # binary_accuracy
        rv = Bernoulli(probs=0.51)
        obs = tf.constant(1)
        self.assertAllClose(1.0, score('binary_accuracy', rv, obs))

        rv = Bernoulli(probs=0.51, sample_shape=5)
        obs = tf.constant([1, 1, 1, 0, 0])
        self.assertAllClose(0.6, score('binary_accuracy', rv, obs))

        rv = Bernoulli(probs=tf.constant([0.51, 0.49, 0.49]))
        obs = tf.constant([1, 0, 1])
        self.assertAllClose(2.0 / 3, score('binary_accuracy', rv, obs))

        # sparse_categorical_accuracy
        rv = Categorical(probs=tf.constant([0.48, 0.51, 0.01]))
        obs = tf.constant(1)
        self.assertAllClose(
            1.0, score('sparse_categorical_accuracy', rv, obs))

        rv = Categorical(probs=tf.constant([0.48, 0.51, 0.01]),
                         sample_shape=5)
        obs = tf.constant([1, 1, 1, 0, 2])
        self.assertAllClose(
            0.6, score('sparse_categorical_accuracy', rv, obs))

        rv = Categorical(
            probs=tf.constant([[0.48, 0.51, 0.01], [0.51, 0.48, 0.01]]))
        obs = tf.constant([1, 2])
        self.assertAllClose(
            0.5, score('sparse_categorical_accuracy', rv, obs))

        # categorical_accuracy (one-hot observations in the variable's dtype)
        rv = Multinomial(total_count=1.0,
                         probs=tf.constant([0.48, 0.51, 0.01]))
        obs = tf.constant([0, 1, 0], dtype=rv.dtype.as_numpy_dtype)
        self.assertAllClose(1.0, score('categorical_accuracy', rv, obs))

        rv = Multinomial(total_count=1.0,
                         probs=tf.constant([0.48, 0.51, 0.01]),
                         sample_shape=5)
        obs = tf.constant(
            [[0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1]],
            dtype=rv.dtype.as_numpy_dtype)
        self.assertAllClose(0.6, score('categorical_accuracy', rv, obs))

        # multinomial_accuracy
        rv = Multinomial(total_count=5.0,
                         probs=tf.constant([0.4, 0.6, 0.0]))
        obs = tf.constant([2, 3, 0], dtype=rv.dtype.as_numpy_dtype)
        self.assertAllClose(1.0, score('multinomial_accuracy', rv, obs))
def test_data(self):
    """`ed.evaluate` accepts random-variable and placeholder keys, not strings."""
    with self.test_session():
        placeholder = tf.placeholder(tf.float32, [])
        x = Normal(loc=placeholder, scale=1.0)
        y = 2.0 * Normal(loc=0.0, scale=1.0)

        x_obs = tf.constant(0.0)
        placeholder_value = np.array(0.0)
        y_obs = tf.constant(20.0)

        # A random variable together with the placeholder it depends on.
        ed.evaluate('mean_squared_error',
                    {x: x_obs, placeholder: placeholder_value},
                    n_samples=1)
        # A scaled random variable as the only key.
        ed.evaluate('mean_squared_error', {y: y_obs}, n_samples=1)

        # A plain string key is rejected.
        with self.assertRaises(TypeError):
            ed.evaluate('mean_squared_error', {'y': y_obs}, n_samples=1)
def test_data(self):
    """`ed.evaluate` runs with random-variable keys and a fed placeholder."""
    tf.InteractiveSession()

    placeholder = tf.placeholder(tf.float32, [])
    x = Normal(mu=placeholder, sigma=1.0)
    y = 2.0 * Normal(mu=0.0, sigma=1.0)

    x_obs = tf.constant(0.0)
    placeholder_value = np.array(0.0)
    y_obs = tf.constant(20.0)

    # A random variable together with the placeholder it depends on.
    ed.evaluate('mean_squared_error',
                {x: x_obs, placeholder: placeholder_value},
                n_samples=1)
    # A scaled random variable as the only key.
    ed.evaluate('mean_squared_error', {y: y_obs}, n_samples=1)
def plot_predictions(y_post, X_test, y_test):
    """Scatter the observed targets and the predictions against ``X_test``.

    Predictions come from ``ed.evaluate`` called with ``raw_predictions`` as
    the metric (defined outside this function) and ``y_post`` bound to
    ``y_test``.
    """
    y_pred = ed.evaluate(raw_predictions, data={y_post: y_test})

    for values, label in ((y_test, "truth"), (y_pred, "predicted")):
        plt.scatter(X_test, values, label=label)

    plt.legend()
    plt.show()
def train_model(x_train, y_train):
    """Train a small Bayesian dense network with KLqp and print metrics.

    Args:
        x_train: training inputs with 2 features per row; the placeholder is
            sized to ``len(x_train)`` rows.
        y_train: training labels bound to the Bernoulli output.
    """
    inputs = 2
    outputs = 1
    metric_names = ['accuracy', 'log_likelihood', 'crossentropy']

    # Widths of fully-connected layers in NN; the last layer is linear.
    layer_widths = [8, 8, 8, 8, 8]
    activations = [tf.nn.elu for _ in layer_widths] + [tf.identity]
    layer_widths += [outputs]

    # Input data goes here (via feed_dict or equiv)
    x = tf.placeholder(tf.float32, shape=[len(x_train), inputs])
    net = Nets.SuperDenseNet(inputs, layer_widths, activations)

    # Construct all parameters of NN, set to independent gaussian priors
    weights = [gauss_prior(shape) for shape in net.weight_shapes()]
    biases = [gauss_prior(shape) for shape in net.bias_shapes()]
    out = ed.models.Bernoulli(logits=net.apply(x, weights, biases))

    # Variational posteriors for NN params
    qweights = [gauss_var_post(w.shape) for w in weights]
    qbiases = [gauss_var_post(b.shape) for b in biases]

    # Map from random variables to their variational posterior objects.
    # Each list is paired with its own posteriors; the bias map used to be
    # built by iterating over the length of `weights`.
    var_post = dict(zip(weights, qweights))
    var_post.update(zip(biases, qbiases))

    # Evaluate the metrics over the dataset before training
    print('accuracy, log_likelihood, crossentropy',
          ed.evaluate(metric_names, data={out: y_train, x: x_train}))

    # Run variational inference, minimizing KL(q, p) using stochastic
    # gradient descent over variational params
    inference = ed.KLqp(var_post, data={out: y_train, x: x_train})
    inference.run(n_samples=16, n_iter=10000)

    # Get output object dependent on variational posteriors rather than priors
    out_post = ed.copy(out, var_post)

    # Re-evaluate metrics
    print('accuracy, log_likelihood, crossentropy',
          ed.evaluate(metric_names, data={out_post: y_train, x: x_train}))
def test_custom_metric(self):
    """A callable metric is accepted; its name passed as a string is not."""

    def logcosh(y_true, y_pred):
        residual = y_pred - y_true
        per_element = (residual + tf.nn.softplus(-2.0 * residual)
                       - tf.log(2.0))
        return tf.reduce_mean(per_element, axis=-1)

    with self.test_session():
        rv = Normal(loc=0.0, scale=1.0)
        observed = tf.constant(0.0)

        # Callable on its own, then mixed with a built-in metric name.
        ed.evaluate(logcosh, {rv: observed}, n_samples=1)
        ed.evaluate(['mean_squared_error', logcosh], {rv: observed},
                    n_samples=1)

        with self.assertRaises(NotImplementedError):
            ed.evaluate('logcosh', {rv: observed}, n_samples=1)
def test_output_key(self):
    """With two random-variable keys, `output_key` is needed; omitting it raises KeyError."""
    tf.InteractiveSession()

    placeholder = tf.placeholder(tf.float32, [])
    x = Normal(mu=placeholder, sigma=1.0)
    y = 2.0 * x

    x_obs = tf.constant(0.0)
    placeholder_value = np.array(0.0)
    y_obs = tf.constant(20.0)

    # One random-variable key at a time.
    ed.evaluate('mean_squared_error',
                {x: x_obs, placeholder: placeholder_value},
                n_samples=1)
    ed.evaluate('mean_squared_error',
                {y: y_obs, placeholder: placeholder_value},
                n_samples=1)

    # Both keys present, with the output named explicitly.
    ed.evaluate('mean_squared_error',
                {x: x_obs, y: y_obs, placeholder: placeholder_value},
                n_samples=1, output_key=x)

    # Both keys present and no output_key.
    with self.assertRaises(KeyError):
        ed.evaluate('mean_squared_error',
                    {x: x_obs, y: y_obs, placeholder: placeholder_value},
                    n_samples=1)
def fit(self, X, y):
    """Fit the variational posteriors with KLqp and return the training RMSE.

    Args:
        X: inputs fed to ``self.X``.
        y: targets bound to ``self.y`` during inference.

    Returns:
        Square root of the mean squared error obtained from ``evaluate`` on
        ``(X, y)`` with the output variable rewired to the fitted posteriors.
    """
    # Local import: `evaluate` and `KLqp` are used as bare names in this
    # file, so the `ed` alias cannot be assumed to exist.
    from edward import copy as ed_copy

    # Map each prior weight/bias to its variational posterior.
    pairs = {}
    for w, qw, b, qb in zip(self.weights, self.qW, self.biases, self.qb):
        pairs[w] = qw
        pairs[b] = qb

    inference = KLqp(pairs, data={self.X: X, self.y: y})
    inference.run(logdir="log")

    # Score the fitted model: `self.y` still depends on the priors, so the
    # error has to be computed on a copy that depends on the posteriors.
    y_post = ed_copy(self.y, pairs)
    mse = evaluate("mean_squared_error", data={y_post: y, self.X: X})
    return np.sqrt(mse)
def test_metrics_classification(self):
    """Check the accuracy metrics on Bernoulli, Categorical and Multinomial cases."""

    def score(metric, rv, observed):
        # Every case evaluates with n_samples=1.
        return ed.evaluate(metric, {rv: observed}, n_samples=1)

    with self.test_session():
        # binary_accuracy
        rv = Bernoulli(probs=0.51)
        obs = tf.constant(1)
        self.assertAllClose(1.0, score('binary_accuracy', rv, obs))

        rv = Bernoulli(probs=0.51, sample_shape=5)
        obs = tf.constant([1, 1, 1, 0, 0])
        self.assertAllClose(0.6, score('binary_accuracy', rv, obs))

        rv = Bernoulli(probs=tf.constant([0.51, 0.49, 0.49]))
        obs = tf.constant([1, 0, 1])
        self.assertAllClose(2.0 / 3, score('binary_accuracy', rv, obs))

        # sparse_categorical_accuracy
        rv = Categorical(probs=tf.constant([0.48, 0.51, 0.01]))
        obs = tf.constant(1)
        self.assertAllClose(
            1.0, score('sparse_categorical_accuracy', rv, obs))

        rv = Categorical(probs=tf.constant([0.48, 0.51, 0.01]),
                         sample_shape=5)
        obs = tf.constant([1, 1, 1, 0, 2])
        self.assertAllClose(
            0.6, score('sparse_categorical_accuracy', rv, obs))

        rv = Categorical(
            probs=tf.constant([[0.48, 0.51, 0.01], [0.51, 0.48, 0.01]]))
        obs = tf.constant([1, 2])
        self.assertAllClose(
            0.5, score('sparse_categorical_accuracy', rv, obs))

        # categorical_accuracy (one-hot observations in the variable's dtype)
        rv = Multinomial(total_count=1.0,
                         probs=tf.constant([0.48, 0.51, 0.01]))
        obs = tf.constant([0, 1, 0], dtype=rv.dtype.as_numpy_dtype)
        self.assertAllClose(1.0, score('categorical_accuracy', rv, obs))

        rv = Multinomial(total_count=1.0,
                         probs=tf.constant([0.48, 0.51, 0.01]),
                         sample_shape=5)
        obs = tf.constant(
            [[0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 0, 1]],
            dtype=rv.dtype.as_numpy_dtype)
        self.assertAllClose(0.6, score('categorical_accuracy', rv, obs))
def test_metrics(self):
    """`metrics` may be a name or a list of names; other inputs raise."""
    with self.test_session():
        rv = Normal(loc=0.0, scale=1.0)
        observed = tf.constant(0.0)

        accepted = (
            'mean_squared_error',
            ['mean_squared_error'],
            ['mean_squared_error', 'mean_absolute_error'],
        )
        for metrics in accepted:
            ed.evaluate(metrics, {rv: observed}, n_samples=1)

        # A random variable passed as the metric is a type error.
        with self.assertRaises(TypeError):
            ed.evaluate(rv, {rv: observed}, n_samples=1)

        # An unknown metric name is not implemented.
        with self.assertRaises(NotImplementedError):
            ed.evaluate('hello world', {rv: observed}, n_samples=1)
def test_metrics(self):
    """`metrics` may be a name or a list of names; unknown names raise."""
    tf.InteractiveSession()

    rv = Normal(mu=0.0, sigma=1.0)
    observed = tf.constant(0.0)

    accepted = (
        'mean_squared_error',
        ['mean_squared_error'],
        ['mean_squared_error', 'mean_absolute_error'],
    )
    for metrics in accepted:
        ed.evaluate(metrics, {rv: observed}, n_samples=1)

    # An unknown metric name is not implemented.
    with self.assertRaises(NotImplementedError):
        ed.evaluate('hello world', {rv: observed}, n_samples=1)
def main(_):
    """Fit a probabilistic matrix factorisation with KLqp on toy data.

    Prints the mean squared error reported by ``ed.evaluate`` and shows the
    true matrix and one evaluated reconstruction as heat maps.
    """
    n_latent, n_rows, n_cols = FLAGS.D, FLAGS.N, FLAGS.M

    # True latent factors
    U_true = np.random.randn(n_latent, n_rows)
    V_true = np.random.randn(n_latent, n_cols)

    # DATA: the test indicator is the complement of the training one
    R_true = build_toy_dataset(U_true, V_true, n_rows, n_cols)
    I_train = get_indicators(n_rows, n_cols)
    I_test = 1 - I_train

    # MODEL: standard-Normal factors, unit-scale Normal likelihood whose mean
    # is multiplied element-wise by the indicator placeholder
    I = tf.placeholder(tf.float32, [n_rows, n_cols])
    U = Normal(loc=0.0, scale=1.0, sample_shape=[n_latent, n_rows])
    V = Normal(loc=0.0, scale=1.0, sample_shape=[n_latent, n_cols])
    R = Normal(loc=tf.matmul(tf.transpose(U), V) * I,
               scale=tf.ones([n_rows, n_cols]))

    # INFERENCE: Normal variational factors with softplus-constrained scales
    qU_loc = tf.get_variable("qU/loc", [n_latent, n_rows])
    qU_scale = tf.nn.softplus(
        tf.get_variable("qU/scale", [n_latent, n_rows]))
    qU = Normal(loc=qU_loc, scale=qU_scale)

    qV_loc = tf.get_variable("qV/loc", [n_latent, n_cols])
    qV_scale = tf.nn.softplus(
        tf.get_variable("qV/scale", [n_latent, n_cols]))
    qV = Normal(loc=qV_loc, scale=qV_scale)

    inference = ed.KLqp({U: qU, V: qV}, data={R: R_true, I: I_train})
    inference.run()

    # CRITICISM
    qR = Normal(loc=tf.matmul(tf.transpose(qU), qV),
                scale=tf.ones([n_rows, n_cols]))

    print("Mean squared error on test data:")
    print(ed.evaluate('mean_squared_error', data={qR: R_true, I: I_test}))

    plt.imshow(R_true, cmap='hot')
    plt.show()

    R_est = tf.matmul(tf.transpose(qU), qV).eval()
    plt.imshow(R_est, cmap='hot')
    plt.show()
def test(self, filename, data):
    """Restore a saved graph and return the log-likelihood of ``data``.

    Args:
        filename: path of the meta graph passed to
            ``tf.train.import_meta_graph``.
        data: 2-D array indexed as ``data[:, idx]``; column ``idx`` is bound
            to the ``idx``-th discrete variable, and the columns selected by
            ``self.continus_variable_idxs`` are bound to the continuous ones.

    Returns:
        The result of ``ed.evaluate('log_likelihood', ...)``.
    """
    with tf.Session() as sess:
        restorer = tf.train.import_meta_graph(filename, clear_devices=True)
        restorer.restore(sess, tf.train.latest_checkpoint('./'))

        discrete_vars = tf.get_collection('d')
        prior_pis = tf.get_collection('d_pi')
        posterior_pis = tf.get_collection('d_pi_q')

        # Discrete variables: swap each prior for its variational
        # counterpart and bind the matching data column.
        eval_data = {}
        for idx in range(self.num_discrete_variables):
            posterior_var = ed.copy(discrete_vars[idx],
                                    {prior_pis[idx]: posterior_pis[idx]})
            eval_data[posterior_var] = data[:, idx].tolist()

        # Continuous variables: paired with the flattened (column-major)
        # values of their columns.
        continuous_values = (
            data[:, self.continus_variable_idxs].flatten('F').tolist())
        eval_data.update(zip(tf.get_collection('c'), continuous_values))

        # The two maps are merged into one dict; the previous
        # dict(a.items(), b.items()) call passed two positional arguments
        # to dict() and raised TypeError.
        return ed.evaluate('log_likelihood', data=eval_data)
def evaluate(metrics, data, n_samples=500, output_key=None, seed=None):
    """ Evaluate a fitted inferpy model using a set of metrics. This function
    encapsulates the equivalent Edward one.

    Args:
        metrics: list of str indicating the metrics or score functions to be used.
        data: dict; any key or value that is an inferpy ``RandomVariable`` is
            replaced by its ``dist`` attribute before the Edward call.
        n_samples: forwarded unchanged to ``ed.evaluate``.
        output_key: unwrapped like the entries of ``data``, then forwarded.
        seed: forwarded unchanged to ``ed.evaluate``.

    An example of use:

    .. literalinclude:: ../../examples/evaluate.py
       :language: python
       :lines: 52,53

    Returns:
        list of float or float: A list of evaluations or a single evaluation.

    Raises:
        NotImplementedError: If an input metric does not match an implemented metric.
    """

    def unwrap(obj):
        # inferpy random variables expose the wrapped object as `.dist`.
        if isinstance(obj, inf.models.RandomVariable):
            return obj.dist
        return obj

    data_ed = {unwrap(key): unwrap(value) for (key, value) in iteritems(data)}
    return ed.evaluate(metrics, data_ed, n_samples, unwrap(output_key), seed)
# Snippet contents, in order:
#   1. The tail of a prediction routine whose header is not visible here: it
#      takes the columns of `zs` from index 1 on as weights and returns the
#      mean of `x_test . W + b` along axis 1 together with `y_test`.
#   2. build_toy_dataset(coeff, n_data, n_data_test, noise_std): seeds Edward
#      with 0, draws n_data + n_data_test standard-normal inputs, forms
#      y = x . coeff + Normal(0, noise_std) noise, and returns two `ed.Data`
#      objects (train, test) holding float32 constants whose first column is y
#      and whose remaining columns are x.
#   3. A script that seeds Edward with 42, fits `LinearModel` with `ed.MFVI`
#      on data generated from 10 random coefficients and prints the 'mse'
#      reported by `ed.evaluate` on the test split.
# NOTE(review): build_toy_dataset re-seeds with 0, overriding the earlier
# seed of 42 from that point on — confirm this is intended.
W = tf.transpose(zs[:, 1:]) y_pred = tf.reduce_mean(tf.matmul(x_test, W) + b, 1) return y_pred, y_test def build_toy_dataset(coeff, n_data=40, n_data_test=20, noise_std=0.1): ed.set_seed(0) n_dim = len(coeff) x = np.random.randn(n_data+n_data_test, n_dim) y = np.dot(x, coeff) + norm.rvs(0, noise_std, size=(n_data+n_data_test)) y = y.reshape((n_data+n_data_test, 1)) data = np.concatenate((y[:n_data,:], x[:n_data,:]), axis=1) data = tf.constant(data, dtype=tf.float32) data_test = np.concatenate((y[n_data:,:], x[n_data:,:]), axis=1) data_test = tf.constant(data_test, dtype=tf.float32) return ed.Data(data), ed.Data(data_test) ed.set_seed(42) model = LinearModel() variational = Variational() variational.add(Normal(model.num_vars)) coeff = np.random.randn(10) data, data_test = build_toy_dataset(coeff) inference = ed.MFVI(model, variational, data) sess = inference.run(n_iter=250, n_minibatch=5, n_print=10) print(ed.evaluate('mse', model, variational, data_test, sess))
# Training loop fragment (regression variant).
# For each of `inference.n_iter` iterations it calls `inference.update` on the
# training feed and prints progress, timing only those two calls. On the first
# iteration and every 50th one it copies `y` onto the variational posteriors
# and appends the 'mse' from `ed.evaluate` on the test set (n_samples=500) to
# `test_acc`. The collected values are written to `path + '/test_mse.csv'`.
# When the third command-line argument is not 'kl', the stored parameter
# draws after `nburn` are averaged along axis 0.
# NOTE(review): `total` starts as a timer reading (not 0) and then has elapsed
# durations added to it, so it is not a plain sum of durations — confirm how
# it is consumed later.
# NOTE(review): `_` is used as a real loop index here.
test_acc = [] for _ in range(inference.n_iter): # Start timer - make sure only the actual inference part is calculated if _ == 0: total = timeit.default_timer() start = timeit.default_timer() info_dict = inference.update(feed_dict={x: X_train, y_ph: Y_train}) inference.print_progress(info_dict) elapsed = timeit.default_timer() - start total = total + elapsed if (_ + 1) % 50 == 0 or _ == 0: y_post = ed.copy(y, {W_0: qW_0, W_1: qW_1, b_0: qb_0, b_1: qb_1}) mse_tmp = ed.evaluate('mse', data={ x: X_test, y_post: Y_test }, n_samples=500) print('\nIter ', _ + 1, ' -- MSE: ', mse_tmp) test_acc.append(mse_tmp) # Save test accuracy during training name = path + '/test_mse.csv' np.savetxt(name, test_acc, fmt='%.5f', delimiter=',') ## Model Evaluation # y_post = ed.copy(y, {W_0: qW_0, W_1: qW_1, b_0: qb_0, b_1: qb_1}) if str(sys.argv[3]) != 'kl': W0_opt = (qW_0.params.eval()[nburn:, :, :]).mean(axis=0) W1_opt = (qW_1.params.eval()[nburn:, :, :]).mean(axis=0)
# main(_): iterative matrix-factorisation experiment on the 'V' matrix loaded
# from ~/data/bbbvi/cbcl.mat.
#
# Visible steps:
#   * Loads 'V' from the same .mat file into `tr` and `te`; `R_true` is `tr`
#     and (N, M) is its shape. Training indicators come from
#     get_indicators(N, M, FLAGS.mask_ratio); test indicators are the
#     complement.
#   * For each of FLAGS.n_fw_iter iterations: creates a fresh tf.Graph and
#     InteractiveSession, seeds the graph with FLAGS.seed, builds a joint
#     Normal prior `UV` (U and V concatenated along axis 1) and a Normal
#     likelihood `R` whose mean is multiplied by the indicator placeholder,
#     builds the variational Normal `qUV`, and runs `relbo.KLqp` for 100
#     iterations with the previous iterates passed as `fw_iterates`.
#   * Uses step size gamma = 2 / (iter + 2): existing weights are scaled by
#     (1 - gamma) and gamma is appended, the new component is added through
#     `update_iterate`, and `build_mixture` forms the mixture used for `qR`.
#   * Evaluates 'mean_squared_error' and 'log_lik' with `ed.evaluate`, and
#     writes both histories to test_mse.csv / test_ll.csv under FLAGS.outdir.
#
# NOTE(review): `U_true`, `V_true`, `te`, `it_best` and `mses` are not used in
# the visible code; `D` is assigned but FLAGS.D is what gets used.
# NOTE(review): `iter` shadows the builtin of the same name.
# NOTE(review): an InteractiveSession is opened per iteration and no close()
# is visible — confirm sessions are released elsewhere.
# NOTE(review): `qR` does not depend on the placeholder `I`, so feeding
# `I_test` presumably does not restrict the metrics to test entries — verify.
def main(_): # true latent factors U_true = np.random.randn(FLAGS.D, FLAGS.N) V_true = np.random.randn(FLAGS.D, FLAGS.M) ## DATA #R_true = build_toy_dataset(U_true, V_true, FLAGS.N, FLAGS.M) #I_train = get_indicators(FLAGS.N, FLAGS.M) #I_test = 1 - I_train #N = FLAGS.N #M = FLAGS.M #tr = sio.loadmat(os.path.expanduser("~/data/bbbvi/trainData1.mat"))['X'] #te = sio.loadmat(os.path.expanduser("~/data/bbbvi/testData1.mat"))['X'] #tr = tr[:,:100] #te = te[:,:100] #I_train = tr != 0 #I_test = te != 0 #R_true = (tr + te).astype(np.float32) #N,M = R_true.shape tr = sio.loadmat(os.path.expanduser("~/data/bbbvi/cbcl.mat"))['V'] te = sio.loadmat(os.path.expanduser("~/data/bbbvi/cbcl.mat"))['V'] #I_train = np.ones(tr.shape) #I_test = np.ones(tr.shape) R_true = tr N, M = tr.shape D = FLAGS.D I_train = get_indicators(N, M, FLAGS.mask_ratio) I_test = 1 - I_train it_best = 0 weights, qUVt_components, mses = [], [], [] test_mses, test_lls = [], [] for iter in range(FLAGS.n_fw_iter): print("iter", iter) g = tf.Graph() with g.as_default(): tf.set_random_seed(FLAGS.seed) sess = tf.InteractiveSession() with sess.as_default(): # MODEL I = tf.placeholder(tf.float32, [N, M]) scale_uv = tf.concat( [tf.ones([FLAGS.D, N]), tf.ones([FLAGS.D, M])], axis=1) mean_uv = tf.concat( [tf.zeros([FLAGS.D, N]), tf.zeros([FLAGS.D, M])], axis=1) UV = Normal(loc=mean_uv, scale=scale_uv) R = Normal(loc=tf.matmul(tf.transpose(UV[:, :N]), UV[:, N:]) * I, scale=tf.ones([N, M])) mean_quv = tf.concat([ tf.get_variable("qU/loc", [FLAGS.D, N]), tf.get_variable("qV/loc", [FLAGS.D, M]) ], axis=1) scale_quv = tf.concat([ tf.nn.softplus(tf.get_variable("qU/scale", [FLAGS.D, N])), tf.nn.softplus(tf.get_variable("qV/scale", [FLAGS.D, M])) ], axis=1) qUV = Normal(loc=mean_quv, scale=scale_quv) inference = relbo.KLqp({UV: qUV}, data={ R: R_true, I: I_train }, fw_iterates=get_fw_iterates( iter, weights, UV, qUVt_components), fw_iter=iter) inference.run(n_iter=100) gamma = 2. / (iter + 2.) weights = [(1. 
- gamma) * w for w in weights] weights.append(gamma) qUVt_components = update_iterate(qUVt_components, qUV) qUV_new = build_mixture(weights, qUVt_components) qR = Normal(loc=tf.matmul(tf.transpose(qUV_new[:, :N]), qUV_new[:, N:]), scale=tf.ones([N, M])) # CRITICISM test_mse = ed.evaluate('mean_squared_error', data={ qR: R_true, I: I_test.astype(bool) }) test_mses.append(test_mse) print('test mse', test_mse) test_ll = ed.evaluate('log_lik', data={ qR: R_true.astype('float32'), I: I_test.astype(bool) }) test_lls.append(test_ll) print('test_ll', test_ll) np.savetxt(os.path.join(FLAGS.outdir, 'test_mse.csv'), test_mses, delimiter=',') np.savetxt(os.path.join(FLAGS.outdir, 'test_ll.csv'), test_lls, delimiter=',')
# Train/test indicators: the test indicator is the complement of the training one.
I_train = get_indicators(N, M)
I_test = 1 - I_train

# MODEL: standard-Normal factors, unit-sigma Normal likelihood whose mean is
# multiplied element-wise by the indicator placeholder.
I = tf.placeholder(tf.float32, [N, M])
U = Normal(mu=tf.zeros([D, N]), sigma=tf.ones([D, N]))
V = Normal(mu=tf.zeros([D, M]), sigma=tf.ones([D, M]))
R = Normal(
    mu=tf.matmul(tf.transpose(U), V) * I,
    sigma=tf.ones([N, M]),
)

# INFERENCE: Normal variational factors, randomly initialised, with
# softplus-constrained sigmas.
qU = Normal(
    mu=tf.Variable(tf.random_normal([D, N])),
    sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, N]))),
)
qV = Normal(
    mu=tf.Variable(tf.random_normal([D, M])),
    sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, M]))),
)

inference = ed.KLqp({U: qU, V: qV}, data={R: R_true, I: I_train})
inference.run()

# CRITICISM
qR = Normal(
    mu=tf.matmul(tf.transpose(qU), qV),
    sigma=tf.ones([N, M]),
)

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={qR: R_true, I: I_test}))

# Heat maps of the true matrix and of one evaluated reconstruction.
plt.imshow(R_true, cmap='hot')
plt.show()

R_est = tf.matmul(tf.transpose(qU), qV).eval()
plt.imshow(R_est, cmap='hot')
plt.show()
# Train/test indicators: the test indicator is the complement of the training one.
I_train = get_indicators(N, M)
I_test = 1 - I_train

# MODEL: standard-Normal factors, unit-scale Normal likelihood whose mean is
# multiplied element-wise by the indicator placeholder.
I = tf.placeholder(tf.float32, [N, M])
U = Normal(loc=tf.zeros([D, N]), scale=tf.ones([D, N]))
V = Normal(loc=tf.zeros([D, M]), scale=tf.ones([D, M]))
R = Normal(
    loc=tf.matmul(tf.transpose(U), V) * I,
    scale=tf.ones([N, M]),
)

# INFERENCE: Normal variational factors, randomly initialised, with
# softplus-constrained scales.
qU = Normal(
    loc=tf.Variable(tf.random_normal([D, N])),
    scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, N]))),
)
qV = Normal(
    loc=tf.Variable(tf.random_normal([D, M])),
    scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, M]))),
)

inference = ed.KLqp({U: qU, V: qV}, data={R: R_true, I: I_train})
inference.run()

# CRITICISM
qR = Normal(
    loc=tf.matmul(tf.transpose(qU), qV),
    scale=tf.ones([N, M]),
)

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={qR: R_true, I: I_test}))

# Heat maps of the true matrix and of one evaluated reconstruction.
plt.imshow(R_true, cmap='hot')
plt.show()

R_est = tf.matmul(tf.transpose(qU), qV).eval()
plt.imshow(R_est, cmap='hot')
plt.show()
# save(arr, xdata, ydata): train a three-layer Bayesian network with KLqp,
# score it on a held-out split, and write a checkpoint plus a CSV report.
#
# Parameters (as read by the code):
#   arr   -- positional settings: arr[0] output name (used as a directory and
#            file stem), arr[1] H1 and arr[2] H2 (hidden widths, cast to int),
#            arr[3] param1 and arr[4] param2 (prior / posterior scale
#            multipliers, cast to float), arr[5] number of KLqp iterations.
#   xdata -- inputs; the first round(FLAGS.T * 0.8) rows train, the rest test.
#   ydata -- targets, split at the same index.
# Returns `perCorrect`, the last value returned by `Analysis` on the
# test-set predictions.
#
# Visible steps: builds Normal priors W_0..W_2 / b_0..b_2 and the likelihood
# `y` in a dedicated graph; builds Normal variational factors under the
# "posterior" variable scope; runs KLqp with Adam and a staircase
# exponentially-decayed learning rate (start 0.1, factor 0.3 every 100 steps);
# evaluates mean squared / absolute error and a PPC of the mean; samples from
# a copy of the test-time output with the weights swapped for the posterior
# means and applies `predictive_entropy` row-wise; saves the session with
# tf.train.Saver and writes the report next to this file.
#
# NOTE(review): `x_train` / `x_test` are converted with np.asarray twice and
# `y_train` / `y_test` never are — possibly the second pair was meant for y.
# NOTE(review): the likelihood `y` is built from `x_train` directly rather
# than from the placeholder `X`, so feeding `X` presumably has no effect on
# it — verify.
# NOTE(review): `X` is re-bound to a new placeholder before the evaluation
# calls; that placeholder is not an input of `NormalTest` — verify.
# NOTE(review): `iter1 = arr[pos]` is not cast, unlike the other settings.
# NOTE(review): the report file is opened without a `with` block, so it stays
# open if a write raises.
# NOTE(review): the "PPC mean" line is written without a trailing newline, so
# the following header text continues on the same line of the report.
def save(arr,xdata,ydata): tf.reset_default_graph() trainSetNumber = round(FLAGS.T* 0.8) x_train = xdata[:trainSetNumber] y_train = ydata[:trainSetNumber] x_test = xdata[trainSetNumber:] y_test = ydata[trainSetNumber:] x_train = np.asarray(x_train) x_test = np.asarray(x_test) x_train = np.asarray(x_train) x_test = np.asarray(x_test) # print(x_test) # print(y_test) pos = 0 name = arr[pos] pos +=1 H1 = int(arr[pos]) pos+=1 H2 = int(arr[pos]) pos+=1 param1 = float(arr[pos]) pos += 1 param2 = float(arr[pos]) graph1 = tf.Graph() with graph1.as_default(): with tf.name_scope("model"): W_0 = Normal(loc=tf.zeros([FLAGS.D, H1]), scale=param1*tf.ones([FLAGS.D,H1 ]),name="W_0") W_1 = Normal(loc=tf.zeros([H1, H2]), scale=param2*tf.ones([H1, H2]), name="W_1") W_2 = Normal(loc=tf.zeros([H2, FLAGS.O]), scale=param2*tf.ones([H2, FLAGS.O]), name="W_2") b_0 = Normal(loc=tf.zeros(H1), scale=param1 *tf.ones(H1), name="b_0") b_1 = Normal(loc=tf.zeros(H2), scale=param2* tf.ones(H2), name="b_1") b_2 = Normal(loc=tf.zeros(FLAGS.O), scale=param2* tf.ones(FLAGS.O), name="b_2") X = tf.placeholder(tf.float32, [trainSetNumber, FLAGS.D], name="X") y = Normal(loc=neural_network(x_train,W_0, W_1, W_2, b_0, b_1, b_2, trainSetNumber), scale=0.1*tf.ones([trainSetNumber,FLAGS.O]), name="y") with tf.variable_scope("posterior",reuse=tf.AUTO_REUSE): with tf.variable_scope("qW_0",reuse=tf.AUTO_REUSE): loc = tf.get_variable("loc", [FLAGS.D, H1]) scale = param1*tf.nn.softplus(tf.get_variable("scale", [FLAGS.D, H1])) qW_0 = Normal(loc=loc, scale=scale) with tf.variable_scope("qW_1",reuse=tf.AUTO_REUSE): loc = tf.get_variable("loc", [H1, H2]) scale = param2*tf.nn.softplus(tf.get_variable("scale", [H1, H2])) qW_1 = Normal(loc=loc, scale=scale) with tf.variable_scope("qW_2",reuse=tf.AUTO_REUSE): loc = tf.get_variable("loc", [H2, FLAGS.O]) scale = param2*tf.nn.softplus(tf.get_variable("scale", [H2, FLAGS.O])) qW_2 = Normal(loc=loc, scale=scale) with tf.variable_scope("qb_0",reuse=tf.AUTO_REUSE): loc = 
tf.get_variable("loc", [H1]) scale =param1 * tf.nn.softplus(tf.get_variable("scale", [H1])) qb_0 = Normal(loc=loc, scale=scale) with tf.variable_scope("qb_1",reuse=tf.AUTO_REUSE): loc = tf.get_variable("loc", [H2]) scale =param2 * tf.nn.softplus(tf.get_variable("scale", [H2])) qb_1 = Normal(loc=loc, scale=scale) with tf.variable_scope("qb_2",reuse=tf.AUTO_REUSE): loc = tf.get_variable("loc", [FLAGS.O]) scale =param2 * tf.nn.softplus(tf.get_variable("scale", [FLAGS.O])) qb_2 = Normal(loc=loc, scale=scale) #inference with tf.Session(graph=graph1) as sess: # Set up the inference method, mapping the prior to the posterior variables inference = ed.KLqp({W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2}, data={X: x_train, y: y_train}) # Set up the adam optimizer global_step = tf.Variable(0, trainable=False) starter_learning_rate = 0.1 learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,100, 0.3, staircase=True) optimizer = tf.train.AdamOptimizer(learning_rate) # Run the inference method pos += 1 iter1 = arr[pos] inference.run(n_iter=iter1,optimizer=optimizer ,n_samples=5) #Run the test data through the neural network infered = neural_network(x_test, qW_0, qW_1, qW_2, qb_0, qb_1, qb_2, len(x_test)) inferedList = infered.eval() #Accuracy checks on the data (The test data) # In order to work with PPC and other metrics, it must be a random variables # Normal creates this random varaibles by sampling from the poterior with a normal distribution NormalTest =Normal(loc=neural_network(x_test, qW_0, qW_1, qW_2, qb_0, qb_1, qb_2,len(x_test)), scale=0.1*tf.ones([len(x_test),FLAGS.O]), name="y_other") NormalTestList = NormalTest.eval() # Change the graph so that the posterior point to the output y_post = ed.copy(NormalTest, {W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2}) X = tf.placeholder(tf.float32, [len(x_test), FLAGS.D], name="X") y_test_tensor = tf.convert_to_tensor(y_test) MSE = ed.evaluate('mean_squared_error', 
data={X: x_test, NormalTest: y_test_tensor}) MAE =ed.evaluate('mean_absolute_error', data={X: x_test, NormalTest: y_test_tensor}) # PPC calculation PPCMean = ed.ppc(lambda xs, zs: tf.reduce_mean(xs[y_post]), data={y_post: y_test, X:x_test}, latent_vars={W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2}, n_samples=5) # Change the graph again, this is done to do epistemic uncertainty calculations posterior = ed.copy(NormalTest, dict_swap={W_0: qW_0.mean(), b_0: qb_0.mean(),W_1: qW_1.mean(), b_1: qb_1.mean(),W_2: qW_2.mean(), b_2: qb_2.mean()}) Y_post1 = sess.run(posterior.sample(len(x_test)), feed_dict={X: x_test, posterior: y_test}) mean_prob_over_samples=np.mean(Y_post1, axis=0) ## prediction means prediction_variances = np.apply_along_axis(predictive_entropy, axis=1, arr=mean_prob_over_samples) # Run analysis on test data, to see how many records were correct classes, actualClass, cor, firsts, seconds, thirds, fails, perCorrect = Analysis(inferedList, y_test) # Save the model through TF saver saver = tf.train.Saver() dir_path = os.path.dirname(os.path.realpath(__file__)) save_path = saver.save(sess, dir_path +"/"+name+"/model.ckpt") print("Model saved in path: %s" % save_path) file = open(dir_path+"/"+name +"/"+name+".csv",'w') file.write("MSE = " + str(MSE)) file.write("\nMAE = " + str(MAE)) file.write("\nPPC mean = " + str(PPCMean)) file.write("; Predicted First;Predicted Second; Predicted Third; Predicted Fail \n") classNames = ['First','Second', 'Third', 'Fail'] for x in range(len(firsts)): file.write(classNames[x] + ";" + str(firsts[x]) + ";" + str(seconds[x])+ ";" + str(thirds[x])+ ";" + str(fails[x]) + "\n") file.write("Num;Class 1;Class 2;Class 3;Class 4;Epi;Predicted Class;Correct Class\n ") for x in range(len(inferedList)): line = str(x) for i in range(len(inferedList[x])): line += ";" + str(round(inferedList[x][i],2)) line += ";" + str(round(prediction_variances[x],2)) + ";" + str(classes[x]+1) + ";" + str(actualClass[x]+1) + "\n" 
file.write(line) file.close() return perCorrect
# Session fragment: initialises the global variables, prints the static shape
# of `prediction`, reshapes the training targets to
# [-1, n_chunks, n_classes], and runs KLqp (5 samples, 100 iterations) with
# `qWeights` / `qbaises` as the variational factors for `weights` / `biases`.
# It then copies `prediction` onto those factors, reshapes the test targets
# the same way, prints the 'mean_squared_error' from `ed.evaluate`, and
# prints the elapsed time since `start` (set outside this fragment).
# NOTE(review): the label printed is "Accuracy on test data:" but the metric
# evaluated is the mean squared error.
with tf.Session() as sess: sess.run(tf.global_variables_initializer()) print(prediction.get_shape().as_list()) train_output = np.reshape(train_output, [-1, n_chunks, n_classes]) inference = ed.KLqp({ weights: qWeights, biases: qbaises }, data={ data: train_input, prediction: train_output }) inference.run(n_samples=5, n_iter=100) out_post = ed.copy(prediction, {weights: qWeights, biases: qbaises}) print("Accuracy on test data:") test_output = np.reshape(test_output, [-1, n_chunks, n_classes]) print( ed.evaluate('mean_squared_error', data={ data: test_input, out_post: test_output })) stop = timeit.default_timer() print('It took', stop - start, 'secs')
def compute_mean_absolute_error(y_posterior, X_val_feed_dict, y_val):
    """Return the 'mean_absolute_error' from ``ed.evaluate``.

    Args:
        y_posterior: key bound to the observed targets ``y_val``.
        X_val_feed_dict: additional entries merged into the evaluation data
            after the target binding.
        y_val: observed targets.
    """
    bindings = {y_posterior: y_val}
    bindings.update(X_val_feed_dict)
    return ed.evaluate('mean_absolute_error', data=bindings)
# Split the loaded table: every column but the last is a feature, the last
# column is the price. Both are cast to float32.
n_samples = np.shape(feature_nd)[0]
feature = np.float32(feature_nd[:, :-1])
price = np.float32(feature_nd[:, -1])

# MODEL: Bayesian linear regression over 11 features with standard-Normal
# priors and a unit-scale Normal likelihood, sized to the training rows.
X = tf.placeholder(tf.float32, [n_samples, 11])
w = Normal(loc=tf.zeros(11), scale=tf.ones(11))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(n_samples))

# INFERENCE: randomly initialised Normal factors, softplus-constrained scales.
qw = Normal(
    loc=tf.Variable(tf.random_normal([11])),
    scale=tf.nn.softplus(tf.Variable(tf.random_normal([11]))),
)
qb = Normal(
    loc=tf.Variable(tf.random_normal([1])),
    scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))),
)

inference = ed.KLqp({w: qw, b: qb}, data={X: feature, y: price})
inference.run(n_samples=11, n_iter=250)
print("debug")

# Validation table, split the same way as the training one.
test_all = np.genfromtxt(
    'C:\\Users\\Administrator\\Desktop\\数学建模\\mlp_regression_val.csv',
    dtype=float,
    delimiter=',',
)
test_feature = np.float32(test_all[:, :-1])
test_price = np.float32(test_all[:, -1])

# NOTE(review): the copy below is immediately replaced by an explicitly
# built Normal; only the second `y_post` is evaluated.
y_post = ed.copy(y, {w: qw, b: qb})
y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(n_samples))

# NOTE(review): the result is not printed or stored, and X is sized to the
# training rows, so the validation table presumably needs the same row count.
ed.evaluate('mean_squared_error', data={X: test_feature, y_post: test_price})
# Snippet contents, in order:
#   1. The tail of a data generator whose header is not visible here: for each
#      of N points it draws a component index from a multinomial over `pi`,
#      samples a 2-D Gaussian with that component's mean and a diagonal
#      covariance built from `stds`, and returns the points under key 'x'.
#   2. A script that seeds Edward with 42, generates 500 points, plots them,
#      fits a 2-component, 2-dimensional `MixtureGaussian` with `ed.MFVI`
#      (Dirichlet, Normal and InvGamma variational factors), assigns each
#      point to the argmax over axis 0 of the 'log_likelihood' returned by
#      `ed.evaluate`, and plots the points coloured by that assignment.
# NOTE(review): `stds` is passed to np.diag as the covariance diagonal, so
# the values act as variances rather than standard deviations — confirm.
stds = [[0.1, 0.1], [0.1, 0.1]] x = np.zeros((N, 2), dtype=np.float32) for n in range(N): k = np.argmax(np.random.multinomial(1, pi)) x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k])) return {'x': x} ed.set_seed(42) data = build_toy_dataset(500) plt.scatter(data['x'][:, 0], data['x'][:, 1]) plt.axis([-3, 3, -3, 3]) plt.title("Simulated dataset") plt.show() model = MixtureGaussian(K=2, D=2) variational = Variational() variational.add(Dirichlet(model.K)) variational.add(Normal(model.K*model.D)) variational.add(InvGamma(model.K*model.D)) inference = ed.MFVI(model, variational, data) inference.run(n_iter=4000, n_samples=50, n_minibatch=10) clusters = np.argmax(ed.evaluate('log_likelihood', model, variational, data), axis=0) plt.scatter(data['x'][:, 0], data['x'][:, 1], c=clusters, cmap=cm.bwr) plt.axis([-3, 3, -3, 3]) plt.title("Predicted cluster assignments") plt.show()
# Training loop fragment (classification variant).
# For each of `inference.n_iter` iterations it calls `inference.update` on the
# resized training inputs and prints progress, timing only those two calls.
# On the first iteration and every 50th one it copies `y` onto the variational
# posteriors and appends the 'sparse_categorical_accuracy' from `ed.evaluate`
# on the resized test inputs (n_samples=100) to `test_acc`. The collected
# values are written to `path + '/test_acc.csv'`. When the third command-line
# argument is not 'kl', the stored weight and bias draws after `nburn` are
# averaged along axis 0.
# NOTE(review): `total` starts as a timer reading (not 0) and then has elapsed
# durations added to it, so it is not a plain sum of durations — confirm how
# it is consumed later.
# NOTE(review): `_` is used as a real loop index here.
# Training - Phase 1 test_acc = [] for _ in range(inference.n_iter): # Start timer - make sure only the actual inference part is calculated if _ == 0: total = timeit.default_timer() start = timeit.default_timer() # TensorFlow method gives the label data in a one hot vetor format. We convert that into a single label. info_dict = inference.update(feed_dict={x: resize(X_train), y_ph: Y_train}) inference.print_progress(info_dict) elapsed = timeit.default_timer() - start total = total + elapsed if (_ + 1 ) % 50 == 0 or _ == 0: y_post = ed.copy(y, {W_0: qW_0, W_1: qW_1, b_0: qb_0, b_1: qb_1}) acc_tmp = ed.evaluate('sparse_categorical_accuracy', data={x: resize(X_test), y_post: Y_test}, n_samples=100) print('\nIter ', _+1, ' -- Accuracy: ', acc_tmp) test_acc.append(acc_tmp) # Save test accuracy during training name = path + '/test_acc.csv' np.savetxt(name, test_acc, fmt = '%.5f', delimiter=',') ## Model Evaluation # y_post = ed.copy(y, {W_0: qW_0, W_1: qW_1, b_0: qb_0, b_1: qb_1}) if str(sys.argv[3]) != 'kl': W0_opt = (qW_0.params.eval()[nburn:, :, :]).mean(axis=0) W1_opt = (qW_1.params.eval()[nburn:, :, :]).mean(axis=0) b0_opt = (qb_0.params.eval()[nburn:, :]).mean(axis=0) b1_opt = (qb_1.params.eval()[nburn:, :]).mean(axis=0)
# Drop the last test row: the placeholder below is sized to the training set,
# and (per the original author) testing did not work with a different N.
X_test, y_test = X_test[:-1, :], y_test[:-1]

# The linear kernel is not wired in yet; an RBF kernel is used instead.
N, P = X_train.shape
X_tf = tf.placeholder(tf.float32, [N, P])

# Latent stochastic function.
# Inclusion indicators drawn from the varbvs prior; they weight the inputs
# element-wise before the row mean is taken as the GP prior mean.
b = Bernoulli(varbvs_prior, dtype=np.float32)
gp_mu = tf.reduce_mean(
    tf.multiply(X_tf, tf.reshape(tf.tile(b, [N]), [N, P])),
    1,
)
f = MultivariateNormalTriL(
    loc=gp_mu,
    # Cholesky factor of the RBF kernel matrix as the GP covariance (for now).
    scale_tril=tf.cholesky(rbf(X_tf)),
)

# Variational factor for the latent function.
qf = Normal(
    loc=tf.get_variable("qf/loc", [N]),
    scale=tf.nn.softplus(tf.get_variable("qf/scale", [N])),
)

# Response
y_tf = Bernoulli(logits=f)

# Inference
infer = ed.KLqp({f: qf}, data={X_tf: X_train, y_tf: y_train})
infer.run(n_samples=3, n_iter=5000)

# Criticism
y_post = ed.copy(y_tf, {f: qf})
ed.evaluate('binary_accuracy', data={X_tf: X_test, y_post: y_test})
def main(_):
  """Fit a small Bayesian neural network with KLqp and report its fit.

  Loads 50 two-dimensional inputs from 'X1.txt' and targets from 'Y1.txt',
  places Normal priors on three weight matrices, fits Normal variational
  posteriors with ed.KLqp, then prints the fitted posterior parameters and
  the mean squared / mean absolute error of the posterior predictive.

  The errors are computed on the training data (the only data loaded here),
  and the printed labels now say so.

  Args:
    _: Unused positional argument.
  """

  def neural_network(X):
    # 2 -> 2 -> 2 -> 1 network with sigmoid hidden activations, flattened to
    # a vector with one prediction per row of X.
    h = tf.nn.sigmoid(tf.matmul(X, W_0))
    h = tf.nn.sigmoid(tf.matmul(h, W_1))
    h = tf.matmul(h, W_2)
    return tf.reshape(h, [-1])

  def normal_posterior(scope, shape, loc_init):
    # Build one Normal variational factor inside variable scope `scope`.
    # Returns (loc variable, softplus-transformed scale tensor, Normal).
    with tf.variable_scope(scope):
      loc = tf.get_variable(
          "loc", shape, initializer=tf.constant_initializer(loc_init))
      # softplus(log(e - 1)) == 1, so the scale starts at exactly 1.
      scale = tf.nn.softplus(
          tf.get_variable(
              "scale", shape,
              initializer=tf.constant_initializer(math.log(math.e - 1))))
      return loc, scale, Normal(loc=loc, scale=scale)

  ed.set_seed(42)

  # DATA
  X_train = np.loadtxt('X1.txt', delimiter=",")
  y_train = np.loadtxt('Y1.txt', delimiter=",")
  X_train = X_train.reshape((50, 2))
  y_train = y_train.reshape((50, ))

  # MODEL
  with tf.name_scope("model"):
    W_0 = Normal(loc=3 * tf.ones([2, 2]), scale=tf.ones([2, 2]), name="W_0")
    W_1 = Normal(loc=-4 * tf.ones([2, 2]), scale=tf.ones([2, 2]), name="W_1")
    W_2 = Normal(loc=2 * tf.ones([2, 1]), scale=tf.ones([2, 1]), name="W_2")
    X = tf.placeholder(tf.float32, [50, 2], name="X")
    y = Normal(loc=neural_network(X), scale=0.1 * tf.ones(50), name="y")

  # INFERENCE
  # Each posterior starts at the corresponding prior's loc, with scale 1.
  with tf.variable_scope("posterior"):
    loc0, scale0, qW_0 = normal_posterior("qW_0", [2, 2], 3)
    loc1, scale1, qW_1 = normal_posterior("qW_1", [2, 2], -4)
    loc2, scale2, qW_2 = normal_posterior("qW_2", [2, 1], 2)

  inference = ed.KLqp({
      W_0: qW_0,
      W_1: qW_1,
      W_2: qW_2
  }, data={
      X: X_train,
      y: y_train
  })
  inference.run(n_samples=5, n_iter=10000)

  # CRITICISM
  # Posterior predictive: y with every prior weight swapped for its posterior.
  y_post = ed.copy(y, {W_0: qW_0, W_1: qW_1, W_2: qW_2})

  print(loc0.eval(), scale0.eval(), loc1.eval(), scale1.eval(), loc2.eval(),
        scale2.eval())

  print("Mean squared error on training data:")
  print(ed.evaluate('mean_squared_error', data={
      X: X_train,
      y_post: y_train
  }))

  print("Mean absolute error on training data:")
  print(
      ed.evaluate('mean_absolute_error', data={
          X: X_train,
          y_post: y_train
      }))
# CRITICISM

# Joint plot of the stored draws for b against those for w, keeping every
# `stride`-th entry between index `nburn` and `T`.
kept = slice(nburn, T, stride)
b_draws = qb.params.eval()[kept]
w_draws = qw.params.eval()[kept]
sns.jointplot(b_draws, w_draws)
plt.show()

# Posterior predictive checks: y with w and b replaced by qw and qb.
# This is equivalent to
# y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(N))
y_post = ed.copy(y, {w: qw, b: qb})

print("Mean squared error on test data:")
test_feed = {X: X_test, y_post: y_test}
print(ed.evaluate('mean_squared_error', data=test_feed))

# Draw lines from the prior over (w, b) on top of the training scatter.
print("Displaying prior predictive samples.")
n_prior_samples = 10
w_prior = w.sample(n_prior_samples).eval()
b_prior = b.sample(n_prior_samples).eval()

plt.scatter(X_train, y_train)

inputs = np.linspace(-1, 10, num=400)
for w_draw, b_draw in zip(w_prior, b_prior):
    output = inputs * w_draw + b_draw
    plt.plot(inputs, output)

plt.show()
def main(_):
  """Fit a Normal factor model to a masked matrix with KLqp and log metrics.

  Builds a prior over a [D, N + M] latent matrix UV, models the matrix R as
  Normal around transpose(UV[:, :N]) @ UV[:, N:], fits a Normal posterior
  with ed.KLqp on the masked matrix, then logs test/train mean squared error
  and the value returned by `elbo`.

  Args:
    _: Unused positional argument.
  """
  # Output directory (directory creation is currently disabled).
  outdir = os.path.expanduser(FLAGS.outdir)
  #os.makedirs(outdir, exist_ok=True)

  # DATA
  N, M, D, R_true, I_train, I_test = get_data()

  # MODEL
  I = tf.placeholder(tf.float32, [N, M])  # mask multiplied into R below

  # Unit scale and zero mean for every entry of the latent matrix.
  unit_blocks = [tf.ones([D, N]), tf.ones([D, M])]
  scale_uv = tf.concat(unit_blocks, axis=1)
  zero_blocks = [tf.zeros([D, N]), tf.zeros([D, M])]
  mean_uv = tf.concat(zero_blocks, axis=1)
  UV = Normal(loc=mean_uv, scale=scale_uv)

  # Generator distribution for the matrix: the first N columns of UV
  # (transposed) times the remaining M columns.
  left_factor = tf.transpose(UV[:, :N])
  right_factor = UV[:, N:]
  R = Normal(loc=tf.matmul(left_factor, right_factor), scale=tf.ones([N, M]))
  R_mask = R * I  # generated masked matrix

  sess = tf.InteractiveSession()
  p_joint = Joint(R_true, I_train, sess, D, N, M)

  # INFERENCE
  # Variable creation order: qU/loc, qV/loc, qU/scale, qV/scale.
  loc_blocks = [
      tf.get_variable("qU/loc", [D, N]),
      tf.get_variable("qV/loc", [D, M]),
  ]
  mean_suv = tf.concat(loc_blocks, axis=1)
  scale_blocks = [
      tf.nn.softplus(tf.get_variable("qU/scale", [D, N])),
      tf.nn.softplus(tf.get_variable("qV/scale", [D, M])),
  ]
  scale_suv = tf.concat(scale_blocks, axis=1)
  qUV = Normal(loc=mean_suv, scale=scale_suv)

  train_feed = {R_mask: R_true, I: I_train}
  inference = ed.KLqp({UV: qUV}, data=train_feed)
  inference.run(n_iter=FLAGS.VI_iter)

  # CRITICISM
  cR = ed.copy(R_mask, {UV: qUV})  # reconstructed matrix

  n_iters = FLAGS.VI_iter

  test_feed = {cR: R_true, I: I_test.astype(bool)}
  test_mse = ed.evaluate('mean_squared_error', data=test_feed)
  logger.info("iters %d ed test mse %.5f" % (n_iters, test_mse))

  train_eval_feed = {cR: R_true, I: I_train.astype(bool)}
  train_mse = ed.evaluate('mean_squared_error', data=train_eval_feed)
  logger.info("iters %d ed train mse %.5f" % (n_iters, train_mse))

  elbo_t = elbo(qUV, p_joint)
  logger.info('iters %d elbo %.2f' % (n_iters, elbo_t))
def main(_):
    """Run the iterative mixture-building loop for Bayesian logistic regression.

    For each of FLAGS.n_fw_iter iterations a fresh TensorFlow graph and
    session are created, a new component `s` is constructed (and, except
    possibly at t == 0, fitted with relbo.KLqp), a step is taken according to
    FLAGS.fw_variant, and metrics for the resulting mixture are appended to
    CSV/text files under FLAGS.outdir.

    NOTE(review): "fw" / "LMO" presumably stand for Frank-Wolfe and linear
    minimization oracle -- confirm against the project documentation.

    Args:
        _: Unused positional argument.

    Raises:
        NotImplementedError: if FLAGS.fw_variant is not one of 'fixed',
            'adafw', 'ada_pfw', 'ada_afw' or 'line_search' (checked for
            t > 0 only).
    """
    ed.set_seed(FLAGS.seed)

    # setting up output directory
    outdir = FLAGS.outdir
    if '~' in outdir:
        outdir = os.path.expanduser(outdir)
    os.makedirs(outdir, exist_ok=True)

    # True when the base distribution is one of the two 'mv*' families.
    is_vector = FLAGS.base_dist in ['mvnormal', 'mvlaplace']

    ((Xtrain, ytrain), (Xtest, ytest)) = blr_utils.get_data()
    N, D = Xtrain.shape
    N_test, D_test = Xtest.shape
    assert D_test == D, 'Test dimension %d different than train %d' % (D_test,
                                                                       D)
    logger.info('D = %d, Ntrain = %d, Ntest = %d' % (D, N, N_test))

    # Solution components: mixture weights and per-component
    # {'loc': ..., 'scale': ...} dicts, carried across iterations.
    weights, q_params = [], []
    # L-continuous gradient estimate
    lipschitz_estimate = None

    # Metrics to log. Each file is opened in 'w' mode and closed immediately,
    # which creates it or truncates an existing one before the loop appends.
    times_filename = os.path.join(outdir, 'times.csv')
    open(times_filename, 'w').close()

    # (mean, +- std)
    elbos_filename = os.path.join(outdir, 'elbos.csv')
    logger.info('saving elbos to, %s' % elbos_filename)
    open(elbos_filename, 'w').close()

    rocs_filename = os.path.join(outdir, 'roc.csv')
    logger.info('saving rocs to, %s' % rocs_filename)
    open(rocs_filename, 'w').close()

    gap_filename = os.path.join(outdir, 'gap.csv')
    open(gap_filename, 'w').close()

    step_filename = os.path.join(outdir, 'steps.csv')
    open(step_filename, 'w').close()

    # (mean, std)
    ll_train_filename = os.path.join(outdir, 'll_train.csv')
    open(ll_train_filename, 'w').close()
    ll_test_filename = os.path.join(outdir, 'll_test.csv')
    open(ll_test_filename, 'w').close()

    # (bin_ac_train, bin_ac_test)
    bin_ac_filename = os.path.join(outdir, 'bin_ac.csv')
    open(bin_ac_filename, 'w').close()

    # 'adafw', 'ada_afw', 'ada_pfw'
    if FLAGS.fw_variant.startswith('ada'):
        lipschitz_filename = os.path.join(outdir, 'lipschitz.csv')
        open(lipschitz_filename, 'w').close()

        iter_info_filename = os.path.join(outdir, 'iter_info.txt')
        open(iter_info_filename, 'w').close()

    for t in range(FLAGS.n_fw_iter):
        # A brand-new graph and session per iteration; only the plain-Python
        # state (weights, q_params, lipschitz_estimate) survives between
        # iterations.
        g = tf.Graph()
        with g.as_default():
            sess = tf.InteractiveSession()
            with sess.as_default():
                tf.set_random_seed(FLAGS.seed)

                # Build Model
                w = Normal(loc=tf.zeros(D, tf.float32),
                           scale=tf.ones(D, tf.float32))

                X = tf.placeholder(tf.float32, [None, D])
                y = Bernoulli(logits=ed.dot(X, w))

                p_joint = blr_utils.Joint(Xtrain, ytrain, sess,
                                          FLAGS.n_monte_carlo_samples, logger)

                # vectorized Model evaluations
                # W is fed with n_test_samples weight vectors so that all
                # sampled models are evaluated in one matmul.
                n_test_samples = 100
                W = tf.placeholder(tf.float32, [n_test_samples, D])
                y_data = tf.placeholder(tf.float32, [None])  # N -> (N, n_test)
                y_data_matrix = tf.tile(tf.expand_dims(y_data, 1),
                                        (1, n_test_samples))
                pred_logits = tf.matmul(X, tf.transpose(W))  # (N, n_test)
                # Prediction: sigmoid of the logits averaged over the samples.
                ypred = tf.sigmoid(tf.reduce_mean(pred_logits, axis=1))
                pY = Bernoulli(logits=pred_logits)  # (N, n_test)
                log_likelihoods = pY.log_prob(y_data_matrix)  # (N, n_test)
                log_likelihood_expectation = tf.reduce_mean(log_likelihoods,
                                                            axis=1)  # (N, )
                # tf.nn.moments returns (mean, variance).
                # NOTE(review): the second value is named ll_std and logged as
                # "+/-" although it is a variance -- confirm intent.
                ll_mean, ll_std = tf.nn.moments(log_likelihood_expectation,
                                                axes=[0])

                if t == 0:
                    fw_iterates = {}
                else:
                    # Current solution, rebuilt in this iteration's graph
                    # from the stored loc/scale values.
                    prev_components = [
                        coreutils.base_loc_scale(FLAGS.base_dist,
                                                 c['loc'],
                                                 c['scale'],
                                                 multivariate=is_vector)
                        for c in q_params
                    ]
                    qtw_prev = coreutils.get_mixture(weights, prev_components)
                    fw_iterates = {w: qtw_prev}

                # s is the solution to LMO, random initialization
                s = coreutils.construct_base(FLAGS.base_dist, [D],
                                             t,
                                             's',
                                             multivariate=is_vector)

                sess.run(tf.global_variables_initializer())

                # total_time is reset every iteration and only accumulates
                # the step-selection time measured further below.
                total_time = 0.
                inference_time_start = time.time()
                # Run relbo to solve LMO problem
                # If the first atom is being selected through running LMO
                # it is equivalent to running vi on a uniform prior
                # Since uniform is not in our variational family try
                # only random element (without LMO inference) as initial iterate
                if FLAGS.iter0 == 'vi' or t > 0:
                    inference = relbo.KLqp({w: s},
                                           fw_iterates=fw_iterates,
                                           data={
                                               X: Xtrain,
                                               y: ytrain
                                           },
                                           fw_iter=t)
                    inference.run(n_iter=FLAGS.LMO_iter)
                inference_time_end = time.time()
                # compute only step size selection time
                #total_time += float(inference_time_end - inference_time_start)

                # Pull the new component's parameters out of the graph as
                # concrete values so they can be stored across iterations.
                loc_s = s.mean().eval()
                scale_s = s.stddev().eval()

                # Evaluate the next step
                step_result = {}
                if t == 0:
                    # Initialization, q_0
                    q_params.append({'loc': loc_s, 'scale': scale_s})
                    weights.append(1.)
                    if FLAGS.fw_variant.startswith('ada'):
                        lipschitz_estimate = opt.adafw_linit(s, p_joint)
                        step_type = 'init'
                elif FLAGS.fw_variant == 'fixed':
                    # step_type is not set in this branch; it is only read
                    # below when fw_variant starts with 'ada'.
                    start_step_time = time.time()
                    step_result = opt.fixed(weights, q_params, qtw_prev, loc_s,
                                            scale_s, s, p_joint, t)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                elif FLAGS.fw_variant == 'adafw':
                    start_step_time = time.time()
                    step_result = opt.adaptive_fw(weights, q_params, qtw_prev,
                                                  loc_s, scale_s, s, p_joint, t,
                                                  lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                    # The estimate is refreshed only for the step types that
                    # are expected to carry an 'l_estimate' entry.
                    if step_type == 'adaptive':
                        lipschitz_estimate = step_result['l_estimate']
                elif FLAGS.fw_variant == 'ada_pfw':
                    start_step_time = time.time()
                    step_result = opt.adaptive_pfw(weights, q_params, qtw_prev,
                                                   loc_s, scale_s, s, p_joint,
                                                   t, lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                    if step_type in ['adaptive', 'drop']:
                        lipschitz_estimate = step_result['l_estimate']
                elif FLAGS.fw_variant == 'ada_afw':
                    start_step_time = time.time()
                    step_result = opt.adaptive_afw(weights, q_params, qtw_prev,
                                                   loc_s, scale_s, s, p_joint,
                                                   t, lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                    if step_type in ['adaptive', 'away', 'drop']:
                        lipschitz_estimate = step_result['l_estimate']
                elif FLAGS.fw_variant == 'line_search':
                    start_step_time = time.time()
                    step_result = opt.line_search_dkl(weights, q_params,
                                                      qtw_prev, loc_s, scale_s,
                                                      s, p_joint, t)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                else:
                    raise NotImplementedError(
                        'Step size variant %s not implemented' %
                        FLAGS.fw_variant)

                if t == 0:
                    # The first mixture is just the freshly built component.
                    gamma = 1.
                    new_components = [s]
                else:
                    # Adopt the parameters and weights chosen by the step rule
                    # and rebuild the mixture components from them.
                    q_params = step_result['params']
                    weights = step_result['weights']
                    gamma = step_result['gamma']
                    new_components = [
                        coreutils.base_loc_scale(FLAGS.base_dist,
                                                 c['loc'],
                                                 c['scale'],
                                                 multivariate=is_vector)
                        for c in q_params
                    ]
                qtw_new = coreutils.get_mixture(weights, new_components)

                # Log metrics for current iteration
                logger.info('total time %f' % total_time)
                append_to_file(times_filename, total_time)

                elbo_t = elbo(qtw_new, p_joint, return_std=False)
                # testing elbo directory from KLqp
                elbo_loss = elboModel.KLqp({w: qtw_new},
                                           data={
                                               X: Xtrain,
                                               y: ytrain
                                           })
                res_update = elbo_loss.run()
                logger.info("iter, %d, elbo, %.2f loss %.2f" %
                            (t, elbo_t, res_update['loss']))
                append_to_file(elbos_filename,
                               "%f,%f" % (elbo_t, res_update['loss']))

                logger.info('iter %d, gamma %.4f' % (t, gamma))
                append_to_file(step_filename, gamma)

                # No gap is available at t == 0 because step_result is empty.
                if t > 0:
                    gap_t = step_result['gap']
                    logger.info('iter %d, gap %.4f' % (t, gap_t))
                    append_to_file(gap_filename, gap_t)

                if FLAGS.fw_variant.startswith('ada'):
                    append_to_file(lipschitz_filename, lipschitz_estimate)
                    append_to_file(iter_info_filename, step_type)
                    logger.info('lt = %.5f, iter_type = %s' %
                                (lipschitz_estimate, step_type))

                # get weight samples to evaluate expectations
                w_samples = qtw_new.sample([n_test_samples]).eval()
                ll_train_mean, ll_train_std = sess.run([ll_mean, ll_std],
                                                       feed_dict={
                                                           W: w_samples,
                                                           X: Xtrain,
                                                           y_data: ytrain
                                                       })
                logger.info("iter, %d, train ll, %.2f +/- %.2f" %
                            (t, ll_train_mean, ll_train_std))
                append_to_file(ll_train_filename,
                               "%f,%f" % (ll_train_mean, ll_train_std))

                ll_test_mean, ll_test_std, y_test_pred = sess.run(
                    [ll_mean, ll_std, ypred],
                    feed_dict={
                        W: w_samples,
                        X: Xtest,
                        y_data: ytest
                    })
                logger.info("iter, %d, test ll, %.2f +/- %.2f" %
                            (t, ll_test_mean, ll_test_std))
                append_to_file(ll_test_filename,
                               "%f,%f" % (ll_test_mean, ll_test_std))

                roc_score = roc_auc_score(ytest, y_test_pred)
                logger.info("iter %d, roc %.4f" % (t, roc_score))
                append_to_file(rocs_filename, roc_score)

                # Second set of metrics, computed through ed.evaluate on the
                # model with w replaced by the current mixture.
                y_post = ed.copy(y, {w: qtw_new})
                # eq. to y = Bernoulli(logits=ed.dot(X, qtw_new))
                ed_train_ll = ed.evaluate('log_likelihood',
                                          data={
                                              X: Xtrain,
                                              y_post: ytrain,
                                          })
                ed_test_ll = ed.evaluate('log_likelihood',
                                         data={
                                             X: Xtest,
                                             y_post: ytest,
                                         })
                logger.info("edward train ll %.2f test ll %.2f" %
                            (ed_train_ll, ed_test_ll))
                bin_ac_train = ed.evaluate('binary_accuracy',
                                           data={
                                               X: Xtrain,
                                               y_post: ytrain,
                                           })
                bin_ac_test = ed.evaluate('binary_accuracy',
                                          data={
                                              X: Xtest,
                                              y_post: ytest,
                                          })
                append_to_file(bin_ac_filename,
                               "%f,%f" % (bin_ac_train, bin_ac_test))
                # NOTE(review): the two messages below say "ll" but the
                # logged values are binary accuracy and mean squared error.
                logger.info(
                    "edward binary accuracy train ll %.2f test ll %.2f" %
                    (bin_ac_train, bin_ac_test))
                mse_test = ed.evaluate('mean_squared_error',
                                       data={
                                           X: Xtest,
                                           y_post: ytest,
                                       })
                logger.info("edward mse test ll %.2f" % (mse_test))

        # Release this iteration's session and clear the default graph before
        # the next iteration builds a new one.
        sess.close()
        tf.reset_default_graph()
# Data: train and test sets generated from the same true weight vector.
D = 10
w_true = np.random.randn(D)
X_train, y_train = build_toy_dataset(N, w_true)
X_test, y_test = build_toy_dataset(N, w_true)

# Model: Bayesian linear regression with standard Normal priors on the
# weights and the bias, and unit observation noise.
X = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y_mean = ed.dot(X, w) + b
y = Normal(mu=y_mean, sigma=tf.ones(N))

# Inference: fully factorised Normal posteriors with randomly initialised
# parameters; softplus keeps each sigma positive.
qw_mu = tf.Variable(tf.random_normal([D]))
qw_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([D])))
qw = Normal(mu=qw_mu, sigma=qw_sigma)

qb_mu = tf.Variable(tf.random_normal([1]))
qb_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([1])))
qb = Normal(mu=qb_mu, sigma=qb_sigma)

latent_vars = {w: qw, b: qb}
inference = ed.KLqp(latent_vars, data={X: X_train, y: y_train})
inference.run(n_samples=3, n_iter=1000)

# Criticism: posterior predictive, i.e. y with w and b swapped for qw and qb.
# This is equivalent to
# y_post = Normal(mu=ed.dot(X, qw) + qb, sigma=tf.ones(N))
y_post = ed.copy(y, {w: qw, b: qb})

print("Mean squared error on test data:")
test_mse = ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})
print(test_mse)
each set of latent variables z in zs; and also return the true value.""" x_test = xs['x'] b = zs[:, 0] W = tf.transpose(zs[:, 1:]) y_pred = tf.reduce_mean(tf.matmul(x_test, W) + b, 1) return y_pred def build_toy_dataset(n_data=40, coeff=np.random.randn(10), noise_std=0.1): n_dim = len(coeff) x = np.random.randn(n_data, n_dim).astype(np.float32) y = np.dot(x, coeff) + norm.rvs(0, noise_std, size=n_data) return {'x': x, 'y': y} ed.set_seed(42) model = LinearModel() variational = Variational() variational.add(Normal(model.num_vars)) coeff = np.random.randn(10) data = build_toy_dataset(coeff=coeff) inference = ed.MFVI(model, variational, data) inference.run(n_iter=250, n_minibatch=5, n_print=10) data_test = build_toy_dataset(coeff=coeff) x_test, y_test = data_test['x'], data_test['y'] print(ed.evaluate('mse', model, variational, {'x': x_test}, y_test))
def main(_):
    """Fit Bayesian logistic regression with KLqp and print train/test metrics.

    Builds a Normal prior over the weight vector, Bernoulli likelihoods for
    the train and test inputs (separate fixed-size placeholders), fits the
    variational family returned by `construct_multivariatenormaldiag` with
    ed.KLqp, and prints log-likelihood and binary accuracy on both splits.

    Args:
        _: Unused positional argument.
    """
    ed.set_seed(FLAGS.seed)

    ((Xtrain, ytrain), (Xtest, ytest)) = blr_utils.get_data()
    N, D = Xtrain.shape
    N_test, D_test = Xtest.shape

    g = tf.Graph()
    with g.as_default():
        tf.set_random_seed(FLAGS.seed)
        sess = tf.InteractiveSession()
        try:
            with sess.as_default():
                # MODEL
                w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))

                X = tf.placeholder(tf.float32, [N, D])
                y = Bernoulli(logits=ed.dot(X, w))

                # The placeholders have fixed row counts, so the test split
                # gets its own placeholder and likelihood node.
                # TODO why are these test variables necessary?
                X_test = tf.placeholder(tf.float32, [N_test, D_test])
                y_test = Bernoulli(logits=ed.dot(X_test, w))

                # Previously named `iter`, which shadowed the builtin.
                iteration = 42  # TODO
                qw = construct_multivariatenormaldiag([D], iteration, 'qw')
                inference = ed.KLqp({w: qw}, data={X: Xtrain, y: ytrain})
                tf.global_variables_initializer().run()
                inference.run(n_iter=FLAGS.LMO_iter)

                # Posterior predictive nodes for each split.
                x_post = ed.copy(y, {w: qw})
                x_post_t = ed.copy(y_test, {w: qw})

                splits = (
                    ('train', {x_post: ytrain, X: Xtrain}),
                    ('test', {x_post_t: ytest, X_test: Xtest}),
                )
                # (printed label, ed.evaluate metric name); the output order
                # is metric-major: train then test for each metric.
                metrics = (
                    ('log-likelihood', 'log_likelihood'),
                    ('binary_accuracy', 'binary_accuracy'),
                )
                for label, metric in metrics:
                    for split, feed in splits:
                        print('%s %s ' % (label, split),
                              ed.evaluate(metric, data=feed))
        finally:
            # Release the session even if model building or inference fails.
            sess.close()
def main(_):
  """Fit Bayesian linear regression with SGHMC and visualise the result.

  Generates toy train/test data, stores draws for the weights and bias in
  Empirical variables via ed.SGHMC, shows a joint plot of the kept draws,
  prints the test mean squared error, and plots lines sampled from the
  prior and from the posterior on top of the training data.

  Args:
    _: Unused positional argument.
  """

  def plot_predictive_samples(w_rv, b_rv, n_samples=10):
    # Scatter the training data and overlay `n_samples` lines
    # inputs * w + b, with (w, b) drawn from the given random variables.
    # Reads X_train / y_train from the enclosing scope.
    w_draws = w_rv.sample(n_samples).eval()
    b_draws = b_rv.sample(n_samples).eval()

    plt.scatter(X_train, y_train)

    inputs = np.linspace(-1, 10, num=400)
    for w_draw, b_draw in zip(w_draws, b_draws):
      plt.plot(inputs, inputs * w_draw + b_draw)

    plt.show()

  ed.set_seed(42)

  # DATA
  X_train, y_train = build_toy_dataset(FLAGS.N)
  X_test, y_test = build_toy_dataset(FLAGS.N)

  # MODEL
  X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D])
  w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D))
  b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
  y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(FLAGS.N))

  # INFERENCE
  # Empirical variables hold FLAGS.T rows each.
  qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D]))
  qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T, 1]))

  inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
  inference.run(step_size=1e-3)

  # CRITICISM

  # Plot posterior samples, keeping every FLAGS.stride-th row between
  # FLAGS.nburn and FLAGS.T.
  sns.jointplot(qb.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride],
                qw.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride])
  plt.show()

  # Posterior predictive checks.
  y_post = ed.copy(y, {w: qw, b: qb})
  # This is equivalent to
  # y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(FLAGS.N))

  print("Mean squared error on test data:")
  print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

  print("Displaying prior predictive samples.")
  plot_predictive_samples(w, b)

  print("Displaying posterior predictive samples.")
  plot_predictive_samples(qw, qb)