def create_target_dist():
    """Create and return target distribution."""
    if FLAGS.dist != 'normal':
        raise NotImplementedError
    pi = np.random.dirichlet([1.] * K)
    mus = np.random.randn(K, D).astype(np.float32)
    stds = softplus(np.random.randn(K, D).astype(np.float32))
    pcomps = [
        MultivariateNormalDiag(
            loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
            scale_diag=tf.convert_to_tensor(stds[i], dtype=tf.float32))
        for i in range(K)
    ]
    p = Mixture(
        cat=Categorical(probs=tf.convert_to_tensor(pi, dtype=tf.float32)),
        components=pcomps)
    return p, mus, stds
def __init__(self, mnist, input_dim=784, output_dim=10, iterations=250,
             batch_size=100):
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.iterations = iterations
    self.batch_size = batch_size
    self.X_placeholder = tf.placeholder(tf.float32, (None, self.input_dim))
    self.Y_placeholder = tf.placeholder(tf.int32, (None,))
    w_shape = (input_dim, output_dim)
    # Priors over the softmax-regression weights and biases.
    self.w = Normal(loc=tf.zeros(w_shape), scale=tf.ones(w_shape))
    self.b = Normal(loc=tf.zeros(w_shape[-1]), scale=tf.ones(w_shape[-1]))
    self.pred = Categorical(logits=tf.matmul(self.X_placeholder, self.w) + self.b)
    # Variational approximations to the weight and bias posteriors.
    self.qw = Normal(loc=tf.Variable(tf.random_normal(w_shape)),
                     scale=tf.nn.softplus(tf.Variable(tf.random_normal(w_shape))))
    self.qb = Normal(loc=tf.Variable(tf.random_normal([w_shape[-1]])),
                     scale=tf.nn.softplus(tf.Variable(tf.random_normal([w_shape[-1]]))))
    self.inference = ed.KLqp({self.w: self.qw, self.b: self.qb},
                             data={self.pred: self.Y_placeholder})
    # Scale the likelihood so each minibatch stands in for the full dataset.
    self.inference.initialize(
        n_iter=self.iterations,
        scale={self.pred: mnist.train.num_examples / self.batch_size})
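# Hedged training-loop sketch for the class whose __init__ is shown above,
# following Edward's standard update() API. `model` is assumed to be an
# instance of that class and `mnist` the usual TF MNIST dataset object with
# integer (non-one-hot) labels; neither name is part of the original snippet.
sess = ed.get_session()
tf.global_variables_initializer().run()
for _ in range(model.inference.n_iter):
    X_batch, Y_batch = mnist.train.next_batch(model.batch_size)
    info_dict = model.inference.update(feed_dict={model.X_placeholder: X_batch,
                                                  model.Y_placeholder: Y_batch})
    model.inference.print_progress(info_dict)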
def language_model(input):
    """Form p(x[0], ..., x[timesteps]),

    \prod_{t=1}^{timesteps} p(x[t] | x[:t]),

    where x = [x[0], ..., x[timesteps - 1]] is `input`. We do not include
    p(x[0]), which is constant with respect to the parameters; the input also
    does not include the token at index `timesteps`. To calculate the
    probability, we call log_prob on x = [x[1], ..., x[timesteps]].

    We implement this separately from the generative model so the forward
    pass, e.g., embedding/dense layers, can be parallelized.

    [batch_size, timesteps] -> [batch_size, timesteps]
    """
    x = tf.one_hot(input, depth=vocab_size, dtype=tf.float32)
    h = tf.fill(tf.stack([tf.shape(x)[0], hidden_size]), 0.0)
    c = tf.fill(tf.stack([tf.shape(x)[0], hidden_size]), 0.0)
    hs = []
    reuse = None
    for t in range(timesteps):
        if t > 0:
            reuse = True
        xt = x[:, t, :]
        h, c = lstm_cell(xt, h, c, name="lstm", reuse=reuse)
        hs.append(h)
    h = tf.stack(hs, 1)
    logits = tf.layers.dense(h, vocab_size, name="dense")
    output = Categorical(logits=logits)
    return output
def get_tf_mixture(locs, diags, weights):
    q_comps = [
        MultivariateNormalDiag(loc=loc, scale_diag=scale_diag)
        for loc, scale_diag in zip(locs, diags)
    ]
    cat = Categorical(probs=tf.convert_to_tensor(weights))
    return Mixture(cat=cat, components=q_comps)
def define_val_model(self, N, P, K):
    # Define new graph
    self.z_test = Gamma(2. * tf.ones([N, K]), 1. * tf.ones([N, K]))
    self.l_test = TransformedDistribution(
        distribution=Normal(self.mean_llib * tf.ones([N, 1]),
                            np.sqrt(self.std_llib) * tf.ones([N, 1])),
        bijector=tf.contrib.distributions.bijectors.Exp())
    rho_test = tf.matmul(self.z_test, self.W0)
    rho_test = rho_test / tf.reshape(tf.reduce_sum(rho_test, axis=1), (-1, 1))  # NxP
    self.lam_test = Gamma(self.r, self.r / (rho_test * self.l_test))
    if self.zero_inflation:
        logit_pi_test = tf.matmul(self.z_test, self.W1)
        pi_test = tf.minimum(
            tf.maximum(tf.nn.sigmoid(logit_pi_test), 1e-7), 1. - 1e-7)
        cat_test = Categorical(
            probs=tf.stack([pi_test, 1. - pi_test], axis=2))
        components_test = [
            Poisson(rate=1e-30 * tf.ones([N, P])),
            Poisson(rate=self.lam_test)
        ]
        self.likelihood_test = Mixture(cat=cat_test, components=components_test)
    else:
        self.likelihood_test = Poisson(rate=self.lam_test)
def BuildModelDynamic(x):
    pq = {}
    q = []
    for i, value in enumerate(layers):
        if i == len(layers) - 1:
            break
        inputs = value
        outputs = layers[i + 1]
        # Priors over this layer's weights and biases.
        w = Normal(loc=tf.zeros([inputs, outputs]), scale=tf.ones([inputs, outputs]))
        b = Normal(loc=tf.zeros(outputs), scale=tf.ones(outputs))
        if i == len(layers) - 2:
            # Output layer: softmax probabilities for the Categorical likelihood.
            x = tf.nn.softmax(tf.matmul(x, w) + b)
        else:
            x = tf.nn.relu(tf.matmul(x, w) + b)
        # Variational posteriors; scales go through softplus to stay positive.
        qw = Normal(loc=tf.get_variable('loc/qw_' + str(i), [inputs, outputs]),
                    scale=tf.nn.softplus(
                        tf.get_variable('scale/qw_' + str(i), [inputs, outputs])))
        qb = Normal(loc=tf.get_variable('loc/qb_' + str(i), [outputs]),
                    scale=tf.nn.softplus(
                        tf.get_variable('scale/qb_' + str(i), [outputs])))
        pq[w] = qw
        pq[b] = qb
        q.append({'qw': qw, 'qb': qb})
    y = Categorical(probs=x)
    y_ph = tf.placeholder(tf.int32, [N])
    inference = ed.KLqp(pq, data={y: y_ph})
    return inference, y_ph, y, q
def language_model(input, vocab_size):
    """Form p(x[0], ..., x[timesteps - 1]),

    \prod_{t=0}^{timesteps - 1} p(x[t] | x[:t]).

    To calculate the probability, we call log_prob on
    x = [x[0], ..., x[timesteps - 1]] given
    `input` = [0, x[0], ..., x[timesteps - 2]].

    We implement this separately from the generative model so the forward
    pass, e.g., embedding/dense layers, can be parallelized.

    [batch_size, timesteps] -> [batch_size, timesteps]
    """
    x = tf.one_hot(input, depth=vocab_size, dtype=tf.float32)
    h = tf.fill(tf.stack([tf.shape(x)[0], FLAGS.hidden_size]), 0.0)
    c = tf.fill(tf.stack([tf.shape(x)[0], FLAGS.hidden_size]), 0.0)
    hs = []
    reuse = None
    for t in range(FLAGS.timesteps):
        if t > 0:
            reuse = True
        xt = x[:, t, :]
        h, c = lstm_cell(xt, h, c, name="lstm", reuse=reuse)
        hs.append(h)
    h = tf.stack(hs, 1)
    logits = tf.layers.dense(h, vocab_size, name="dense")
    output = Categorical(logits=logits)
    return output
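# Hedged usage sketch for `language_model` above: it wires up the shifted
# input described in the docstring and evaluates the per-token log-likelihood.
# `FLAGS`, `vocab_size`, and the placeholder name `x_ph` are assumptions, not
# part of the original snippet.
x_ph = tf.placeholder(tf.int32, [None, FLAGS.timesteps])
# `input` = [0, x[0], ..., x[timesteps - 2]]: the sequence shifted right by one.
shifted_input = tf.concat([tf.zeros_like(x_ph[:, :1]), x_ph[:, :-1]], axis=1)
x_rv = language_model(shifted_input, vocab_size)
# Shape [batch_size, timesteps]: log p(x[t] | x[:t]) for every position t.
token_log_probs = x_rv.log_prob(x_ph)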
def build_mixture(weights, components):
    cat = Categorical(probs=tf.convert_to_tensor(weights))
    comps = [
        base(loc=tf.convert_to_tensor(c['loc']),
             scale=tf.convert_to_tensor(c['scale']))
        for c in components
    ]
    mix = Mixture(cat=cat, components=comps)
    return mix
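# Illustrative call to `build_mixture`, assuming the module-level `base`
# distribution is something like edward.models.Normal; the weights and
# component parameters below are made-up toy values.
toy_weights = [0.3, 0.7]
toy_components = [
    {'loc': np.float32(-1.0), 'scale': np.float32(0.5)},
    {'loc': np.float32(2.0), 'scale': np.float32(1.0)},
]
toy_mix = build_mixture(toy_weights, toy_components)
toy_samples = toy_mix.sample(100)  # 100 draws from the two-component mixture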
def test_list(self):
    with self.test_session() as sess:
        x = Normal(tf.constant(0.0), tf.constant(0.1))
        y = Normal(tf.constant(10.0), tf.constant(0.1))
        cat = Categorical(logits=tf.zeros(5))
        components = [Normal(x, tf.constant(0.1)) for _ in range(5)]
        z = Mixture(cat=cat, components=components)
        z_new = ed.copy(z, {x: y.value()})
        self.assertGreater(z_new.value().eval(), 5.0)
def _build_model(self, X, y):
    """Implementation of the KMN."""
    # create a placeholder for the target
    self.y_ph = y_ph = tf.placeholder(tf.float32, [None])
    self.n_sample_ph = tf.placeholder(tf.int32, None)

    # store feature dimension size for placeholder
    self.n_features = X.shape[1]

    # if no external estimator is provided, create a default neural network
    if self.estimator is None:
        self.X_ph = tf.placeholder(tf.float32, [None, self.n_features])
        # two dense hidden layers with 15 nodes each
        x = Dense(15, activation='relu')(self.X_ph)
        x = Dense(15, activation='relu')(x)
        self.estimator = x

    # get batch size
    self.batch_size = tf.shape(self.X_ph)[0]

    # locations of the gaussian kernel centers
    n_locs = self.n_centers
    self.locs = locs = sample_center_points(
        y, method=self.center_sampling_method, k=n_locs, keep_edges=self.keep_edges)
    self.locs_array = locs_array = tf.unstack(
        tf.transpose(tf.multiply(tf.ones((self.batch_size, n_locs)), locs)))

    # scales of the gaussian kernels
    self.scales = scales = tf.nn.softplus(
        tf.Variable(self.init_scales, dtype=tf.float32, trainable=self.train_scales))
    self.scales_array = scales_array = tf.unstack(
        tf.transpose(tf.multiply(tf.ones((self.batch_size, self.n_scales)), scales)))

    # kernel weights, as output by the neural network
    self.weights = weights = Dense(
        n_locs * self.n_scales, activation='softplus')(self.estimator)

    # mixture distributions
    self.cat = cat = Categorical(logits=weights)
    self.components = components = [
        Normal(loc=loc, scale=scale)
        for loc in locs_array for scale in scales_array
    ]
    self.mixtures = mixtures = Mixture(
        cat=cat, components=components, value=tf.zeros_like(y_ph))

    # tensor to store samples
    self.samples = mixtures.sample()

    # store min/max of training target values for a sensible default grid
    # in self.predict_density()
    self.y_min = y.min()
    self.y_max = y.max()

    # placeholder for the grid
    self.y_grid_ph = y_grid_ph = tf.placeholder(tf.float32)
    # tensor to store grid point densities
    self.densities = tf.transpose(mixtures.prob(tf.reshape(y_grid_ph, (-1, 1))))

    # tensor to compute likelihoods
    self.likelihoods = mixtures.prob(y_ph)
def eye_color(person):
    random_variables = {
        x.name: x for x in tf.get_collection('_random_variable_collection_')
    }
    if person + '/' in random_variables:
        return random_variables[person + '/']
    else:
        return Categorical(probs=tf.ones(3) / 3, name=person)
def model_stationary_dirichlet_categorical_edward(n_states, chain_len, batch_size):
    """Models a stationary Dirichlet-Categorical Markov Chain in Edward."""
    tf.reset_default_graph()
    # create default starting state probability vector
    pi_0 = Dirichlet(tf.ones(n_states))
    x_0 = Categorical(probs=pi_0, sample_shape=batch_size)
    # transition matrix
    pi_T = Dirichlet(tf.ones([n_states, n_states]))
    x = []
    for _ in range(chain_len):
        x_tm1 = x[-1] if x else x_0
        x_t = Categorical(probs=tf.gather(pi_T, x_tm1))
        x.append(x_t)
    return x, pi_0, pi_T
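# Small prior-predictive sketch for the Edward chain above; the state count,
# chain length, and batch size are arbitrary illustrative values.
x, pi_0, pi_T = model_stationary_dirichlet_categorical_edward(
    n_states=3, chain_len=5, batch_size=4)
sess = tf.Session()
# One array per timestep, each of shape [batch_size] with values in {0, 1, 2}.
prior_states = sess.run([x_t.value() for x_t in x])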
def eye_color(person):
    random_variables = {
        x.name: x for x in tf.get_collection('_random_variable_collection_')
    }
    if person + '/' in random_variables:
        return random_variables[person + '/']
    else:
        return Categorical(logits=ed.logit(tf.constant([1.0 / 3] * 3)), name=person)
def deserialize_target_from_file(filename):
    qt_deserialized = np.load(filename)
    mus = qt_deserialized['mus'].astype(np.float32)
    stds = qt_deserialized['stds'].astype(np.float32)
    pi = qt_deserialized['pi'].astype(np.float32)

    cat = Categorical(probs=tf.convert_to_tensor(pi))
    target_comps = [
        Normal(loc=tf.convert_to_tensor(mus[i]),
               scale=tf.convert_to_tensor(stds[i]))
        for i in range(len(mus))
    ]
    return Mixture(cat=cat, components=target_comps)
def main():
    # build model
    xcomps = [
        Normal(loc=tf.convert_to_tensor(mixture_model_relbo.mus[i]),
               scale=tf.convert_to_tensor(mixture_model_relbo.stds[i]))
        for i in range(len(mixture_model_relbo.mus))
    ]
    x = Mixture(
        cat=Categorical(probs=tf.convert_to_tensor(mixture_model_relbo.pi)),
        components=xcomps,
        sample_shape=mixture_model_relbo.N)
    x_mvns = [
        MultivariateNormalDiag(
            loc=tf.convert_to_tensor(mixture_model_relbo.mus[i]),
            scale_diag=tf.convert_to_tensor(mixture_model_relbo.stds[i]))
        for i in range(len(mixture_model_relbo.mus))
    ]
    x_train, components = mixture_model_relbo.build_toy_dataset(
        mixture_model_relbo.N)
    n_examples, n_features = x_train.shape
    qxs = [
        MultivariateNormalDiag(loc=[scipy.stats.norm.rvs(1)],
                               scale_diag=[scipy.stats.norm.rvs(1)])
        for i in range(10)
    ]
    truth = [
        MultivariateNormalDiag(loc=mixture_model_relbo.mus[i],
                               scale_diag=mixture_model_relbo.stds[i])
        for i in range(len(mixture_model_relbo.mus))
    ]
    qxs.extend(truth)
    mix = Mixture(cat=Categorical(probs=[1. / len(qxs)] * len(qxs)),
                  components=qxs)
    sess = tf.InteractiveSession()
    with sess.as_default():
        mixture_model_relbo.fully_corrective(mix, x)
def model_non_stationary_dirichlet_categorical(n_states, chain_len, batch_size):
    """Models a non-stationary Dirichlet-Categorical Markov Chain in Edward."""
    tf.reset_default_graph()
    # create default starting state probability vector
    pi_0 = Dirichlet(tf.ones(n_states))
    x_0 = Categorical(probs=pi_0, sample_shape=batch_size)
    pi_T, x = [], []
    for _ in range(chain_len):
        x_tm1 = x[-1] if x else x_0
        # transition matrix, one per position in the chain:
        # i.e. we now condition both on previous state and age of the loan
        pi_T_t = Dirichlet(tf.ones([n_states, n_states]))
        x_t = Categorical(probs=tf.gather(pi_T_t, x_tm1))
        pi_T.append(pi_T_t)
        x.append(x_t)
    return x, pi_0, pi_T
def deserialize_mixture_from_file(filename):
    qt_deserialized = np.load(filename)
    locs = qt_deserialized['locs'].astype(np.float32)
    scale_diags = qt_deserialized['scale_diags'].astype(np.float32)
    weights = qt_deserialized['weights'].astype(np.float32)

    q_comps = [
        Normal(loc=loc[0], scale=scale_diag[0])
        for loc, scale_diag in zip(locs, scale_diags)
    ]
    cat = Categorical(probs=tf.convert_to_tensor(weights))
    q_latest = Mixture(cat=cat, components=q_comps)
    return q_latest
def copy_model_tfp(qpi_0, qpi_T, chain_len, n_states, sample_shape):
    """Used in place of ed.copy as it seems like ed.copy doesn't take into
    account all the necessary dependencies in our graph."""
    x_0 = Categorical(probs=qpi_0, sample_shape=sample_shape)
    # transition matrix
    transition_distribution = Categorical(probs=qpi_T)
    pi_E = np.eye(n_states, dtype=np.float32)  # identity matrix
    emission_distribution = Categorical(probs=pi_E)
    model_post = HiddenMarkovModel(
        initial_distribution=x_0,
        transition_distribution=transition_distribution,
        observation_distribution=emission_distribution,
        num_steps=chain_len,
        sample_shape=sample_shape)
    return model_post
def deserialize_mixture_from_file(filename):
    qt_deserialized = np.load(filename)
    locs = qt_deserialized['locs'].astype(np.float32)
    scale_diags = qt_deserialized['scale_diags'].astype(np.float32)
    weights = qt_deserialized['weights'].astype(np.float32)

    q_comps = [
        MultivariateNormalDiag(loc=loc, scale_diag=scale_diag)
        for loc, scale_diag in zip(locs, scale_diags)
    ]
    cat = Categorical(probs=tf.convert_to_tensor(weights))
    q_latest = Mixture(cat=cat, components=q_comps)
    return q_latest
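# Round-trip sketch (assumed file layout): the loader above expects an .npz
# archive with 'locs', 'scale_diags', and 'weights' arrays, e.g. one written
# with np.savez. The path and array shapes below are illustrative only.
np.savez('/tmp/qt_iter10.npz',
         locs=np.zeros((3, 2), dtype=np.float32),
         scale_diags=np.ones((3, 2), dtype=np.float32),
         weights=np.array([0.2, 0.3, 0.5], dtype=np.float32))
q = deserialize_mixture_from_file('/tmp/qt_iter10.npz')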
def ed_graph_2(disc=1):
    # Priors
    if str(sys.argv[4]) == 'laplace':
        W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                      scale=(std**2 / D) * tf.ones([D, n_hidden]))
        W_1 = Laplace(loc=tf.zeros([n_hidden, K]),
                      scale=(std**2 / n_hidden) * tf.ones([n_hidden, K]))
        b_0 = Laplace(loc=tf.zeros(n_hidden), scale=(std**2 / D) * tf.ones(n_hidden))
        b_1 = Laplace(loc=tf.zeros(K), scale=(std**2 / n_hidden) * tf.ones(K))

    if str(sys.argv[4]) == 'normal':
        W_0 = Normal(loc=tf.zeros([D, n_hidden]),
                     scale=std * D**(-.5) * tf.ones([D, n_hidden]))
        W_1 = Normal(loc=tf.zeros([n_hidden, K]),
                     scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K]))
        b_0 = Normal(loc=tf.zeros(n_hidden), scale=std * D**(-.5) * tf.ones(n_hidden))
        b_1 = Normal(loc=tf.zeros(K), scale=std * n_hidden**(-.5) * tf.ones(K))

    if str(sys.argv[4]) == 'T':
        W_0 = StudentT(df=df * tf.ones([D, n_hidden]), loc=tf.zeros([D, n_hidden]),
                       scale=std**2 / D * tf.ones([D, n_hidden]))
        W_1 = StudentT(df=df * tf.ones([n_hidden, K]), loc=tf.zeros([n_hidden, K]),
                       scale=std**2 / n_hidden * tf.ones([n_hidden, K]))
        b_0 = StudentT(df=df * tf.ones([n_hidden]), loc=tf.zeros(n_hidden),
                       scale=std**2 / D * tf.ones(n_hidden))
        b_1 = StudentT(df=df * tf.ones([K]), loc=tf.zeros(K),
                       scale=std**2 / n_hidden * tf.ones(K))

    x = tf.placeholder(tf.float32, [None, None])
    y = Categorical(logits=nn(x, W_0, b_0, W_1, b_1))
    # We use a placeholder for the labels in anticipation of the training data.
    y_ph = tf.placeholder(tf.int32, [None])

    # Use placeholders for the pre-trained posteriors
    w0 = tf.placeholder(tf.float32, [n_samp, D, n_hidden])
    w1 = tf.placeholder(tf.float32, [n_samp, n_hidden, K])
    b0 = tf.placeholder(tf.float32, [n_samp, n_hidden])
    b1 = tf.placeholder(tf.float32, [n_samp, K])

    # Empirical distributions
    qW_0 = Empirical(params=tf.Variable(w0))
    qW_1 = Empirical(params=tf.Variable(w1))
    qb_0 = Empirical(params=tf.Variable(b0))
    qb_1 = Empirical(params=tf.Variable(b1))

    if str(sys.argv[3]) == 'hmc':
        inference = ed.HMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                           data={y: y_ph})
    if str(sys.argv[3]) == 'sghmc':
        inference = ed.SGHMC({W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1},
                             data={y: y_ph})

    # Initialise the inference variables
    if str(sys.argv[3]) == 'hmc':
        inference.initialize(step_size=disc * leap_size, n_steps=step_no, n_print=100)
    if str(sys.argv[3]) == 'sghmc':
        inference.initialize(step_size=disc * leap_size, friction=disc**2 * 0.1,
                             n_print=100)

    return ((x, y), y_ph, W_0, b_0, W_1, b_1, qW_0, qb_0, qW_1, qb_1, inference,
            w0, w1, b0, b1)
def model_stationary_dirichlet_categorical_tfp(n_states, chain_len, batch_size):
    """Models a stationary Dirichlet-Categorical Markov Chain in
    TensorFlow Probability/Edward2."""
    tf.reset_default_graph()
    # create default starting state probability vector
    pi_0 = Dirichlet(tf.ones(n_states))
    x_0 = Categorical(probs=pi_0, sample_shape=batch_size)
    # transition matrix
    pi_T = Dirichlet(tf.ones([n_states, n_states]))
    transition_distribution = Categorical(probs=pi_T)
    pi_E = np.eye(n_states, dtype=np.float32)  # identity matrix
    emission_distribution = Categorical(probs=pi_E)
    model = HiddenMarkovModel(
        initial_distribution=x_0,
        transition_distribution=transition_distribution,
        observation_distribution=emission_distribution,
        num_steps=chain_len,
        sample_shape=batch_size)
    return model, pi_0, pi_T
def language_model_gen(batch_size):
    """Generate x ~ prod p(x_t | x_{<t}). Output [batch_size, timesteps]."""
    # Initialize data input randomly.
    x = tf.random_uniform([batch_size], 0, vocab_size, dtype=tf.int32)
    h = tf.zeros([batch_size, hidden_size])
    c = tf.zeros([batch_size, hidden_size])
    xs = []
    for _ in range(timesteps):
        x = tf.one_hot(x, depth=vocab_size, dtype=tf.float32)
        h, c = lstm_cell(x, h, c, name="lstm")
        logits = tf.layers.dense(h, vocab_size, name="dense")
        x = Categorical(logits=logits).value()
        xs.append(x)
    xs = tf.cast(tf.stack(xs, 1), tf.int32)
    return xs
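# Illustrative sampling call for `language_model_gen`: draw a batch of
# synthetic sequences from the prior. The enclosing variable scope with
# reuse=tf.AUTO_REUSE is an assumption so the per-timestep "lstm"/"dense"
# variables are created once and then shared; batch size and scope name are
# arbitrary, and `timesteps`/`vocab_size` are assumed defined as above.
with tf.variable_scope("language_model", reuse=tf.AUTO_REUSE):
    generated = language_model_gen(batch_size=32)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sampled_sequences = sess.run(generated)  # int32 array, shape [32, timesteps]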
def target_dist(*args, **kwargs):
    """Build the target distribution."""
    stds = kwargs['stds']
    mus = kwargs['mus']
    pi = kwargs['pi']
    pcomps = [
        MultivariateNormalDiag(
            loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
            scale_diag=tf.convert_to_tensor(stds[i], dtype=tf.float32))
        for i in range(len(mus))
    ]
    p = Mixture(
        cat=Categorical(probs=tf.convert_to_tensor(pi[0])),
        components=pcomps)
    return p
def line_search(q, s, p):
    s_samples = s.sample(1000).eval()
    q_samples = q.sample(1000).eval()
    gamma = 0.5

    def get_new_weights(gamma):
        weights = q.cat.probs.eval()
        weights *= (1 - gamma)
        weights = np.append(weights, gamma).astype(np.float32)
        return weights

    comps = list(q.components)
    comps.append(s)
    T = 50
    for t in range(T):
        weights = get_new_weights(gamma)
        mix = Mixture(cat=Categorical(probs=weights), components=comps)
        s_expectation = tf.reduce_sum(mix.log_prob(s_samples),
                                      axis=1) - p.log_prob(s_samples)
        q_expectation = tf.reduce_sum(mix.log_prob(q_samples),
                                      axis=1) - p.log_prob(q_samples)
        grad = s_expectation - q_expectation
        grad = tf.reduce_mean(grad).eval()
        print("t", t, "gamma", gamma, "grad", grad)
        # Projected gradient step on the mixture weight gamma.
        step_size = 0.01 / (t + 1)
        gamma_prime = gamma - grad * step_size
        if gamma_prime >= 1 or gamma_prime <= 0:
            gamma_prime = max(min(gamma_prime, 1.), 0.)
        if np.abs(gamma - gamma_prime) < 1e-6:
            gamma = gamma_prime
            break
        gamma = gamma_prime
        if gamma < 1e-5:
            gamma = 1e-5
    print("final t", t, "gamma", gamma, "grad", grad)
    return get_new_weights(gamma)
def get_mixture(weights, components):
    """Build a mixture model with the given weights and components.

    Args:
        weights: list or np.array of mixture weights
        components: list of ed distributions

    Returns:
        the constructed mixture
    """
    assert len(weights) == len(components), \
        'Weights size %d not same as components size %d' % (
            len(weights), len(components))
    assert math.isclose(1., sum(weights), rel_tol=1e-5), "Weights not normalized"
    return Mixture(
        cat=Categorical(probs=tf.convert_to_tensor(weights, dtype=tf.float32)),
        components=components)
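# Example call to `get_mixture`, assuming MultivariateNormalDiag is imported
# from edward.models; the weights and component parameters are toy values.
example_components = [
    MultivariateNormalDiag(loc=tf.zeros(2), scale_diag=tf.ones(2)),
    MultivariateNormalDiag(loc=2.0 * tf.ones(2), scale_diag=0.5 * tf.ones(2)),
]
example_mixture = get_mixture([0.4, 0.6], example_components)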
def test_indexedslices(self):
    """Test that gradients accumulate when tf.gradients doesn't return a
    tf.Tensor (IndexedSlices)."""
    with self.test_session() as sess:
        N = 10  # number of data points
        K = 2  # number of clusters
        T = 1  # number of MCMC samples

        x_data = np.zeros(N, dtype=np.float32)

        mu = Normal(0.0, 1.0, sample_shape=K)
        c = Categorical(logits=tf.zeros(N))
        x = Normal(tf.gather(mu, c), tf.ones(N))

        qmu = Empirical(params=tf.Variable(tf.ones([T, K])))
        qc = Empirical(params=tf.Variable(tf.ones([T, N])))

        inference = ed.HMC({mu: qmu}, data={x: x_data})
        inference.initialize()
def define_stochastic_model(self, P, K):
    M = self.minibatch_size

    self.W0 = Gamma(0.1 * tf.ones([K, P]), 0.3 * tf.ones([K, P]))
    if self.zero_inflation:
        self.W1 = Normal(tf.zeros([K, P]), tf.ones([K, P]))

    self.z = Gamma(2. * tf.ones([M, K]), 1. * tf.ones([M, K]))
    self.r = Gamma(2. * tf.ones([P]), 1. * tf.ones([P]))
    self.l = TransformedDistribution(
        distribution=Normal(self.mean_llib * tf.ones([M, 1]),
                            self.std_llib * tf.ones([M, 1])),
        bijector=tf.contrib.distributions.bijectors.Exp())

    self.rho = tf.matmul(self.z, self.W0)
    self.rho = self.rho / tf.reshape(tf.reduce_sum(self.rho, axis=1), (-1, 1))  # NxP
    self.lam = Gamma(self.r, self.r / (self.rho * self.l))

    if self.zero_inflation:
        self.logit_pi = tf.matmul(self.z, self.W1)
        self.pi = tf.minimum(
            tf.maximum(tf.nn.sigmoid(self.logit_pi), 1e-7), 1. - 1e-7)
        self.cat = Categorical(
            probs=tf.stack([self.pi, 1. - self.pi], axis=2))
        self.components = [
            Poisson(rate=1e-30 * tf.ones([M, P])),
            Poisson(rate=self.lam)
        ]
        self.likelihood = Mixture(cat=self.cat, components=self.components)
    else:
        self.likelihood = Poisson(rate=self.lam)
def main(_):
    # DATA
    pi_true = np.random.dirichlet(np.array([20.0, 30.0, 10.0, 10.0]))
    z_data = np.array(
        [np.random.choice(FLAGS.K, 1, p=pi_true)[0] for n in range(FLAGS.N)])
    print("pi: {}".format(pi_true))

    # MODEL
    pi = Dirichlet(tf.ones(4))
    z = Categorical(probs=pi, sample_shape=FLAGS.N)

    # INFERENCE
    qpi = Dirichlet(
        tf.nn.softplus(tf.get_variable("qpi/concentration", [FLAGS.K])))
    inference = ed.KLqp({pi: qpi}, data={z: z_data})
    inference.run(n_iter=1500, n_samples=30)

    sess = ed.get_session()
    print("Inferred pi: {}".format(sess.run(qpi.mean())))
def test_lipschitz_init(pi, mus, stds):
    g = tf.Graph()
    with g.as_default():
        tf.set_random_seed(FLAGS.seed)
        sess = tf.InteractiveSession()
        with sess.as_default():
            s = construct_normal([1], 0, 's')
            sess.run(tf.global_variables_initializer())
            logger.info('mean of s = %.3f, std = %.3f' %
                        (s.mean().eval(), s.stddev().eval()))
            # build target distribution
            pcomps = [
                MultivariateNormalDiag(
                    loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
                    scale_diag=tf.convert_to_tensor(stds[i], dtype=tf.float32))
                for i in range(len(mus))
            ]
            p = Mixture(cat=Categorical(probs=tf.convert_to_tensor(pi)),
                        components=pcomps)
            lipschitz_init_estimate = opt.adafw_linit(s, p)
            logger.info('L estimate is %.5f' % lipschitz_init_estimate)
def fit(self, X, y):
    # Encode class labels as integer ids for the Categorical likelihood.
    self.label_encoder = LabelEncoder().fit(y)
    y = self.label_encoder.transform(y)

    DIN = X.shape[1]
    DOUT = len(set(y))

    X_data = tf.placeholder(tf.float32, [None, DIN])
    # Priors over the softmax-regression weights and biases.
    W = Normal(loc=tf.zeros([DIN, DOUT]), scale=tf.ones([DIN, DOUT]))
    b = Normal(loc=tf.zeros([DOUT]), scale=tf.ones([DOUT]))
    y_data = Categorical(logits=tf.matmul(X_data, W) + b)

    # Variational posteriors.
    qW = Normal(loc=tf.Variable(tf.random_normal([DIN, DOUT])),
                scale=tf.nn.softplus(tf.Variable(tf.random_normal([DIN, DOUT]))))
    qb = Normal(loc=tf.Variable(tf.random_normal([DOUT])),
                scale=tf.nn.softplus(tf.Variable(tf.random_normal([DOUT]))))

    self.model = ed.KLqp({W: qW, b: qb}, data={X_data: X, y_data: y})
    self.model.run(n_samples=self.n_samples, n_iter=self.n_iter)

    self.W = qW
    self.b = qb
def _test(logits, n):
    x = Categorical(logits=logits)
    val_est = get_dims(x.sample(n))
    val_true = n + get_dims(logits)[:-1]
    assert val_est == val_true