def logZGivenX(X, mu, sigma_squared, piK):
    # log P(z|x) = log P(x|z) + log P(z) - log P(x)
    logXGivenZ = logGaussianDensity(X, mu, sigma_squared)  # B x K
    logZ = tf.log(piK)  # 1 x K
    logZ_X = logZ + logXGivenZ  # B x K
    logX = reduce_logsumexp(logZ_X, 1, keep_dims=True)  # B x 1
    return logZ_X - logX, logX, logZ_X
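# The reduce_logsumexp used above comes from the course utils module, which is
# not shown in this excerpt. A minimal sketch of what such a helper typically
# does, assuming the standard max-shift trick for numerical stability (the
# name and signature here are illustrative, not the actual utils code):
def reduce_logsumexp_sketch(log_vals, reduction_indices=1, keep_dims=False):
    """log(sum(exp(log_vals))) computed without overflow/underflow."""
    max_vals = tf.reduce_max(log_vals, reduction_indices, keep_dims=True)
    # Subtracting the max keeps the exponentials in a safe range;
    # adding it back afterwards cancels the shift exactly.
    lse = max_vals + tf.log(
        tf.reduce_sum(tf.exp(log_vals - max_vals), reduction_indices,
                      keep_dims=True))
    if not keep_dims:
        lse = tf.squeeze(lse, [reduction_indices])
    return lse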
def build_graph(B, K, learning_rate, D):
    # Parameters
    x = tf.placeholder(tf.float32, [None, 1, D], name="x")  # input data
    mu_T = tf.Variable(tf.random_normal([1, K, D], mean=0, stddev=0.001),
                       name="mu_T")  # mu transpose
    # stddev=0 gives a constant initialization for phi and psi
    phi = tf.Variable(tf.random_normal([1, K], mean=-1, stddev=0), name="phi")
    psi = tf.Variable(tf.random_normal([1, K], mean=10, stddev=0), name="psi")
    var = tf.exp(phi)  # variance, constrained to (0, inf)
    log_pi = logsoftmax(psi)  # exp(log_pi) sums to 1

    # Q2.1.2
    log_prob_x_given_z = compute_log_prob_x_given_z(x, mu_T, var, D)
    # Q2.1.3
    log_prob_z_given_x = compute_log_prob_z_given_x(log_prob_x_given_z, log_pi)
    argmaxs = tf.argmax(log_prob_z_given_x, 1)
    mu = tf.reduce_sum(mu_T, 0)  # drop the leading singleton dimension

    # Loss: logsumexp over K gives [B, 1]; reduce_mean gives a scalar
    total_loss = -tf.reduce_mean(
        reduce_logsumexp(tf.add(log_prob_x_given_z, log_pi), keep_dims=True))

    # Adam optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=0.9, beta2=0.99, epsilon=1e-5)
    train = optimizer.minimize(loss=total_loss)
    pi = tf.exp(log_pi)
    return x, mu_T, train, mu, total_loss, argmaxs, log_prob_x_given_z, pi, var
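# Why build_graph optimizes phi and psi instead of the variance and mixing
# weights directly: exp maps any real phi to a strictly positive variance, and
# softmax maps any real psi to weights on the simplex, so unconstrained
# gradient steps can never leave the feasible region. A small numpy check of
# this reparameterization (illustrative only):
import numpy as np

phi = np.array([-1.0, 0.0, 2.0])   # unconstrained
var = np.exp(phi)                  # always > 0
psi = np.array([10.0, 9.0, 8.0])   # unconstrained
# logsoftmax with the max subtracted for stability
log_pi = psi - np.log(np.sum(np.exp(psi - psi.max()))) - psi.max()
pi = np.exp(log_pi)
assert np.all(var > 0)
assert np.isclose(pi.sum(), 1.0)   # mixing weights sum to 1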
def marginal_log_likelihood(x, logpz, mu, sigma):
    # Per-point log P(x_n): logsumexp over clusters, then sum over points
    pxn = reduce_logsumexp(
        tf.transpose(logpz) + log_prob_density(x, mu, sigma), 0)
    px = tf.reduce_sum(pxn, 0)
    return px
def log_cluster_probability(x, logpz, mu, sigma):
    log_px_gz = log_prob_density(x, mu, sigma)  # log P(x | z)
    p_xz = logpz + tf.transpose(log_px_gz)  # log P(x, z); transpose aligns shapes
    p_x = reduce_logsumexp(p_xz, 0)  # log P(x)
    # In log space, dividing by P(x) becomes subtracting log P(x)
    log_pz_gx = p_xz - p_x  # log [P(z) * P(x | z) / P(x)]
    return log_pz_gx
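# Sanity check for the subtraction above: normalizing in log space must
# subtract log P(x), since log(a/b) = log a - log b; dividing the log values
# themselves gives a different (wrong) quantity. A toy numpy verification
# (illustrative only):
import numpy as np

log_joint = np.log(np.array([0.2, 0.3, 0.1]))  # unnormalized log P(x, z)
log_px = np.log(np.sum(np.exp(log_joint)))     # log P(x)
posterior = np.exp(log_joint - log_px)         # subtraction: sums to 1
assert np.isclose(posterior.sum(), 1.0)
wrong = np.exp(log_joint / log_px)             # division: not a distribution
assert not np.isclose(wrong.sum(), 1.0)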
def log_posterior(x, mu, sigma, pi, D):
    """
    Compute the log probability of the cluster variable z given the data vector x.

    Input:
        x: BxD matrix
        mu: KxD matrix
        sigma: 1xK matrix
        pi: 1xK matrix
        D: dimension
    Output:
        c: BxK matrix of log P(z|x)
    """
    # Compute log P(x, z) once, then normalize by its logsumexp over clusters
    log_pxz = tf_log_pdf_clust(x, mu, sigma, D) + tf.log(pi)
    return log_pxz - utils.reduce_logsumexp(log_pxz, keep_dims=True)
def marginal_log_likelihood(x, logpz, mu, sigma):
    '''
    Calculates the log marginal probability of the data, i.e. log P(x).

    Args:
        x: The data
        logpz: Log prior probabilities of the clusters
        mu: Cluster means
        sigma: Cluster variances
    Returns:
        Log marginal probability of the data
    '''
    # Per-point log P(x_n): logsumexp over clusters of log P(z) + log P(x_n | z)
    pxn = reduce_logsumexp(tf.transpose(logpz) + log_density(x, mu, sigma), 0)
    # Total log likelihood: sum over data points
    px = tf.reduce_sum(pxn, 0)
    return px
def log_cluster_probability(x, logpz, mu, sigma):
    '''
    Computes the vector of log posterior cluster probabilities
    log P(z | x) given the data, prior, mean and variance.

    Args:
        x: Data
        logpz: Log prior probabilities
        mu: Gaussian means
        sigma: Gaussian variances
    Returns:
        Vector of log posterior cluster probabilities
    '''
    logpxgz = log_density(x, mu, sigma)   # log P(x | z)
    num = logpz + tf.transpose(logpxgz)   # log [P(z) * P(x | z)]
    den = reduce_logsumexp(num, 0)        # log P(x)
    # Log space: subtract, don't divide, to get log [P(z) P(x|z) / P(x)]
    logpzgx = num - den
    return logpzgx
def t2_validation(lr=0.005, K=3):
    data = utils.load_data('data2D.npy').astype("float32")
    M, D = data.shape
    graph = tf.Graph()
    with graph.as_default():
        x_train = tf.placeholder(tf.float32, shape=(None, D))
        mu = tf.Variable(tf.truncated_normal([K, D], dtype=tf.float32))
        # Assume isotropic variance
        sigma = tf.Variable(tf.truncated_normal([K], dtype=tf.float32))
        likelihood = std_z(x_train, mu, sigma)
        log_like_z, z = mog_log_likelihood_z(x_train, mu, sigma)
        logProb = mog_logprob(log_like_z)
        norm_dist = mog_dist(x_train, mu, sigma)
        cost = utils.reduce_logsumexp(likelihood, 0)
        optim = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9, beta2=0.99,
                                       epsilon=1e-5).minimize(cost)
    epochs = 100
    with tf.Session(graph=graph) as sess:
        tf.initialize_all_variables().run()
        cost_l = []
        for epoch in range(epochs):
            x_batch = data[:2 * M // 3]  # first two thirds for training
            feed_dict = {x_train: x_batch}
            _, c, like, log_pz, logp, zval, mu_val, sigma_val = sess.run(
                [optim, cost, likelihood, log_like_z, logProb, z, mu, sigma],
                feed_dict=feed_dict)
            cost_l.append(c)
            ind = np.argmin(like)
            val = np.min(like)
            if epoch % 10 == 0:
                print("Epoch %03d, cost = %.2f. %02d cluster has lowest likelihood %.2f"
                      % (epoch, c, ind, val))

        # Evaluate on the held-out third (note: this also takes one
        # optimizer step on the validation data)
        feed_dict = {x_train: data[2 * M // 3:]}
        _, c, normdist, like, mu = sess.run(
            [optim, cost, norm_dist, likelihood, mu], feed_dict=feed_dict)
        ind = np.argmin(like)
        val = np.min(like)
        print("Validation result cost = %.2f. %02d cluster has lowest likelihood %.2f"
              % (c, ind, val))

        # Plot validation points coloured by cluster, with cluster means
        x_v = data[2 * M // 3:]
        trainnormdist = np_mog_dist(x_v, mu_val, sigma_val)
        t = mog_classify(data, trainnormdist)
        colors = iter(cm.rainbow(np.linspace(0, 1, len(t))))
        plt.clf()
        for i in range(len(t)):
            print('plotting scatter...')
            print('cluster x, y shape ', t[i][:, 0].shape, t[i][:, 1].shape)
            color_i = next(colors)
            plt.scatter(t[i][:, 0], t[i][:, 1], color=color_i)
            plt.scatter(mu[i][0], mu[i][1], marker='x', color=color_i)
        # Save before show, otherwise the saved figure may be blank
        plt.savefig('t22_3_scatter_k%d_with_validation.png' % K)
        plt.show()
        print(like)
    return cost_l, mu
def mog_logprob(log_likelihood_z):
    return log_likelihood_z - utils.reduce_logsumexp(log_likelihood_z,
                                                     keep_dims=True)
tf_sub_square_sum = tf.reduce_sum(tf_sub_square, 2, True)
tf_sub_square_sum_02 = tf.squeeze(tf.transpose(tf_sub_square_sum))
tf_index = (-0.5) * tf.div(tf_sub_square_sum_02, tf_covariance)
tf_log_second_term = tf_index
tf_log_first_term = (-0.5 * dim) * tf.log(2 * math.pi * tf_covariance)

# log P(x|z): Gaussian log density assembled from the two terms above
tf_log_x_gan_z = tf.add(tf_log_first_term, tf_log_second_term)
# log P(x, z) = log pi + log P(x|z)
tf_log_pro_z_x_gan_z = tf.add(log_pi, tf_log_x_gan_z)
# log P(x): logsumexp over clusters (exponentiating and summing directly
# would underflow, so stay in log space)
tf_log_sum_pro_z_x_gan_z = utils.reduce_logsumexp(tf_log_pro_z_x_gan_z, 1)
# Total log likelihood; the loss is its negation
tf_log_like = tf.reduce_sum(tf_log_sum_pro_z_x_gan_z)
tf_loss = -1 * tf_log_like
optimizer = tf.train.AdamOptimizer(0.01, 0.9, 0.99, 1e-5).minimize(tf_loss)

# Cluster responsibilities: normalize log P(x, z) over clusters with
# logsumexp (not a plain sum) so that tf_assignments_02 is log P(z|x)
tf_assignments_01 = tf.transpose(tf.expand_dims(
    utils.reduce_logsumexp(tf_log_pro_z_x_gan_z, 1), 0))
tf_assignments_02 = tf.sub(tf_log_pro_z_x_gan_z, tf_assignments_01)
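# For reference, the loss assembled above is the negative MoG log likelihood
#   -sum_n log sum_k pi_k * N(x_n; mu_k, sigma_k^2 I).
# A compact numpy cross-check of the same quantity, assuming isotropic
# covariances (function and variable names here are illustrative):
import numpy as np

def mog_nll(x, mu, sig_sq, pi):
    """x: NxD data, mu: KxD means, sig_sq: K variances, pi: K weights."""
    N, D = x.shape
    # Squared distances between every point and every mean, N x K
    d2 = ((x[:, None, :] - mu[None, :, :]) ** 2).sum(axis=2)
    log_gauss = -0.5 * D * np.log(2 * np.pi * sig_sq) - 0.5 * d2 / sig_sq
    log_joint = np.log(pi) + log_gauss                # N x K
    m = log_joint.max(axis=1, keepdims=True)          # stable logsumexp
    log_px = m + np.log(np.exp(log_joint - m).sum(axis=1, keepdims=True))
    return -log_px.sum()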
def compute_log_prob_z_given_x(log_prob_x_given_z, log_pi):
    # [B, K] + [1, K] -> [B, K]; the logsumexp normalizer is [B, 1]
    return log_pi + log_prob_x_given_z - reduce_logsumexp(
        tf.add(log_prob_x_given_z, log_pi), keep_dims=True)
def mog_k3():
    # Load the data
    data2D = np.load("data2D.npy")

    # Set constants.
    K = 3
    DATASET_SIZE, DATA_DIM = data2D.shape
    LEARNINGRATE = 0.01
    ITERATIONS = 750

    # Initialize tf graph.
    graph = tf.Graph()
    with graph.as_default():
        # Load data into tf.
        tf_data2D = tf.cast(tf.constant(data2D), tf.float32)
        # Initialize mu array.
        tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM], dtype=tf.float32,
                                                stddev=1.0))
        tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                 stddev=1.0 / np.sqrt(DATA_DIM)))
        tf_sig_sq = tf.exp(tf_phi)
        tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                 stddev=1.0 / np.sqrt(DATA_DIM)))
        tf_pi = tf.exp(utils.logsoftmax(tf_psi))
        ed = tf_eucl_dist(tf_data2D, tf_mu)
        loss = -tf.reduce_sum(utils.reduce_logsumexp(
            tf_log_pdf_clust(tf_data2D, tf_mu, tf_sig_sq, DATA_DIM) + tf.log(tf_pi),
            reduction_indices=1))
        posterior = tf.exp(log_posterior(tf_data2D, tf_mu, tf_sig_sq, tf_pi, DATA_DIM))
        cluster_hard_assignment = tf.argmax(posterior, 1)
        weight = tf.constant([[0, 0.5, 1.0]])  # TODO: Replace with linspace as a function of K
        cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior),
                                                reduction_indices=1)
        optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99,
                                           epsilon=1e-5).minimize(loss)

    # Run session.
    with tf.Session(graph=graph) as session:
        losses = np.zeros(ITERATIONS, dtype=np.float32)
        tf.initialize_all_variables().run()
        for i in range(ITERATIONS):
            mu, sig_sq, psi, pi, ca, ca_soft, post = session.run(
                [tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment,
                 cluster_soft_assignment, posterior])
            _, l, m = session.run([optimizer, loss, tf_mu])
            losses[i] = l
            if i % 100 == 0:
                print("Loss at iteration %d: %f" % (i, l))
                print("Mu:")
                print(mu)
                print("Sigma:")
                print(sig_sq)
                print("Pi:")
                print(pi)
                print("Posterior:")
                print(post)
                print("Cluster hard assignment:")
                print(ca)

        red = [1, 0, 0]
        green = [0, 1, 0]
        blue = [0, 0, 1]
        colours = [red, green, blue]
        colour_list = [colours[ca[i]] for i in range(DATASET_SIZE)]

        # Plot data points labelled by the closest mean
        plt.scatter(data2D[:, 0], data2D[:, 1], c=colour_list, marker='.')
        # Plot means
        plt.scatter(m[:, 0], m[:, 1], marker='h')
        plt.show()
        print(m)

        # Plot soft assignment scatterplot
        # TODO: Maybe redo so that C = C1*P(z=1|x) + C2*P(z=2|x) + C3*P(z=3|x),
        # where C1 = red, C2 = green, C3 = blue. Currently using colourmap 'viridis'.
        print("Cluster soft assignment:")
        print(ca_soft)
        plt.figure()
        plt.scatter(data2D[:, 0], data2D[:, 1], c=ca_soft, cmap='viridis', marker='.')
        plt.scatter(m[:, 0], m[:, 1], marker='h')
        plt.title("Soft Assignment to Gaussian Cluster")
        # TODO: Add axis labels
        plt.show()
    return
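# The TODO above (replace the hard-coded weight with linspace as a function
# of K) can be resolved the same way main2_2_3 below does it; a sketch:
weight = tf.cast(tf.constant(np.linspace(0.0, 1.0, K)), tf.float32)
# For K = 3 this reproduces [0.0, 0.5, 1.0], and it generalizes to any K.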
def main2_2_3(data):
    # Hold out 1/3 of the data for validation and, for each value of
    # K = 1, 2, 3, 4, 5, train a MoG model. For each K, compute and report the
    # loss function for the validation data and explain which value of K is
    # best. Include a 2D scatter plot of data points coloured by their cluster
    # assignments.

    # Load the data
    data2D = np.load(data)

    # Set constants.
    DATASET_SIZE, DATA_DIM = data2D.shape
    LEARNINGRATE = 0.01
    ITERATIONS = 750
    Ks = range(1, 6)

    third = DATASET_SIZE // 3
    val_data = data2D[:third]
    train_data = data2D[third:]

    for K in Ks:
        # Initialize tf graph.
        graph = tf.Graph()
        with graph.as_default():
            # Training: load data into tf.
            tf_data2D_train = tf.cast(tf.constant(train_data), tf.float32)
            # Initialize mu array.
            tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM],
                                                    dtype=tf.float32, stddev=1.0))
            tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                     stddev=1.0 / np.sqrt(DATA_DIM)))
            tf_sig_sq = tf.exp(tf_phi)
            tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                     stddev=1.0 / np.sqrt(DATA_DIM)))
            tf_pi = tf.exp(utils.logsoftmax(tf_psi))
            ed = tf_eucl_dist(tf_data2D_train, tf_mu)
            loss = -tf.reduce_sum(utils.reduce_logsumexp(
                tf_log_pdf_clust(tf_data2D_train, tf_mu, tf_sig_sq, DATA_DIM)
                + tf.log(tf_pi), reduction_indices=1))
            optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99,
                                               epsilon=1e-5).minimize(loss)

            # Validation: load data into tf.
            tf_data2D_val = tf.cast(tf.constant(val_data), tf.float32)
            loss_v = -tf.reduce_sum(utils.reduce_logsumexp(
                tf_log_pdf_clust(tf_data2D_val, tf_mu, tf_sig_sq, DATA_DIM)
                + tf.log(tf_pi), reduction_indices=1))
            posterior = tf.exp(log_posterior(tf_data2D_val, tf_mu, tf_sig_sq,
                                             tf_pi, DATA_DIM))
            cluster_hard_assignment = tf.argmax(posterior, 1)
            weight = tf.cast(tf.constant(np.linspace(0.0, 1.0, K)), tf.float32)
            cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior),
                                                    reduction_indices=1)

        # Run session.
        with tf.Session(graph=graph) as session:
            losses = np.zeros(ITERATIONS, dtype=np.float32)
            tf.initialize_all_variables().run()
            for i in range(ITERATIONS):
                mu, sig_sq, psi, pi, ca, ca_soft, post = session.run(
                    [tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment,
                     cluster_soft_assignment, posterior])
                _, l, l_v, m = session.run([optimizer, loss, loss_v, tf_mu])
                losses[i] = l
                if i % 10 == 0:
                    print("Validation loss at iteration %d: %f" % (i, l_v))
                    print("Mu:")
                    print(mu)
                    print("Sigma:")
                    print(sig_sq)
                    print("Pi:")
                    print(pi)
                    print("Posterior:")
                    print(post)
                    print("Cluster hard assignment:")
                    print(ca)

            red = [1, 0, 0]
            green = [0, 1, 0]
            blue = [0, 0, 1]
            cyan = [0, 1, 1]
            yellow = [1, 1, 0]
            colours = [red, green, blue, cyan, yellow]
            colour_list = [colours[ca[i]] for i in range(ca.shape[0])]

            # Plot validation points labelled by the closest mean
            plt.figure()
            plt.scatter(val_data[:, 0], val_data[:, 1], c=colour_list, marker='.')
            # Plot means
            plt.scatter(m[:, 0], m[:, 1], marker='h', s=200)
            plt.show()
            print(m)

            # Plot soft assignment scatterplot
            print("Cluster soft assignment:")
            print(ca_soft)
            plt.figure()
            plt.scatter(val_data[:, 0], val_data[:, 1], c=ca_soft, marker='.')
            plt.scatter(m[:, 0], m[:, 1], marker='h', s=200)
            plt.title("Soft Assignment to Gaussian Cluster")
            plt.show()
    return
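# Note: the split in main2_2_3 takes the first third of data2D as validation
# without shuffling; if the rows of the dataset are ordered, this can bias
# both sets. A common fix, assuming it is acceptable to permute the data
# (sketch only, reusing the names from the function above):
perm = np.random.permutation(DATASET_SIZE)
shuffled = data2D[perm]
val_data = shuffled[:DATASET_SIZE // 3]
train_data = shuffled[DATASET_SIZE // 3:]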
def mog():
    # Load the data
    with np.load('mog_purchases.npz') as datafile:
        data = datafile[list(datafile.keys())[0]]

    # Set constants.
    K = 3
    DATASET_SIZE, DATA_DIM = data.shape
    LEARNINGRATE = 0.05
    ITERATIONS = 10000

    # Initialize tf graph.
    graph = tf.Graph()
    with graph.as_default():
        # Load data into tf.
        tf_data = tf.cast(tf.constant(data), tf.float32)
        # Initialize mu array.
        tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM], dtype=tf.float32,
                                                stddev=1.0))
        tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                 stddev=1.0 / np.sqrt(DATA_DIM)))
        tf_sig_sq = tf.exp(tf_phi)
        tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                 stddev=1.0 / np.sqrt(DATA_DIM)))
        tf_pi = tf.exp(utils.logsoftmax(tf_psi))
        ed = tf_eucl_dist(tf_data, tf_mu)
        loss = -tf.reduce_sum(utils.reduce_logsumexp(
            tf_log_pdf_clust(tf_data, tf_mu, tf_sig_sq, DATA_DIM) + tf.log(tf_pi),
            reduction_indices=1))
        posterior = tf.exp(log_posterior(tf_data, tf_mu, tf_sig_sq, tf_pi, DATA_DIM))
        cluster_hard_assignment = tf.argmax(posterior, 1)
        weight = tf.constant([[0, 0.5, 1.0]])  # TODO: Replace with linspace as a function of K
        cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior),
                                                reduction_indices=1)
        optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99,
                                           epsilon=1e-5).minimize(loss)

    # Run session.
    with tf.Session(graph=graph) as session:
        losses = np.zeros(ITERATIONS, dtype=np.float32)
        tf.initialize_all_variables().run()
        for i in range(ITERATIONS):
            mu, sig_sq, psi, pi, ca, ca_soft, post = session.run(
                [tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment,
                 cluster_soft_assignment, posterior])
            _, l, m = session.run([optimizer, loss, tf_mu])
            losses[i] = l
            if i % 100 == 0:
                print("Loss at iteration %d: %f" % (i, l))
                print("Mu:")
                print(mu)
                print("Sigma:")
                print(sig_sq)
                print("Pi:")
                print(pi)
                print("Posterior:")
                print(post)
                print("Cluster hard assignment:")
                print(ca)

        red = [1, 0, 0]
        green = [0, 1, 0]
        blue = [0, 0, 1]
        colours = [red, green, blue]
        colour_list = [colours[ca[i]] for i in range(DATASET_SIZE)]

        # Plot data points labelled by the closest mean
        plt.scatter(data[:, 0], data[:, 1], c=colour_list, marker='.')
        # Plot means
        plt.scatter(m[:, 0], m[:, 1], marker='h')
        plt.savefig("purchase_kmeans.png")
        # plt.show()
        print(m)

        # Project means and data down to the two most informative dimensions
        down_dim = 2
        mu_dim, top_ind = mog_dim_down(m, sig_sq, down_dim)
        data_2d = np.concatenate((data[:, top_ind[0]][:, None],
                                  data[:, top_ind[1]][:, None]), axis=1)
        mu_2d = np.concatenate((m[:, top_ind[0]][:, None],
                                m[:, top_ind[1]][:, None]), axis=1)
        dicts_2d = {'data_2d': data_2d, 'mu': mu_2d}
        np.savez_compressed('purchases_2d', data_2d)
        np.savez_compressed('mu_2d', mu_2d)

        # Plot soft assignment scatterplot
        # TODO: Maybe redo so that C = C1*P(z=1|x) + C2*P(z=2|x) + C3*P(z=3|x),
        # where C1 = red, C2 = green, C3 = blue. Currently using colourmap 'jet'.
        print("Cluster soft assignment:")
        print(ca_soft)
        print("Top dimensions: %d %d" % (top_ind[0], top_ind[1]))
        plt.figure()
        plt.scatter(data[:, top_ind[0]], data[:, top_ind[1]], c=ca_soft,
                    cmap='jet', marker='.')
        plt.scatter(m[:, top_ind[0]], m[:, top_ind[1]], marker='h')
        plt.title("Soft Assignment to Gaussian Cluster")
        # TODO: Add axis labels
        plt.savefig("purchase_mog.png")
        # plt.show()
    return mu, sig_sq
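# mog_dim_down is called above but not defined in this excerpt. One plausible
# reading, consistent with how its outputs are used (a reduced mean matrix
# plus the indices of the two most informative dimensions), is to rank
# dimensions by the spread of the cluster means. This sketch is a guess at
# the intended behaviour, not the original helper:
def mog_dim_down(mu, sig_sq, down_dim):
    """Pick the down_dim dimensions where the cluster means differ most."""
    spread = np.var(mu, axis=0)                    # per-dimension variance of the K means
    top_ind = np.argsort(spread)[::-1][:down_dim]  # indices of the widest spreads
    return mu[:, top_ind], top_ind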