Example #1
def logZGivenX(X, mu, sigma_squared, piK):
    # log P(z|x) = log P(x|z) + log P(z) - log P(x)
    logXGivenZ = logGaussianDensity(X, mu, sigma_squared)  # B x K
    logZ = tf.log(piK)  # 1 x K
    logZ_X = logZ + logXGivenZ  # B x K, log P(x, z)
    logX = reduce_logsumexp(logZ_X, 1, keep_dims=True)  # B x 1, log P(x)
    return logZ_X - logX, logX, logZ_X
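A minimal NumPy sketch of the same normalization, log P(z|x) = log P(x, z) - logsumexp_z log P(x, z), independent of the logGaussianDensity / reduce_logsumexp helpers assumed above; shapes follow the B x K comments:

import numpy as np

B, K = 4, 3
log_x_given_z = np.random.randn(B, K)      # stand-in for the logGaussianDensity output, B x K
log_pi = np.log(np.full((1, K), 1.0 / K))  # uniform prior, 1 x K

log_joint = log_pi + log_x_given_z                              # log P(x, z), B x K
log_px = np.logaddexp.reduce(log_joint, axis=1, keepdims=True)  # log P(x), B x 1
log_z_given_x = log_joint - log_px                              # log P(z|x), B x K

print(np.exp(log_z_given_x).sum(axis=1))   # each row sums to 1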
Example #2
def build_graph(B, K, learning_rate, D):

    #Parameters
    x = tf.placeholder(tf.float32, [None, 1, D], name="x")  # input data
    mu_T = tf.Variable(tf.random_normal([1, K, D], mean=0, stddev=0.001),
                       name="mu_T")  # mu transpose
    # stddev=0 gives constant initial values for phi and psi
    phi = tf.Variable(tf.random_normal([1, K], mean=-1, stddev=0), name="phi")
    psi = tf.Variable(tf.random_normal([1, K], mean=10, stddev=0), name="psi")
    var = tf.exp(phi)  # variance, strictly positive
    log_pi = logsoftmax(psi)  # exp(log_pi) sums to 1 over the K clusters

    #Q2.1.2
    log_prob_x_given_z = compute_log_prob_x_given_z(x, mu_T, var, D)
    #Q2.1.3
    log_prob_z_given_x = compute_log_prob_z_given_x(log_prob_x_given_z, log_pi)

    argmaxs = tf.argmax(log_prob_z_given_x, 1)
    mu = tf.reduce_sum(mu_T, 0)
    # loss: [1, 1] <- mean over the [B, 1] per-example log sum_k P(x, z=k)
    total_loss = -tf.reduce_mean(
        reduce_logsumexp(tf.add(log_prob_x_given_z, log_pi), keep_dims=True))

    # Adam Optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=0.9,
                                       beta2=0.99,
                                       epsilon=1e-5)
    train = optimizer.minimize(loss=total_loss)

    pi = tf.exp(log_pi)
    return x, mu_T, train, mu, total_loss, argmaxs, log_prob_x_given_z, pi, var
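A hedged usage sketch for driving this graph under TF 1.x; it assumes the helpers referenced above (compute_log_prob_x_given_z, compute_log_prob_z_given_x, logsoftmax, reduce_logsumexp) are defined elsewhere in the same module, and is illustrative only:

import numpy as np
import tensorflow as tf  # TF 1.x assumed

B, K, D = 200, 3, 2
x_in, mu_T, train, mu, total_loss, argmaxs, _, pi, var = build_graph(B, K, learning_rate=0.01, D=D)

data = np.random.randn(B, 1, D).astype(np.float32)  # toy batch with the expected [B, 1, D] shape
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(500):
        _, loss_val = sess.run([train, total_loss], feed_dict={x_in: data})
        if step % 100 == 0:
            print("step %d, loss %.3f" % (step, loss_val))
    print(sess.run([mu, pi, var]))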
Example #3
def marginal_log_likelihood(x, logpz, mu, sigma):

    pxn = reduce_logsumexp(
        tf.transpose(logpz) + log_prob_density(x, mu, sigma), 0)
    px = tf.reduce_sum(pxn, 0)

    return px
Example #4
def log_cluster_probability(x, logpz, mu, sigma):

    log_px_gz = log_prob_density(x, mu, sigma)  # log P(x | z)
    log_p_xz = logpz + tf.transpose(log_px_gz)  # log P(x, z) = log P(z) + log P(x | z)
    log_p_x = reduce_logsumexp(log_p_xz, 0)     # log P(x)
    log_pz_gx = log_p_xz - log_p_x              # log P(z | x): subtract the log normalizer, do not divide

    return log_pz_gx
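A quick numeric illustration of why the log normalizer is subtracted rather than divided (the correction applied above); illustrative values only:

import numpy as np

log_joint = np.log(np.array([0.1, 0.2, 0.3]))  # unnormalized log P(x, z) over K = 3 clusters
log_px = np.logaddexp.reduce(log_joint)        # log P(x) = log 0.6

print(np.exp(log_joint - log_px).sum())        # 1.0 -> subtracting gives a proper posterior
print(np.exp(log_joint / log_px).sum())        # far from 1 -> dividing the logs does not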
Example #5
def log_posterior(x, mu, sigma, pi, D):
    """ Compute the probability of the cluster variable z given the data vector x
        Input:
            x: BxD matrix
            mu: KxD matrix
            sigma: 1xK matrix
            pi: 1xK matrix
            D: Dimension 
        Output:
            c: BxK matrix: log P(z|x)
    """
    log_pxz = tf_log_pdf_clust(x, mu, sigma, D) + tf.log(pi)  # log P(x, z), B x K
    return log_pxz - utils.reduce_logsumexp(log_pxz, keep_dims=True)
Example #6
def marginal_log_likelihood(x, logpz, mu, sigma):
    '''
        Calculates the log marginal probability of the data, i.e. logP(x)
    Args:
        x: The data
        logpz: Log prior probabilities of the clusters
        mu: Cluster means
        sigma: Cluster variance

    Returns:
        Log marginal probability of the data
    '''
    pxn = reduce_logsumexp(tf.transpose(logpz) + log_density(x, mu, sigma), 0)  # log P(x_n)
    px = tf.reduce_sum(pxn, 0)  # total log-likelihood of the data

    return px
Example #8
def log_cluster_probability(x, logpz, mu, sigma):
    '''
        Computes the vector of log posterior cluster probabilities given the
        data, prior, mean and variance. P(Z | x)
    Args:
        x: Data
        logpz: Log prior probabilities of the clusters
        mu: Gaussian mean
        sigma: Gaussian variance

    Returns:
        Vector of log posterior cluster probabilities
    '''

    logpxgz = log_density(x, mu, sigma)  # log P(x | z)
    num = logpz + tf.transpose(logpxgz)  # log P(x, z)
    den = reduce_logsumexp(num, 0)       # log P(x)
    logpzgx = num - den                  # log P(z | x): subtract the log normalizer, do not divide

    return logpzgx
Example #10
def t2_validation(lr=0.005, K=3):
    data = utils.load_data('data2D.npy').astype("float32")
    M, D = data.shape

    graph = tf.Graph()
    with graph.as_default():
        x_train = tf.placeholder(tf.float32, shape=(None, D))
        mu = tf.Variable(tf.truncated_normal([K, D], dtype=tf.float32))
        # Assume isotropic variance
        sigma = tf.Variable(tf.truncated_normal([K], dtype=tf.float32))

        likelihood = std_z(x_train, mu, sigma)
        log_like_z, z = mog_log_likelihood_z(x_train, mu, sigma)
        logProb = mog_logprob(log_like_z)

        norm_dist = mog_dist(x_train, mu, sigma)
        cost = utils.reduce_logsumexp(likelihood, 0)
        optim = tf.train.AdamOptimizer(learning_rate=lr,
                                       beta1=0.9,
                                       beta2=0.99,
                                       epsilon=1e-5).minimize(cost)

    epochs = 100

    with tf.Session(graph=graph) as sess:

        tf.initialize_all_variables().run()
        cost_l = []

        for epoch in range(epochs):

            x_batch = data[:2 * M // 3]  # first two thirds for training
            feed_dict = {x_train: x_batch}

            _, c, like, log_pz, logp, zval, mu_val, sigma_val = sess.run(
                [optim, cost, likelihood, log_like_z, logProb, z, mu, sigma],
                feed_dict=feed_dict)
            cost_l.append(c)
            ind = np.argmin(like)
            val = np.min(like)
            if epoch % 10 == 0:
                #print log_pz.shape, logp
                #print log_pz[:10]
                #print zval
                print(
                    "Epoch %03d, cost = %.2f. %02d cluster has lowest likelihood %.2f"
                    % (epoch, c, ind, val))
                #print("Log prob %.2f" % (logp))

        feed_dict = {x_train: data[2 * M // 3:]}  # last third held out for validation
        _, c, normdist, like, mu = sess.run(
            [optim, cost, norm_dist, likelihood, mu], feed_dict=feed_dict)
        ind = np.argmin(like)
        val = np.min(like)
        print(
            "Validation result cost = %.2f. %02d cluster has lowest likelihood %.2f"
            % (c, ind, val))
        # Plotting scatter
        x_v = data[2 * M // 3:]
        trainnormdist = np_mog_dist(x_v, mu_val, sigma_val)
        t = mog_classify(data, trainnormdist)
        colors = iter(cm.rainbow(np.linspace(0, 1, len(t))))
        plt.clf()
        for i in range(len(t)):
            print('plotting scatter...')
            print('cluster x, y shape ', t[i][:, 0].shape, t[i][:, 1].shape)
            color_i = next(colors)
            plt.scatter(t[i][:, 0], t[i][:, 1], color=color_i)
            plt.scatter(mu[i][0], mu[i][1], marker='x', color=color_i)
        # save before show(); calling show() first can leave the saved figure blank
        plt.savefig('t22_3_scatter_k%d_with_validation.png' % K)
        plt.show()

        print(like)

    return cost_l, mu
Example #11
def mog_logprob(log_likelihood_z):
    return log_likelihood_z - utils.reduce_logsumexp(log_likelihood_z,
                                                     keep_dims=True)
Example #12
tf_sub_square_sum = tf.reduce_sum(tf_sub_square, 2, True)
tf_sub_square_sum_02 = tf.squeeze(tf.transpose(tf_sub_square_sum))
tf_index = (-0.5) * tf.div(tf_sub_square_sum_02, tf_covariance)
tf_log_second_term = tf_index
tf_log_first_term = (-0.5 * dim) * tf.log(2 * math.pi * tf_covariance)

# log(P(x|z))
tf_log_x_gan_z = tf.add(tf_log_first_term, tf_log_second_term)
# log(P(x,z))
tf_log_pro_z_x_gan_z = tf.add(log_pi, tf_log_x_gan_z)

#tf_pro_z_x_gan_z = tf.exp(tf_log_pro_z_x_gan_z)
#tf_sum_pro_z_x_gan_z = utils.reduce_logsumexp(tf_pro_z_x_gan_z, 1)
#tf_log_sum_pro_z_x_gan_z = tf.log(tf_sum_pro_z_x_gan_z)

tf_log_sum_pro_z_x_gan_z = utils.reduce_logsumexp(tf_log_pro_z_x_gan_z, 1)
#tf_log_like = tf.reduce_mean(tf_log_sum_pro_z_x_gan_z)
tf_log_like = tf.reduce_sum(tf_log_sum_pro_z_x_gan_z)
tf_loss = -1 * tf_log_like

optimizer = tf.train.AdamOptimizer(0.01, 0.9, 0.99, 1e-5).minimize(tf_loss)
#optimizer = tf.train.AdamOptimizer(0.001).minimize(tf_loss)
#optimizer = tf.train.GradientDescentOptimizer(0.005).minimize(tf_loss)

#tf_assignments = tf.argmax(tf_log_pro_z_x_gan_z, 1)
#tf_assignments = tf.argmax(tf_log_x_gan_z, 1)
#tf_assignments = tf.argmin(eucl_distance(tf_data, tf_mean), dimension = 1)


tf_assignments_01 = tf.transpose(tf.expand_dims(tf.reduce_sum(tf_log_pro_z_x_gan_z, 1), 0))
tf_assignments_02 = tf.sub(tf_log_pro_z_x_gan_z, tf_assignments_01)
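For reference, the two terms in this fragment implement the isotropic Gaussian log-density log N(x; mu_k, sigma_k^2 I) = -(D/2) log(2 pi sigma_k^2) - ||x - mu_k||^2 / (2 sigma_k^2). A small stand-alone NumPy sketch of the same quantity (the names here are placeholders, not the fragment's variables):

import math
import numpy as np

def log_gauss_isotropic(x, mu, sig_sq):
    # log N(x_b; mu_k, sig_sq_k * I) for every (point, cluster) pair -> B x K
    D = x.shape[1]
    sq_dist = ((x[:, None, :] - mu[None, :, :]) ** 2).sum(axis=2)  # B x K
    return -0.5 * D * np.log(2 * math.pi * sig_sq) - 0.5 * sq_dist / sig_sq

x = np.random.randn(5, 2)   # B = 5 points in D = 2
mu = np.random.randn(3, 2)  # K = 3 cluster means
sig_sq = np.ones(3)         # one variance per cluster
print(log_gauss_isotropic(x, mu, sig_sq).shape)  # (5, 3)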
Example #15
def compute_log_prob_z_given_x(log_prob_x_given_z, log_pi):
    # [B, K] = [1, K] + [B, K] - [B, 1]
    return log_pi + log_prob_x_given_z - reduce_logsumexp(
        tf.add(log_prob_x_given_z, log_pi), keep_dims=True)
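Assuming reduce_logsumexp reduces over the cluster axis, this expression is exactly a log-softmax over the cluster dimension of log P(x, z). A minimal TF 1.x check of the equivalence with stand-in values (illustrative only):

import numpy as np
import tensorflow as tf  # TF 1.x assumed

log_joint = tf.constant(np.random.randn(5, 3), dtype=tf.float32)  # stand-in for log_pi + log P(x|z), B x K
manual = log_joint - tf.reduce_logsumexp(log_joint, axis=1, keep_dims=True)
builtin = tf.nn.log_softmax(log_joint)  # log-softmax over the last (cluster) axis

with tf.Session() as sess:
    a, b = sess.run([manual, builtin])
    print(np.allclose(a, b, atol=1e-6))  # True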
Example #16
def mog_k3():
    # Load the data
    data2D = np.load("data2D.npy")

    # Set constants.
    K = 3
    DATASET_SIZE, DATA_DIM  = data2D.shape
    LEARNINGRATE = 0.01
    ITERATIONS = 750
    
    # Initialize tf graph.
    graph = tf.Graph()
    with graph.as_default():
        # Load data into tf.
        tf_data2D = tf.cast(tf.constant(data2D), tf.float32)
        
        # Initialize mu array.
        tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM], dtype=tf.float32, stddev=1.0))
        tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0, stddev=1.0/np.sqrt(DATA_DIM)))
        tf_sig_sq = tf.exp(tf_phi)
        tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,stddev=1.0/np.sqrt(DATA_DIM)))
        tf_pi = tf.exp(utils.logsoftmax(tf_psi))
    
        ed = tf_eucl_dist(tf_data2D, tf_mu)
        loss = -tf.reduce_sum(utils.reduce_logsumexp(tf_log_pdf_clust(tf_data2D,tf_mu,tf_sig_sq, DATA_DIM)+tf.log(tf_pi), reduction_indices=1))
        posterior = tf.exp(log_posterior(tf_data2D, tf_mu, tf_sig_sq, tf_pi, DATA_DIM))
        cluster_hard_assignment = tf.argmax(posterior, 1)
        weight = tf.constant([[0, 0.5, 1.0]]) # TODO: Replace this with linspace as func of K
        cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior), reduction_indices=1)
        optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99, epsilon=1e-5).minimize(loss)

    # Run session.
    with tf.Session(graph=graph) as session:
        
        losses = np.zeros(ITERATIONS, dtype=np.float32)
        tf.initialize_all_variables().run()

        for i in range(ITERATIONS):
            mu, sig_sq, psi, pi, ca, ca_soft, post = session.run([tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment, cluster_soft_assignment, posterior])
            _, l, m = session.run([optimizer, loss, tf_mu])
            losses[i] = l
            if i % 100 == 0:
                print "Loss at iteration %d: " % (i), l 
            
        print "Mu:"
        print mu
        print "Sigma:"
        print sig_sq
        print "Pi:"
        print pi
        print "Posterior:"
        print post
        print "Cluster hard assignment:"
        print ca
        red = [1, 0, 0]
        green = [0, 1, 0]
        blue = [0, 0, 1]
        colours = [red, green, blue]
        colour_list = [colours[ca[i]] for i in range(DATASET_SIZE)]
        
        # Plot data points labelled by the closest mean  
        plt.scatter(data2D[:,0], data2D[:,1], c=colour_list, marker='.')
        # Plot mean
        plt.scatter(m[:,0], m[:,1], marker='h')
        plt.show()
        print(m)
        
        # Plot soft assignment scatterplots
        # TODO: May be redo it so that C = C1*P(z=1|x) + C2*P(z=1|x) + C3*P(z=1|x)
        # Where C1 = Red, C2 = Green, C3 = Blue. Right now using colourmap 'viridis'
        print "Cluster soft assignment:"
        print ca_soft
        plt.figure()
        plt.scatter(data2D[:,0], data2D[:,1], c=ca_soft, cmap='viridis', marker='.')
        plt.scatter(m[:,0], m[:,1], marker='h')
        plt.title("Soft Assignment to Gaussian Cluster")
        # TODO: Add plot title, axis labels
        plt.show()
    
    return
Example #17
def main2_2_3(data):
    """Hold out 1/3 of the data for validation and, for each value of K = 1, 2, 3, 4, 5, train a
    MoG model. For each K, compute and report the loss function on the validation data and
    explain which value of K is best. Include a 2D scatter plot of data points colored by their
    cluster assignments."""

    # Load the data
    data2D = np.load(data)

    # Set constants.
    DATASET_SIZE, DATA_DIM  = data2D.shape
    LEARNINGRATE = 0.01
    ITERATIONS = 750
    
    Ks = range(1, 6)
    
    third = DATASET_SIZE // 3
    val_data = data2D[:third]
    train_data = data2D[third:]
    
    for K in Ks:
        # Initialize tf graph.
        graph = tf.Graph()
        with graph.as_default():
            # Training
            # Load data into tf.
            tf_data2D_train = tf.cast(tf.constant(train_data), tf.float32)

            # Initialize mu array.
            tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM], dtype=tf.float32, stddev=1.0))
            tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0, stddev=1.0/np.sqrt(DATA_DIM)))
            tf_sig_sq = tf.exp(tf_phi)
            tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,stddev=1.0/np.sqrt(DATA_DIM)))
    #         tf_pi = tf.nn.softmax(tf_psi) # TODO: Use the utils function instead of the tf.nn.softmax
            tf_pi = tf.exp(utils.logsoftmax(tf_psi))

            ed = tf_eucl_dist(tf_data2D_train, tf_mu)
            loss = -tf.reduce_sum(utils.reduce_logsumexp(tf_log_pdf_clust(tf_data2D_train,tf_mu,tf_sig_sq, DATA_DIM)+tf.log(tf_pi), reduction_indices=1))
            
            optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99, epsilon=1e-5).minimize(loss)
            
            # Validation
            # Load data into tf.
            tf_data2D_val = tf.cast(tf.constant(val_data), tf.float32)
            loss_v = -tf.reduce_sum(utils.reduce_logsumexp(tf_log_pdf_clust(tf_data2D_val,tf_mu,tf_sig_sq, DATA_DIM)+tf.log(tf_pi), reduction_indices=1))
            posterior = tf.exp(log_posterior(tf_data2D_val, tf_mu, tf_sig_sq, tf_pi, DATA_DIM))
            cluster_hard_assignment = tf.argmax(posterior, 1)
            weight = tf.cast(tf.constant(np.linspace(0.0, 1.0, K)), tf.float32)
            cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior), reduction_indices=1)
            
        # Run session.
        with tf.Session(graph=graph) as session:

            losses = np.zeros(ITERATIONS, dtype=np.float32)
            tf.initialize_all_variables().run()

            for i in range(ITERATIONS):
                mu, sig_sq, psi, pi, ca, ca_soft, post = session.run([tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment, cluster_soft_assignment, posterior])
                _, l, l_v, m = session.run([optimizer, loss, loss_v, tf_mu])
                losses[i] = l
                if i % 10 == 0:
                    print "Loss at iteration %d: " % (i), l_v 

            print "Mu:"
            print mu
            print "Sigma:"
            print sig_sq
            print "Pi:"
            print pi
            print "Posterior:"
            print post
            print "Cluster hard assignment:"
            print ca
            
            red = [1, 0, 0]
            green = [0, 1, 0]
            blue = [0, 0, 1]
            cyan = [0, 1, 1]
            yellow = [1, 1, 0]
            colours = [red, green, blue, cyan, yellow]
            
            colour_list = [colours[ca[i]] for i in range(ca.shape[0])]
    #         print colour_list

            # Plot data points labelled by the closest mean  
            plt.figure()
            plt.scatter(val_data[:,0], val_data[:,1], c=colour_list, marker='.')
            # Plot mean
            plt.scatter(m[:,0], m[:,1], marker='h', s=200)
            plt.show()
            print(m)

            # Plot soft assignment scatterplots
            print "Cluster soft assignment:"
            print ca_soft
            plt.figure()
            plt.scatter(val_data[:,0], val_data[:,1], c=ca_soft, marker='.')
            plt.scatter(m[:,0], m[:,1], marker='h', s=200)
            plt.title("Soft Assignment to Gaussian Cluster")
            plt.show()

    return
Example #18
def mog():
    # Load the data
    with np.load('mog_purchases.npz') as datafile:
        data = datafile[list(datafile.keys())[0]]

    # Set constants.
    K = 3
    DATASET_SIZE, DATA_DIM  = data.shape
    LEARNINGRATE = 0.05
    ITERATIONS = 10000
    
    # Initialize tf graph.
    graph = tf.Graph()
    with graph.as_default():
        # Load data into tf.
        tf_data = tf.cast(tf.constant(data), tf.float32)
        
        # Initialize mu array.
        tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM], dtype=tf.float32, stddev=1.0))
        tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0, stddev=1.0/np.sqrt(DATA_DIM)))
        tf_sig_sq = tf.exp(tf_phi)
        tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,stddev=1.0/np.sqrt(DATA_DIM)))
        tf_pi = tf.exp(utils.logsoftmax(tf_psi))
    
        ed = tf_eucl_dist(tf_data, tf_mu)
        loss = -tf.reduce_sum(utils.reduce_logsumexp(tf_log_pdf_clust(tf_data,tf_mu,tf_sig_sq, DATA_DIM)+tf.log(tf_pi), reduction_indices=1))
        posterior = tf.exp(log_posterior(tf_data, tf_mu, tf_sig_sq, tf_pi, DATA_DIM))
        cluster_hard_assignment = tf.argmax(posterior, 1)
        weight = tf.constant([[0, 0.5, 1.0]]) # TODO: Replace this with linspace as func of K
        cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior), reduction_indices=1)
        optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99, epsilon=1e-5).minimize(loss)

    # Run session.
    with tf.Session(graph=graph) as session:
        
        losses = np.zeros(ITERATIONS, dtype=np.float32)
        tf.initialize_all_variables().run()
        #pdb.set_trace()

        for i in range(ITERATIONS):
            mu, sig_sq, psi, pi, ca, ca_soft, post = session.run([tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment, cluster_soft_assignment, posterior])
            _, l, m = session.run([optimizer, loss, tf_mu])
            #l = session.run([loss])
            #m = session.run([tf_mu])
            #losses[i] = l
            if i % 100 == 0:
                print "Loss at iteration %d: " % (i), l 
            
        print "Mu:"
        print mu
        print "Sigma:"
        print sig_sq
        print "Pi:"
        print pi
        print "Posterior:"
        print post
        print "Cluster hard assignment:"
        print ca
        red = [1, 0, 0]
        green = [0, 1, 0]
        blue = [0, 0, 1]
        colours = [red, green, blue]
        colour_list = [colours[ca[i]] for i in range(DATASET_SIZE)]
        
        # Plot data points labelled by the closest mean  
        plt.scatter(data[:,0], data[:,1], c=colour_list, marker='.')
        # Plot mean
        plt.scatter(m[:,0], m[:,1], marker='h')
        plt.savefig("purchase_kmeans.png")
        #plt.show()
        print(m)
        
        down_dim = 2
        mu_dim, top_ind = mog_dim_down(m, sig_sq, down_dim)
        # identifiers cannot start with a digit, so the 2D projections are named data_2d / mu_2d
        data_2d = np.concatenate((data[:, top_ind[0]][:, None], data[:, top_ind[1]][:, None]), axis=1)
        mu_2d = np.concatenate((m[:, top_ind[0]][:, None], m[:, top_ind[1]][:, None]), axis=1)
        dicts_2d = {'2d_data': data_2d,
                    'mu': mu_2d}

        np.savez_compressed('purchases_2d', data_2d)
        np.savez_compressed('mu_2d', mu_2d)
        # Plot soft assignment scatterplots
        # TODO: May be redo it so that C = C1*P(z=1|x) + C2*P(z=1|x) + C3*P(z=1|x)
        # Where C1 = Red, C2 = Green, C3 = Blue. Right now using colourmap 'viridis'
        print "Cluster soft assignment:"
        print ca_soft
        print "Top dimensions: %d %d" % (top_ind[0], top_ind[1])
        plt.figure()
        plt.scatter(data[:,top_ind[0]], data[:,top_ind[1]], c=ca_soft, cmap='jet', marker='.')
        plt.scatter(m[:,top_ind[0]], m[:,top_ind[1]], marker='h')
        plt.title("Soft Assignment to Gaussian Cluster")
        # TODO: Add plot title, axis labels
        plt.savefig("purchase_mog.png")

        #plt.show()
    
    return mu, sig_sq