def MoG(dataset, K, alpha):
    N, D = num_pts, dim  # module-level globals set by the data loader
    X = tf.placeholder(tf.float32, shape=(N, D), name="X")
    MU = tf.get_variable(name="MU", initializer=tf.random.normal(shape=[K, D]))
    sigma = tf.get_variable(name="sigma", initializer=tf.random.normal(shape=[K, 1]))
    pi = tf.get_variable(name="pi", initializer=tf.random.normal(shape=[1, K]))

    # Take the exponent of sigma as per instructions, so the variance stays positive
    sexp = tf.exp(sigma)

    # Compute log P(x_n | z_n = k)
    log_PDF = log_GaussPDF(X, MU, sexp)

    sum_l = hlp.reduce_logsumexp(hlp.logsoftmax(pi) + log_PDF)
    loss = -1 * tf.reduce_sum(sum_l)
    opt = tf.train.AdamOptimizer(learning_rate=alpha, beta1=0.9, beta2=0.99,
                                 epsilon=1e-5).minimize(loss)

    # Log posterior for plotting the clusters at the end
    sm = hlp.logsoftmax(log_PDF)
    return MU, X, loss, opt, sigma, pi, sm
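# ---------------------------------------------------------------------------
# The snippets in this section all lean on a small helper module `hlp`
# (reduce_logsumexp, logsoftmax) that is never shown. The sketch below is a
# reconstruction inferred from how the helpers are called here, not the
# original file; the exact signatures and defaults are assumptions.
import tensorflow as tf

def reduce_logsumexp(input_tensor, reduction_indices=1, keep_dims=False):
    # Numerically stable log(sum(exp(x))) along one axis:
    # shift by the per-row max so tf.exp never overflows.
    max_val = tf.reduce_max(input_tensor, reduction_indices, keepdims=True)
    result = tf.log(tf.reduce_sum(tf.exp(input_tensor - max_val),
                                  reduction_indices, keepdims=True)) + max_val
    if not keep_dims:
        result = tf.squeeze(result, [reduction_indices])
    return result

def logsoftmax(input_tensor):
    # Log softmax along axis 1: x - logsumexp(x); rows sum to one after exp.
    return input_tensor - reduce_logsumexp(input_tensor, keep_dims=True)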
def GMM(K, D):
    tf.set_random_seed(421)
    X = tf.placeholder(tf.float32, shape=(None, D), name="trainData")
    means = tf.Variable(tf.random_normal(shape=[K, D], stddev=1.0), name="means")
    phi = tf.Variable(tf.random_normal(shape=[K, 1], stddev=1.0), name="Phi")
    psi = tf.Variable(tf.random_normal(shape=[K, 1], stddev=1.0), name="Psi")

    sigma = tf.sqrt(tf.exp(phi))    # exp keeps the variance positive
    psi_soft = hlp.logsoftmax(psi)  # log pi, normalized over clusters
    prob = tf.exp(psi_soft)         # pi itself

    log_gauss = log_GaussPDF(X, means, sigma)
    # tf.log(prob) is just psi_soft, so use it directly
    loss = -tf.reduce_sum(
        hlp.reduce_logsumexp(log_gauss + tf.transpose(psi_soft), 1), axis=0)

    # Returns an N-vector of best-cluster assignments;
    # log_posterior expects log pi, so pass psi_soft rather than prob.
    best_cluster = tf.argmax(log_posterior(log_gauss, psi_soft), axis=1)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.1, beta1=0.9,
                                       beta2=0.99, epsilon=1e-5).minimize(loss)
    return X, means, prob, best_cluster, loss, optimizer, log_gauss
def main():
    data = np.load('data2D.npy')
    [num_pts, dim] = np.shape(data)
    is_valid = False
    K = 5

    # For Validation set
    if is_valid:
        valid_batch = int(num_pts / 3.0)
        np.random.seed(45689)
        rnd_idx = np.arange(num_pts)
        np.random.shuffle(rnd_idx)
        val_data = data[rnd_idx[:valid_batch]]
        data = data[rnd_idx[valid_batch:]]

    X = tf.placeholder(tf.float32, [num_pts, dim], name='X')
    mu = tf.Variable(tf.truncated_normal([K, dim], stddev=0.05), name="mu")
    phi = tf.Variable(tf.zeros([K, 1]), name="sigma")
    sigma = tf.exp(phi)  # keeps the variance positive without clipping
    log_PDF = log_GaussPDF(X, mu, sigma)

    pi = tf.Variable(tf.truncated_normal([K, 1], stddev=0.05), name="pi")
    log_pi = hlp.logsoftmax(pi)
    log_Posterior = log_posterior(log_PDF, log_pi)

    logPX = tf.reduce_sum(
        hlp.reduce_logsumexp(tf.add(tf.transpose(log_pi), log_PDF)))
    Loss = -logPX
    train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(Loss)

    sess = tf.Session()
    distances = distanceFunc(X, mu)  # unused here; kept from the k-means variant
    nearestIndices = tf.argmax(log_Posterior, 1)
    partitions = tf.dynamic_partition(X, tf.to_int32(nearestIndices), K)

    with sess.as_default():
        init = tf.global_variables_initializer()
        sess.run(init)
        for i in range(800):
            sess.run(train_op, feed_dict={X: np.float32(data)})
        print(sess.run(mu))
        print(sess.run(tf.exp(log_pi)))
        print(sess.run(Loss, feed_dict={X: np.float32(data)}))
        updated_centroid_value = sess.run(mu)
        part = sess.run(partitions, feed_dict={X: np.float32(data)})

    # Don't reuse the name `data` for the loop variable, or the scatter
    # plot of the original data at the end breaks.
    for cluster_pts in part:
        print(len(cluster_pts))

    plt.figure()
    colour = plt.cm.rainbow(np.linspace(0, 1, len(updated_centroid_value)))
    for i, centroid in enumerate(updated_centroid_value):
        print(len(part[i]))
        for j, point in enumerate(part[i]):
            plt.scatter(point[0], point[1], c=colour[i])
        plt.plot(centroid[0], centroid[1], markersize=35, marker="x", color='k')
    plt.savefig('cluster5' + '.png')

    plt.figure()
    plt.scatter(data[:, 0], data[:, 1])
    plt.savefig('Originaldata' + '.png')
def gMM():
    X = tf.placeholder(tf.float32, [None, dim], name="X")
    mu = tf.get_variable('mean', dtype=tf.float32, shape=[K, dim],
                         initializer=tf.truncated_normal_initializer(stddev=2))
    phi = tf.get_variable('stdDev', dtype=tf.float32, shape=[K, 1],
                          initializer=tf.truncated_normal_initializer(
                              mean=4, stddev=0.5))
    sigma = tf.abs(phi)  # absolute value keeps the deviation positive
    psi = tf.get_variable('logPiProb', dtype=tf.float32, shape=[K, 1],
                          initializer=tf.truncated_normal_initializer(
                              mean=1, stddev=0.25))
    log_pi = hlp.logsoftmax(psi)

    log_PDF = log_GaussPDF(X, mu, sigma)
    log_rnj = log_posterior(log_PDF, log_pi)
    lossfunc = neg_log_likelihood(log_PDF, log_pi)
    belong = tf.argmax(log_rnj, axis=1)  # tf.arg_max is deprecated

    optimizer = tf.train.AdamOptimizer(learning_rate=0.05, beta1=0.9,
                                       beta2=0.99, epsilon=1e-5)
    train = optimizer.minimize(loss=lossfunc)
    return X, mu, sigma, lossfunc, log_pi, log_PDF, log_rnj, train, belong
def build_graph(K, D, alpha):
    tf.set_random_seed(421)
    # Placeholders for the data and the learning rate
    X = tf.placeholder(dtype=tf.float32, shape=[None, D], name="data")
    MU = tf.Variable(
        tf.random_normal(dtype=tf.float32, shape=[K, D]), name="MU")
    phi = tf.Variable(
        tf.truncated_normal([1, K], mean=0.0, stddev=1.0, dtype=tf.float32))
    learning_rate = tf.placeholder(dtype=tf.float32, name="learning_rate")
    sigma = tf.Variable(
        tf.random_normal(dtype=tf.float32, shape=[K, 1]), name="sigma")

    log_pi = hlp.logsoftmax(phi)
    # Compute log P(x_n | z_n = k)
    log_PDF = log_GaussPDF(X, MU, sigma)
    # Compute log P(z = k | x)
    log_post = log_posterior(log_PDF, log_pi)
    pred = tf.argmax(log_post, axis=1)
    total_loss = calculate_loss(log_PDF, log_post)
    # Plain gradient descent; the node was misleadingly named "Adam" before
    gd_optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=alpha, name="SGD").minimize(total_loss)
    return total_loss, gd_optimizer, X, MU, pred, learning_rate
def MoG():
    X = tf.placeholder(tf.float32, [None, D], name="X")
    MU = tf.get_variable('mean', dtype=tf.float32, shape=[K, D],
                         initializer=tf.initializers.random_normal())
    Psi = tf.get_variable('variance', dtype=tf.float32, shape=[K, 1],
                          initializer=tf.initializers.random_normal())
    Pi = tf.get_variable('posterior', dtype=tf.float32, shape=[K, 1],
                         initializer=tf.initializers.random_normal())
    log_Pi = hlp.logsoftmax(Pi)
    Sigma2 = tf.exp(Psi)

    Gauss_PDF = log_GaussPDF(X, MU, Sigma2)
    Log_Post = log_posterior(Gauss_PDF, log_Pi)
    Belong = tf.argmax(Log_Post, axis=1)  # tf.arg_max is deprecated

    # The marginal likelihood must be built from log pi + log PDF;
    # logsumexp over the already-normalized posterior is identically zero.
    lossfunc = -tf.reduce_sum(
        hlp.reduce_logsumexp(Gauss_PDF + tf.transpose(log_Pi)))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01, beta1=0.9,
                                       beta2=0.99, epsilon=1e-5)
    train = optimizer.minimize(loss=lossfunc)
    return X, MU, Psi, Pi, lossfunc, Belong, train
def compute_loss(X, mu, sigma, pi):
    log_pi = tf.squeeze(hlp.logsoftmax(pi))
    log_pdf = log_GaussPDF(X, mu, sigma)
    log_loss = -1 * tf.reduce_sum(
        hlp.reduce_logsumexp(log_pdf + log_pi, keep_dims=True))
    return log_loss, log_pdf, log_pi
def buildGraph(K, D):
    tf.compat.v1.set_random_seed(421)
    MU = tf.Variable(tf.random.normal(shape=[K, D]))
    psi = tf.Variable(tf.random.normal(shape=[K, 1]))
    phi = tf.Variable(tf.random.normal(shape=[K, 1]))
    X = tf.compat.v1.placeholder(tf.float32, [None, D])

    sigma = tf.sqrt(tf.exp(phi))  # exp keeps the variance positive
    log_pi = hlp.logsoftmax(psi)
    pi = tf.math.exp(log_pi)

    logPDF = log_GaussPDF(X, MU, sigma)
    post = log_posterior(logPDF, log_pi)
    assignments = tf.math.argmax(post, axis=1)

    loss = -tf.reduce_sum(
        hlp.reduce_logsumexp(tf.transpose(log_pi) + logPDF, keep_dims=True))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.1, beta1=0.9,
                                                 beta2=0.99, epsilon=1e-5)
    train = optimizer.minimize(loss)
    return MU, sigma, pi, X, assignments, loss, train
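# A minimal driver for a graph-builder like buildGraph above. The data file,
# K, and the iteration count are illustrative assumptions, carried over from
# the other snippets in this section.
import numpy as np

data = np.load('data2D.npy').astype(np.float32)  # N x D
MU, sigma, pi, X, assignments, loss, train = buildGraph(K=3, D=data.shape[1])

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    for step in range(500):
        _, loss_val = sess.run([train, loss], feed_dict={X: data})
    labels = sess.run(assignments, feed_dict={X: data})
    print('final loss:', loss_val, 'cluster sizes:', np.bincount(labels))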
def GMM(K):
    # Define the model parameters
    MU = tf.Variable(tf.truncated_normal(shape=[K, dim]), name="MU")
    sigma = tf.Variable(tf.truncated_normal(shape=[K, 1]), name="sigma")
    sigma = tf.exp(sigma)  # rebind to the positive (exp) form used below
    log_pi = tf.Variable(tf.truncated_normal(shape=[K, 1]), name="pi")
    log_pi = hlp.logsoftmax(log_pi)

    # Input data
    X = tf.placeholder(tf.float32, [None, dim], name='data')

    # Cluster-conditional log likelihoods
    log_PDF = log_GaussPDF(X, MU, sigma)

    # Find the most probable cluster for each point
    log_post = log_posterior(log_PDF, log_pi)
    assigned = tf.argmax(log_post, 1)

    # Define the loss function: negative log marginal likelihood
    loss = tf.add(log_PDF, tf.transpose(log_pi))
    loss = hlp.reduce_logsumexp(loss)
    loss = -1 * tf.reduce_sum(loss)

    # Train the model
    train = tf.train.AdamOptimizer(learning_rate=0.1, beta1=0.9,
                                   beta2=0.99, epsilon=1e-5).minimize(loss)
    return MU, sigma, log_pi, X, assigned, loss, train
def log_posterior(log_PDF, log_pi):
    # Input
    # log_PDF: log Gaussian PDF, N x K
    # log_pi:  K x 1
    # Output
    # log_post: N x K
    return hlp.logsoftmax(log_PDF + tf.transpose(log_pi))
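# Nearly every variant here also calls log_GaussPDF (and often distanceFunc),
# which the snippets never define. The sketch below shows the shape contract
# they assume: spherical Gaussians with one variance per cluster. Note the
# callers disagree on whether the third argument is a standard deviation or a
# variance; this hypothetical version takes the variance.
import numpy as np

def distanceFunc(X, MU):
    # Pairwise squared Euclidean distances, N x K.
    X_exp = tf.expand_dims(X, 1)    # N x 1 x D
    MU_exp = tf.expand_dims(MU, 0)  # 1 x K x D
    return tf.reduce_sum(tf.square(X_exp - MU_exp), axis=2)

def log_GaussPDF(X, mu, sigma2):
    # log N(x_n; mu_k, sigma2_k * I) for every pair (n, k), N x K.
    D = tf.cast(tf.shape(X)[1], tf.float32)
    dist = distanceFunc(X, mu)         # N x K
    sigma2_row = tf.transpose(sigma2)  # 1 x K
    return -0.5 * D * tf.log(2.0 * np.pi * sigma2_row) - dist / (2.0 * sigma2_row)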
def log_posterior(log_PDF, log_pi):
    # Input
    # log_PDF: log Gaussian PDF, N x K
    # log_pi:  K x 1
    #
    # Output
    # log_post: N x K
    # log_pi must be transposed to 1 x K so it broadcasts across the N rows;
    # adding a K x 1 tensor to an N x K tensor fails unless N == K.
    p_xz = tf.add(log_PDF, tf.transpose(log_pi))
    return hlp.logsoftmax(p_xz)
def gMM():
    X = tf.placeholder(tf.float32, [None, dim], name="X")
    # Alternative initializations (a fixed ring of means, zeros/ones for the
    # deviation, uniform pi) were tried here and left commented out in the
    # original; only the versions actually in use are kept below.
    mu = tf.get_variable('mean', dtype=tf.float32, shape=[K, dim],
                         initializer=tf.truncated_normal_initializer(stddev=2))
    sigma_holder = tf.get_variable('stdDev', dtype=tf.float32, shape=[K, 1],
                                   initializer=tf.truncated_normal_initializer(
                                       mean=1, stddev=0.25))
    # Squaring keeps the variance positive; exp, abs and 1.2**x were also tried.
    sigma = tf.pow(sigma_holder, 2)
    pi_holder = tf.get_variable('logPiProb', dtype=tf.float32, shape=[K, 1],
                                initializer=tf.truncated_normal_initializer(
                                    mean=1, stddev=0.25))
    log_pi = hlp.logsoftmax(pi_holder)

    log_PDF = log_GaussPDF(X, mu, sigma)
    log_rnj = log_posterior(log_PDF, log_pi)
    lossfunc = neg_log_likelihood(log_PDF, log_pi)
    belong = tf.argmax(log_rnj, axis=1)  # tf.arg_max is deprecated

    # Plain gradient descent and momentum were also tried; Adam was kept.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.05, beta1=0.9,
                                       beta2=0.99, epsilon=1e-5)
    train = optimizer.minimize(loss=lossfunc)
    return X, mu, sigma, lossfunc, log_pi, log_PDF, log_rnj, train, belong, sigma_holder
def log_posterior(log_PDF, log_pi):
    # Input
    # log_PDF: log Gaussian PDF, N x K
    # log_pi:  K x 1
    # Output
    # log_post: N x K
    # Squeeze log_pi to shape (K,) so it broadcasts across the N rows
    input_tensor = log_PDF + tf.squeeze(log_pi)
    log_post = hlp.logsoftmax(input_tensor)  # normalizes via logsumexp
    return log_post
def MOGLoss(K):
    N, D = num_pts, dim
    X = tf.placeholder(tf.float32, shape=(N, D), name="X_VAL")
    MU = tf.get_variable(name="MU_VAL",
                         initializer=tf.random.normal(shape=[K, D]))
    sigma = tf.get_variable(shape=(K, 1), name="sigma_VAL")
    pi = tf.get_variable(shape=(1, K), name="pi_VAL")

    # Take the exponent of sigma as per instructions, so the variance stays positive
    sexp = tf.exp(sigma)

    # Compute log P(x_n | z_n = k)
    log_PDF = log_GaussPDF(X, MU, sexp)
    sum_l = hlp.reduce_logsumexp(hlp.logsoftmax(pi) + log_PDF)
    loss = -1 * tf.reduce_sum(sum_l)

    # Log posterior for plotting the clusters at the end
    sm = hlp.logsoftmax(log_PDF)
    return MU, X, loss, sigma, pi, sm
def initializeVars():
    centroid = tf.get_variable('mean', shape=(k, dim),
                               initializer=tf.initializers.random_normal())
    # Unconstrained form of the variance
    phi = tf.get_variable('phi', shape=(1, k),
                          initializer=tf.initializers.random_normal())
    # Constrained (positive) form of the variance
    variance = tf.math.exp(phi)
    # Unconstrained form of the weights
    gamma = tf.get_variable('gamma', shape=(k, 1),
                            initializer=tf.initializers.random_normal())
    # Constrained (normalized) form of the weights
    logPi = tf.transpose(hlp.logsoftmax(gamma))
    return centroid, variance, logPi
def build_graph(d, k, lr):
    x = tf.placeholder(dtype=tf.float32, shape=[None, d])
    mu = tf.Variable(initial_value=tf.random_normal(shape=[k, d]))
    sigma = tf.Variable(initial_value=tf.random_normal(shape=[k, 1]))
    pi = tf.Variable(initial_value=tf.random_normal(shape=[k, 1]))

    e_sigma = tf.exp(sigma)
    log_pi = hlp.logsoftmax(tf.reshape(pi, [-1]))

    loss = neg_log_prob(x, mu, e_sigma, log_pi)
    log_pdf = log_gauss_pdf(x, mu, e_sigma)
    log_post = log_posterior(log_pdf, log_pi)
    assignments = tf.argmax(log_post, axis=1)
    loss = tf.reduce_sum(loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9,
                                       beta2=0.99, epsilon=1e-5)
    optimizer_op = optimizer.minimize(loss)
    return x, mu, sigma, pi, assignments, loss, optimizer_op
def buildGraph(learning_rate, dim, k):
    # Variable creation
    input_x = tf.placeholder(tf.float32, [None, dim], name='input_x')
    k_centers = tf.Variable(tf.random_normal([k, dim], stddev=0.5))
    phi = tf.Variable(tf.random_normal([k, 1], stddev=0.5))
    var = tf.exp(phi)
    sai = tf.Variable(tf.random_normal([k, 1], stddev=0.5))
    # Despite the name, logpi holds pi itself (the exponentiated log-softmax),
    # which is what the local log_posterior and loss helpers expect.
    logpi = tf.exp(logsoftmax(sai))

    log_posterior1 = log_posterior(logpi, input_x, k_centers, var)
    Ein = loss(logpi, input_x, k_centers, var)
    predicted = tf.argmax(log_posterior1, 1)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9,
                                       beta2=0.99,
                                       epsilon=1e-5).minimize(loss=Ein)
    return input_x, k_centers, optimizer, logpi, predicted, var, Ein
def learning(data, val_data=None, learning_rate=0.01, epsilon=1e-5, epochs=600):
    N = num_pts
    D = dim
    K = num_class
    Y = []
    Y_v = []
    Xv = []
    tf.set_random_seed(421)

    X = tf.placeholder(dtype=tf.float32, name="data")  # N x D
    # Note: exp is applied only at initialization here, so sigma is not
    # constrained to stay positive during training.
    sigma = tf.Variable(tf.exp(tf.random_normal([K], mean=0.0, stddev=0.5)))
    pi = logsoftmax(tf.Variable(tf.random_normal([K, 1], mean=0.0, stddev=0.5)))
    MU = tf.Variable(tf.random_normal([K, D], mean=0.0, stddev=1.0,
                                      dtype=tf.float32), name="mu")

    error, log_probs = neg_log(X, MU, sigma, pi)
    sols = tf.argmax(log_probs, axis=1)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9,
                                       beta2=0.99, epsilon=1e-5).minimize(error)
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for i in range(epochs):
            logs, m, er, op = sess.run([log_probs, MU, error, optimizer],
                                       feed_dict={X: data})
            if is_valid:
                # Evaluate only; don't run the train op on the validation split
                logs_v, er_v = sess.run([log_probs, error],
                                        feed_dict={X: val_data})
                Y_v.append(er_v)
            Y.append(er)
            Xv.append(i)
        cluster_assignments = sess.run(sols, feed_dict={X: data})
        M = MU.eval()
        P = pi.eval()
        S = sigma.eval()

    list_clusters = groupClusters(data, logs)
    return list_clusters, M, P, S, Xv, Y, Y_v
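# neg_log and groupClusters are external helpers that `learning` assumes. The
# sketch below is a hypothetical reconstruction of neg_log, inferred only from
# how its outputs are used (a scalar loss plus an N x K log-probability
# matrix); the original implementation may differ.
def neg_log(X, MU, sigma, log_pi):
    log_PDF = log_GaussPDF(X, MU, sigma)     # N x K cluster log likelihoods
    weighted = log_PDF + tf.squeeze(log_pi)  # add log pi_k to each column
    error = -tf.reduce_sum(reduce_logsumexp(weighted, 1))  # neg. log marginal
    log_probs = logsoftmax(weighted)         # N x K log posterior
    return error, log_probs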
def buildGraph():
    k = 3
    x = tf.placeholder(tf.float32, [None, dim])
    mu = tf.get_variable("MU", initializer=tf.truncated_normal(shape=(k, dim)))
    sigma = tf.get_variable("sigma",
                            initializer=tf.truncated_normal(shape=(k, 1)))
    pi = tf.get_variable("pi", initializer=tf.truncated_normal(shape=(k, 1)))

    log_pi = hlp.logsoftmax(pi)
    log_PDF = log_GaussPDF(x, mu, tf.sqrt(tf.exp(sigma)))
    log_post = log_posterior(log_PDF, log_pi)
    cluster = tf.argmax(log_post, axis=1)

    loss = -tf.reduce_sum(
        hlp.reduce_logsumexp(log_PDF + tf.transpose(log_pi), keep_dims=True))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.1, beta1=0.9,
                                       beta2=0.99, epsilon=1e-5)
    training_op = optimizer.minimize(loss=loss)
    return training_op, x, mu, cluster, loss, log_pi, tf.sqrt(tf.exp(sigma))
def kmeans(K, is_valid=False):
    # Despite the name, this trains a mixture of Gaussians, not k-means.
    # Loading data
    data = np.load('data100D.npy')  # swap in data2D.npy for the 2-D set
    [N, D] = np.shape(data)

    # For Validation set
    if is_valid:
        valid_batch = int(N / 3.0)
        np.random.seed(45689)
        rnd_idx = np.arange(N)
        np.random.shuffle(rnd_idx)
        val_data = data[rnd_idx[:valid_batch]]
        data = data[rnd_idx[valid_batch:]]

    np.random.seed(521)
    num_ep = 1000
    losses = []
    assg = []
    valid_losses = []
    learning_rate = 0.003
    s_stddev = 0.05

    X = tf.placeholder("float", [None, D], "X")
    mu = tf.Variable(tf.random_normal([K, D], stddev=s_stddev))
    sigma = tf.Variable(tf.random_normal([K, 1], stddev=s_stddev))
    sigma = tf.exp(sigma)  # positive variance
    log_PDF = log_GaussPDF(X, mu, sigma)
    initial_pi = tf.Variable(tf.random_normal([K, 1], stddev=s_stddev))
    log_pi = tf.squeeze(hlp.logsoftmax(initial_pi))

    # Total negative log likelihood
    loss = -tf.reduce_sum(
        hlp.reduce_logsumexp(log_PDF + log_pi, 1, keep_dims=True))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train = optimizer.minimize(loss)

    # Determine the clusters
    pred = tf.argmax(tf.nn.softmax(log_posterior(log_PDF, log_pi)), 1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        for i in range(num_ep):
            cenVal, cur_l, _, assg = sess.run([mu, loss, train, pred],
                                              feed_dict={X: data})
            losses.append(cur_l)
            if is_valid:
                # Evaluate only; don't run the train op on the validation split
                valid_loss = sess.run(loss, feed_dict={X: val_data})
                valid_losses.append(valid_loss)

        print("K = {}, Final loss: {}".format(K, losses[-1]))
        clusters = Counter(assg)
        assg = np.int32(assg)
        for i in range(K):
            print("Cluster {}: {}%".format(i, clusters[i] * 100.0 / N))

        plt.scatter(data[:, 0], data[:, 1], c=assg,
                    cmap=plt.get_cmap('Set3'), s=25, alpha=0.6)
        plt.scatter(cenVal[:, 0], cenVal[:, 1], marker='*', c="black",
                    cmap=plt.get_cmap('Set1'), s=80, linewidths=2)
        plt.title('MoG Clustering')
        plt.xlabel('X1')
        plt.ylabel('X2')
        plt.grid()
        plt.show()

        plt.figure(1)
        plt.plot(range(len(losses)), losses, c="c", label="train_loss")
        plt.plot(range(len(valid_losses)), valid_losses, c="r",
                 label="valid_loss")
        plt.legend(loc="best")
        plt.title('MoG Training History')
        plt.xlabel('# of Iterations')
        plt.ylabel('Loss')
        plt.show()
    return valid_losses
# (The opening of this snippet was truncated; the first variable is evidently
# the K x D mean matrix, reconstructed here with a hypothetical name chosen
# to match its siblings below.)
mu = tf.get_variable(name='mean_vector',
                     dtype=tf.float64,
                     shape=(K, data.shape[1]),
                     initializer=tf.initializers.random_normal(seed=0))
phi = tf.get_variable(name='stdev_vector',
                      dtype=tf.float64,
                      shape=(K, 1),
                      initializer=tf.initializers.random_normal(seed=0))
psi = tf.get_variable(name='pi_vector',
                      dtype=tf.float64,
                      shape=(K, 1),
                      initializer=tf.initializers.random_normal(seed=0))

sigma = tf.exp(phi)
logGaussPDF = log_GaussPDF(x, mu, sigma)
logPi = hlp.logsoftmax(psi)
log_post = log_posterior(logGaussPDF, logPi)
NLLloss = NLL_loss(logGaussPDF, logPi)
optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9, beta2=0.99,
                                   epsilon=1e-5).minimize(NLLloss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_loss_list = []
stdvec = tf.get_variable(
    name='std',
    dtype=tf.float64,
    shape=(K, 1),
    initializer=tf.initializers.random_normal(seed=366901768))
prior = tf.get_variable(
    name='pi',
    dtype=tf.float64,
    shape=(K, 1),
    initializer=tf.initializers.random_normal(seed=1566557))

sigma = tf.exp(stdvec)
log_gauss_pdf = log_GaussPDF(x, mu, sigma)
log_prior = hlp.logsoftmax(prior)  # ensures the priors are normalized
log_post = log_posterior(log_gauss_pdf, log_prior)

# Defining the loss function: negative log marginal likelihood
temp = hlp.reduce_logsumexp(tf.squeeze(log_prior) + log_gauss_pdf)
loss = -tf.reduce_sum(temp)
optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_loss = []
    for epoch in range(epochs):
        sess.run(optimizer, feed_dict={x: data})
tf.reset_default_graph()
D = data.shape[1]
K = 15  # number of clusters
LEARNING_RATE = 0.01
MAX_ITERS = 1000

points = tf.placeholder(dtype=tf.float32, shape=[None, D], name='points')
centroid_init = tf.truncated_normal(shape=[K, D], dtype=tf.float32)
sigma_init = tf.truncated_normal(shape=[K, 1], dtype=tf.float32)
centroids = tf.get_variable(dtype=tf.float32, initializer=centroid_init,
                            name="centroids")
distances = distanceFunc(points, centroids)  # for k-means only

phi = tf.Variable(tf.truncated_normal(shape=[K, 1], dtype=tf.float32))
good_sigma_square = tf.exp(phi)
good_sigma = tf.sqrt(good_sigma_square)
# The mixture weights need their own variable; deriving them from the
# variance (as the original did) ties two unrelated parameters together.
log_pi = hlp.logsoftmax(tf.Variable(tf.random_normal([K, 1],
                                                     dtype=tf.float32)))
log_pi = tf.transpose(log_pi)

# Feed the placeholder, not the numpy array, so the graph is reusable
log_pdf = log_GaussPDF(points, centroids, good_sigma)
log_pst = log_posterior(log_pdf, log_pi)
log_loss = hlp.reduce_logsumexp(log_pdf + log_pi, reduction_indices=1)

# Loss and optimizer for MoG: negative log likelihood
loss = tf.reduce_sum(-1 * log_loss)
# For Validation set
if is_valid:
    valid_batch = int(num_pts / 3.0)
    np.random.seed(45689)
    rnd_idx = np.arange(num_pts)
    np.random.shuffle(rnd_idx)
    val_data = data[rnd_idx[:valid_batch]]
    data = data[rnd_idx[valid_batch:]]

K = 30
epochs = 3000
pk = tf.get_variable(name='pk', shape=[K, 1],
                     initializer=tf.random_normal_initializer())
log_pi_ = hlp.logsoftmax(pk)
X = tf.placeholder(dtype=tf.float32, shape=[None, dim], name="X")
sigma_ = tf.exp(
    tf.get_variable(name='sigma', shape=[K, 1],
                    initializer=tf.random_normal_initializer()))
MU = tf.get_variable(name='MU', shape=[K, dim],
                     initializer=tf.random_normal_initializer())

log_PDF_ = log_GaussPDF(X, MU, sigma_)
loss = -tf.reduce_sum(
    hlp.reduce_logsumexp(
        tf.add(log_PDF_, tf.transpose(log_pi_)), 1, keep_dims=True))
# The snippet was cut off mid-call; the epsilon and minimize arguments are
# restored to match the Adam settings used throughout this section.
optimizer = tf.train.AdamOptimizer(learning_rate=0.003, beta1=0.9, beta2=0.99,
                                   epsilon=1e-5).minimize(loss)
MU = tf.get_variable(name='MU', shape=(K, D), dtype=tf.float32,
                     initializer=tf.initializers.random_normal,
                     trainable=True)
log_sigma = tf.get_variable(name='log_sigma', shape=(K, 1), dtype=tf.float32,
                            initializer=tf.initializers.zeros,
                            trainable=True)
sigma = tf.exp(log_sigma, name='sigma')

phi = tf.ones((K, 1), dtype=tf.float32)  # fixed: uniform mixture weights
log_pi = hlp.logsoftmax(phi)

log_PDF = log_GaussPDF(X, MU, sigma)
log_post = log_posterior(log_PDF, log_pi)

logp = log_PDF + log_post
logp = hlp.reduce_logsumexp(logp, reduction_indices=1, keep_dims=False)
logp = tf.reduce_sum(logp, axis=0)
assert logp.shape == ()

pred = tf.argmax(log_PDF, axis=1)
loss = -logp
def MLE(learning_rate, K, data_to_use=data, num_pts_to_use=num_pts, D=dim,
        epochs=1000, validation=False, loss_data=False, percentages=False,
        showProgress=True):
    tf.set_random_seed(421)

    # Data
    x = tf.placeholder(tf.float32, shape=(None, D), name='x')

    # Variables
    sigma_unbounded = tf.Variable(tf.random_normal([K, 1]),
                                  name='sigma_unbounded')
    MU = tf.Variable(tf.random_normal([K, D]), name='MU')
    proba_pi_unbounded = tf.Variable(tf.random_normal([K, 1]), name='proba_pi')

    # Transform pi and sigma so they satisfy their respective constraints
    log_pi = hlp.logsoftmax(proba_pi_unbounded)
    sigma = tf.exp(sigma_unbounded, name='sigma')

    # Calculating loss
    log_PDF = log_GaussPDF(x, MU, sigma)
    log_weighted_PDF = tf.transpose(log_pi) + log_PDF
    loss_op = -tf.reduce_sum(hlp.reduce_logsumexp(log_weighted_PDF))

    # Determining the best cluster for each point
    log_posterior_tensor = log_posterior(log_PDF, log_pi)
    sets = tf.argmax(log_posterior_tensor, 1)

    # Optimize loss
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9,
                                       beta2=0.99, epsilon=1e-5)
    train_op = optimizer.minimize(loss_op)
    init = tf.global_variables_initializer()
    trainLossHistory = np.zeros(epochs)
    trainData = data_to_use

    # For Validation set
    if validation:
        valid_batch = int(num_pts_to_use / 3.0)
        np.random.seed(45689)
        rnd_idx = np.arange(num_pts_to_use)
        np.random.shuffle(rnd_idx)
        valdata = data_to_use[rnd_idx[:valid_batch]]
        trainData = data_to_use[rnd_idx[valid_batch:]]

    with tf.Session() as sess:
        sess.run(init)
        print("Starting MLE algorithm with K=" + str(K))
        for step in range(0, epochs):
            _, trainLoss, sets_after, mu_out, sigma_out, l_pi = sess.run(
                [train_op, loss_op, sets, MU, sigma, log_pi],
                feed_dict={x: trainData})
            if loss_data:
                trainLossHistory[step] = trainLoss
            if step % 100 == 0 and showProgress:
                print("Step " + str(step))
                print(trainLoss)
                plt.scatter(trainData[:, 0], trainData[:, 1], c=sets_after,
                            s=10, alpha=0.1)
                plt.plot(mu_out[:, 0], mu_out[:, 1], 'r+', markersize=20,
                         mew=3)
                plt.show()
        print("Optimization Finished!")

        sets_final, mu_final, sigma_final, log_pi_final = sess.run(
            [sets, MU, sigma, log_pi], feed_dict={x: data_to_use})
        pi_final = np.exp(log_pi_final)

        validationLoss = 0
        if validation:
            validationLoss = sess.run([loss_op], feed_dict={x: valdata})

        dict_percentages = 0
        if percentages:
            unique, counts = np.unique(sets_final, return_counts=True)
            dict_percentages = counts / np.sum(counts)

    return mu_final, trainLossHistory, validationLoss, dict_percentages, sets_final, sigma_final, pi_final
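# A sketch of how a driver might call MLE above; data, num_pts and dim are
# module-level globals in the original, and the K and learning rate here are
# illustrative only.
mu, loss_hist, val_loss, pct, assignments, sigma_out, pi_out = MLE(
    learning_rate=0.1, K=3, epochs=500, validation=True,
    loss_data=True, percentages=True, showProgress=False)
print('final train loss:', loss_hist[-1])
print('validation loss:', val_loss)
print('cluster proportions:', pct)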
def GMM(K, is_valid=False):
    print('Enter K = {} -----------------------------------------'.format(K))
    # For Validation set
    train_data = data
    train_batch = num_pts
    if is_valid:
        valid_batch = int(num_pts / 3.0)
        train_batch = num_pts - valid_batch
        np.random.seed(45689)
        rnd_idx = np.arange(num_pts)
        np.random.shuffle(rnd_idx)
        val_data = data[rnd_idx[:valid_batch]]
        train_data = data[rnd_idx[valid_batch:]]

    X = tf.placeholder(tf.float32, [None, dim])
    MU = tf.Variable(tf.truncated_normal([K, dim], dtype=tf.float32))
    phi = tf.Variable(tf.truncated_normal([K, 1], dtype=tf.float32))
    psi = tf.Variable(tf.truncated_normal([K, 1], dtype=tf.float32))

    sigma_sq = tf.exp(phi)        # K x 1
    log_pi = hlp.logsoftmax(psi)  # K x 1
    log_gauss = log_GaussPDF(X, MU, sigma_sq)    # N x K
    log_post = log_posterior(log_gauss, log_pi)  # N x K
    assign_predict = tf.argmax(log_post, 1)

    loss = -tf.reduce_sum(
        hlp.reduce_logsumexp(log_gauss + tf.transpose(log_pi), 1), axis=0)
    percentages = tf.unique_with_counts(assign_predict)
    adam_op = tf.train.AdamOptimizer(learning_rate=0.1, beta1=0.9, beta2=0.99,
                                     epsilon=1e-5).minimize(loss)
    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)

    train_loss = []
    valid_loss = []
    for i in range(500):
        _, MU_value, log_pi_values, variance, loss_value, log_post_value, \
            predictions, percentages_value = sess.run(
                [adam_op, MU, log_pi, sigma_sq, loss, log_post,
                 assign_predict, percentages],
                feed_dict={X: train_data})
        train_loss.append(loss_value)
        if is_valid:
            val_loss = sess.run(loss, feed_dict={X: val_data})
            valid_loss.append(val_loss)

    # Compute the percentage of points assigned to each cluster
    print(percentages_value)
    sort_percentages = percentages_value[2][np.argsort(percentages_value[0])]
    print(np.divide(sort_percentages, train_batch))

    print('final loss on train data :')
    print(train_loss[-1])
    if is_valid:  # guard: valid_loss is empty when no validation split is used
        print('final loss on valid data :')
        print(valid_loss[-1])
def loss_function(log_PDF, log_pi):
    loss = -tf.reduce_sum(
        hlp.reduce_logsumexp(log_PDF + log_pi, 1, keep_dims=True), axis=0)
    return loss


phi = tf.Variable(
    tf.random_normal(np.array([K, 1]), stddev=0.05,
                     dtype=X.dtype))  # the phi value
sigma = tf.exp(phi)  # the variance of the model
psi = tf.Variable(
    tf.random_normal(np.array([K, 1]), stddev=0.05,
                     dtype=X.dtype))  # the psi value
logpi = tf.squeeze(hlp.logsoftmax(psi))  # the log of the pi parameter
logpdf = log_GaussPDF(X, MU, sigma)
prediction = tf.argmax(tf.nn.softmax(log_posterior(logpdf, logpi)), 1)
loss = loss_function(logpdf, logpi)
optimizer = tf.train.AdamOptimizer(learning_rate=0.1, beta1=0.9, beta2=0.99,
                                   epsilon=1e-5).minimize(
                                       loss)  # Adam optimizing function

with tf.Session() as training_loop:
    tf.initializers.global_variables().run()
    prev_loss = float('inf')
    # log_post: N X K
    # Work in log space throughout; the original exponentiated mixture
    # (PDF * pi) inside logsumexp, which is not the log-domain identity.
    return log_pi + log_PDF - hlp.reduce_logsumexp(log_pi + log_PDF, 1, True)


X = tf.constant(data, dtype=tf.float32)
MU = tf.Variable(tf.random_normal(shape=(K, dim)), name='mean',
                 dtype=tf.float32)
PHI = tf.Variable(tf.random_normal(shape=(K, 1)), name='phi',
                  dtype=tf.float32)
PSI = tf.Variable(tf.random_normal(shape=(K, 1)), name='psi',
                  dtype=tf.float32)

dist = distanceFunc(X, MU)
PHI = tf.reshape(PHI, [1, K])
sigma_squared = tf.exp(PHI)
norm_curve = tf.pow(1 / tf.sqrt(2 * math.pi * sigma_squared), dim) * \
    tf.exp((-dist) / (2 * sigma_squared))
PI = tf.exp(hlp.logsoftmax(PSI))
PI = tf.reshape(PI, [1, K])

assignments = tf.argmax(
    tf.exp(log_posterior(tf.log(norm_curve), tf.log(PI))), 1)
# Negative log likelihood: take logs before logsumexp, not after
loss = -tf.reduce_sum(
    hlp.reduce_logsumexp(tf.log(PI) + tf.log(norm_curve), 1))
optimizer = tf.train.AdamOptimizer(learning_rate=0.022, beta1=0.9,
                                   beta2=0.99, epsilon=1e-5)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()

with tf.Session() as sess:
# Loss function of the GMM:
def loss_function(log_PDF, log_pi):
    loss = -tf.reduce_sum(
        hlp.reduce_logsumexp(log_PDF + log_pi, 1, keep_dims=True), axis=0)
    return loss


sigma = tf.exp(
    tf.Variable(tf.random_normal(np.array([K, 1]), stddev=0.05,
                                 dtype=X.dtype)))  # the variance of the model
logpi = tf.squeeze(
    hlp.logsoftmax(
        tf.Variable(
            tf.random_normal(np.array([K, 1]), stddev=0.05,
                             dtype=X.dtype))))  # the log of the pi parameter
logpdf = log_GaussPDF(X, MU, sigma)
prediction = tf.argmax(tf.nn.softmax(log_posterior(logpdf, logpi)), 1)
loss = loss_function(logpdf, logpi)
optimizer = tf.train.AdamOptimizer(learning_rate=0.1, beta1=0.9, beta2=0.99,
                                   epsilon=1e-5).minimize(
                                       loss)  # Adam optimizing function

with tf.Session() as training_loop:
    tf.initializers.global_variables().run()