def test_em():
    """Run EM once from the module-level ``X``/``K``/``seed`` and report.

    Prints the third value returned by ``em.run`` and then the value of
    ``common.rmse`` between the filled matrix and ``X_gold``.
    """
    start_mixture, start_post = common.init(X, K, seed)
    fitted, _, c = em.run(X, start_mixture, start_post)
    filled = em.fill_matrix(X, fitted)
    print(c)
    print(common.rmse(filled, X_gold))
def run_matrix_completion():
    """Complete the module-level ``X`` with K=12, seed=1 and print the RMSE.

    The reference matrix is read from ``netflix_complete.txt`` only after
    the model has been fitted and the matrix filled.
    """
    n_components, init_seed = 12, 1
    fitted, posterior = common.init(X, n_components, init_seed)
    fitted, posterior, _ = em.run(X, fitted, posterior)
    completed = em.fill_matrix(X, fitted)
    gold = np.loadtxt('netflix_complete.txt')
    print("RMSE:", common.rmse(gold, completed))
def test_k12():
    """Run EM with 12 components for seeds 0..4 and score the best run.

    Each seed is printed before its run. The run whose last returned element
    is largest is kept (presumably the log-likelihood -- confirm against
    ``em.run``), its mixture is used to fill ``X``, and the value of
    ``common.rmse`` against ``X_gold`` is returned.
    """
    runs = []
    for s in range(5):
        print(s)
        start_mixture, start_post = common.init(X, 12, s)
        runs.append(em.run(X, start_mixture, start_post))

    best_mixture, _, _ = max(runs, key=lambda run: run[-1])
    return common.rmse(em.fill_matrix(X, best_mixture), X_gold)
def run_matrix_completion():
    """Complete the module-level ``X`` with K=12, seed=1 and print the MAE.

    The mixture returned by ``em.run`` is unpacked into its three fields and
    re-wrapped in a ``common.GaussianMixture`` before being passed to
    ``em.fill_matrix``. The result is scored against
    ``netflix_complete.txt`` with ``common.mae``.
    """
    n_components = 12
    init_seed = 1
    start_mixture, start_post = common.init(X, n_components, init_seed)
    fitted, _, _ = em.run(X, start_mixture, start_post)
    mu, var, p = fitted
    completed = em.fill_matrix(X, common.GaussianMixture(mu, var, p))
    gold = np.loadtxt('netflix_complete.txt')
    print("MAE:", common.mae(gold, completed))
from operator import pos
import numpy as np
import em
import common
import naive_em

# Incomplete toy matrix and its fully observed reference.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")

K, seed = 4, 0
n, d = X.shape

# TODO: Your code here
# mixtures , post = common.init(X , K , seed)
# new_mixture , new_post , new_ll = naive_em.run(X , mixtures , post)
# print(new_mixture)
# print(new_ll)

# NOTE(review): the matrix is filled straight from the *initial* mixture;
# no EM run happens between init and fill_matrix. Confirm this is intended.
mixtures, post = common.init(X, K, seed)
X_complete = em.fill_matrix(X, mixtures)

# Show the reference first, then the completion, for visual comparison.
for shown in (X_gold, X_complete):
    print(shown)
import numpy as np
import em
import common

# Toy data: incomplete matrix plus its fully observed reference.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")

K, seed = 4, 0
n, d = X.shape

# TODO: Your code here
# Fit on the toy data, fill the matrix and score it against the reference.
mix_conv, post_conv, log_lh_conv = em.run(X, *common.init(X, K, seed))
X_predict = em.fill_matrix(X, mix_conv)
rmse = common.rmse(X_gold, X_predict)

#%% Begin: Comparison of EM for matrix completion with K = 1 and 12
import time

# From here on X / X_gold refer to the netflix data set.
X = np.loadtxt("netflix_incomplete.txt")
X_gold = np.loadtxt("netflix_complete.txt")

K = [1, 12]  # Clusters to try
log_lh = [0] * 5  # Log likelihoods for different seeds
# Best seed for cluster based on highest log likelihoods
best_seed = [0] * 2
# gaussian, post, new_ll = kmeans.run(X, gaussian, post)
# common.plot(X, gaussian, post, "K-means: number of classes{}, random seed {}".format(k, i))
#
# for k in range(1, 5, 1):
#     for i in range(1):
#         gaussian, post = common.init(X, k, seed=i)
#         gaussian, post, new_ll = naive_em.run(X, gaussian, post)
#         common.plot(X, gaussian, post, "EM: number of classes{}, random seed {}".format(k, i))

# Netflix data: incomplete ratings and the fully observed reference.
X = np.loadtxt("netflix_incomplete.txt")
X_gold = np.loadtxt('netflix_complete.txt')

# for k in [1, 12]:
#     for i in range(5):
#         gaussian, post = common.init(X, k, seed=i)
#         gaussian, post, new_ll = em.run(X, gaussian, post)
#         print("EM: number of classes {}, random seed {}:".format(k, i))
#         print(new_ll)

# Single run with 12 components and seed 1; the initial state is fed
# directly into em.run.
gaussian, post, new_ll = em.run(X, *common.init(X, 12, seed=1))
X_pred = em.fill_matrix(X, gaussian)
print(common.rmse(X_gold, X_pred))

# for k in range(1, 5, 1):
#     for i in range(5):
#         gaussian, post = common.init(X, k, seed=i)
#         gaussian, post, new_ll = naive_em.run(X, gaussian, post)
#         print("BIC = {} for K = {} and seed = {}".format(common.bic(X, gaussian, new_ll), k, i))
#
#
# print('LL:' + str(ll))
# print()
# print("After first M-step:")
mu, var, p = em.mstep(X, post, mixture)
# print('Mu:\n' + str(mu))
# print('Var: ' + str(var))
# print('P: ' + str(p))
# print()
# print("After a run")
(mu, var, p), post, ll = em.run(X, mixture, post)
# print('Mu:\n' + str(mu))
# print('Var: ' + str(var))
# print('P: ' + str(p))
# print('post:\n' + str(post))
# print('LL: ' + str(ll))

# Fill the matrix from the fitted parameters, re-wrapped as a mixture.
X_pred = em.fill_matrix(X, common.GaussianMixture(mu, var, p))
# error = common.rmse(X_gold, X_pred)
# print("X_gold:\n" + str(X_gold))
# X_pred = np.round(X_pred)

# Dump the prediction as text: a "n d" header line, then one line per row
# with every value rounded to 5 decimals and followed by a single space.
# The `with` block closes the file even if a write fails; the original
# opened the handle and never closed it.
with open(
        '/home/animesh/WTA/movie_recommendation/recommender/trainer/test_file.txt',
        'w') as fil:
    fil.write(str(n) + ' ' + str(d) + '\n')
    for row in X_pred:
        for value in row:
            fil.write(str(round(value, 5)) + " ")
        fil.write("\n")
# print("RMSE: " + str(error))
import numpy as np
import em
import common

X = np.loadtxt("netflix_incomplete.txt")
X_gold = np.loadtxt("netflix_complete.txt")

K = 12
# One slot per seed (0..4) for the values returned by em.run.
log_lh = [0] * 5
best_seed = 0
mixtures = [0] * 5
posts = [0] * 5
rmse = 0.

# Test all seeds
for i in range(5):
    start_mixture, start_post = common.init(X, K, i)
    mixtures[i], posts[i], log_lh[i] = em.run(X, start_mixture, start_post)

# Keep the seed with the largest recorded log_lh entry and score its mixture.
best_seed = np.argmax(log_lh)
Y = em.fill_matrix(X, mixtures[best_seed])
rmse = common.rmse(X_gold, Y)
print("RMSE for K = 12: {:.4f}".format(rmse))
em_mix, em_post, em_ll = naive_em.run(X, init_mix, init_post) if k_cost < k_best_cost: k_best_mix, k_best_post, k_best_cost = k_mix, k_post, k_cost if em_ll > em_best_ll: em_best_mix, em_best_post, em_best_ll = em_mix, em_post, em_ll BICs[i] = common.bic(X, em_best_mix, em_best_ll) common.plot(X, k_best_mix, k_best_post, "K-means K={}".format(K)) common.plot(X, em_best_mix, em_best_post, "EM K={}".format(K)) print("BICs: ", BICs) print("Best BIC: ", np.max(BICs)) print("Best K: ", Ks[np.argmax(BICs)]) X = np.loadtxt("netflix_incomplete.txt") K = 12 seeds = [0, 1, 2, 3, 4] em_best_mix, em_best_post, em_best_ll = None, None, -np.inf for seed in seeds: init_mix, init_post = common.init(X, K, seed) em_mix, em_post, em_ll = em.run(X, init_mix, init_post) if em_ll > em_best_ll: em_best_mix, em_best_post, em_best_ll = em_mix, em_post, em_ll print("K = {}, LL = {}".format(K, em_best_ll)) X_fill_pred = em.fill_matrix(X, em_best_mix) X_fill = np.load("netflix_complete") print("X_filled Error:", common.rmse(X_fill_pred, X_fill))
# Posterior probs. for best seeds
posts = [0] * 5
# RMS Error for clusters
rmse = [0., 0.]

start_time = time.perf_counter()

# NOTE(review): loop nesting was reconstructed from a whitespace-collapsed
# source; the per-cluster report is assumed to belong to the outer loop.
for k, n_clusters in enumerate(K):
    for i in range(5):
        # Run EM
        mixtures[i], posts[i], log_lh[i] = em.run(
            X, *common.init(X, n_clusters, i))

    # Print lowest cost
    print("=============== Clusters:", n_clusters, "======================")
    print("Highest log likelihood using EM is:", np.max(log_lh))

    # Save best seed for plotting
    best_seed[k] = np.argmax(log_lh)

    # Use the best mixture to fill prediction matrix
    X_pred = em.fill_matrix(X, mixtures[best_seed[k]])
    rmse[k] = common.rmse(X_gold, X_pred)
    print("===================================================")

print("RMS Error for K = 12 is: {:.4f}".format(rmse[1]))
end_time = time.perf_counter()
print("Time taken for this run: {:.4f} seconds".format(end_time - start_time))
seed = 0

# One explicit E-step / M-step from the initial state, printed for inspection.
mixture, post = common.init(X, K, seed)
post, L = em.estep(X, mixture)
mixture = em.mstep(X, post, mixture)
for shown in (post, L, mixture):
    print(shown)

# Full EM run continuing from the state above.
mixture, post, L = em.run(X, mixture, post)
for shown in (post, L, mixture):
    print(shown)

X_prep = em.fill_matrix(X, mixture)
print(X_prep)
RMSE = common.rmse(X_gold, X_prep)
print(RMSE)

# Repeat with K = 4 over seeds 0..4; only L is printed per seed, RMSE is
# computed but not printed in the visible code.
K = 4
for seed in range(5):
    mixture, post, L = em.run(X, *common.init(X, K, seed))
    print(L)
    X_prep = em.fill_matrix(X, mixture)
    RMSE = common.rmse(X_gold, X_prep)
# mixture, post = common.init(X, K, i)
# title= "EM Model with K=" + str(K) + " and " + "Seed=" +str(i)
# mixture, post, ll = naive_em.run(X, mixture, post)
# print("EM output: ")
# common.plot(X, mixture, post, title)
# print(i, ll)
# mixture, post = common.init(X, K, i)
# mixture, post, cost = kmeans.run(X, mixture, post)
# print("K_means output: ")
# print(i, cost)
# title = "K-means Model with K=" + str(K) + " and " + "Seed=" + str(i)
# common.plot(X, mixture, post, title)

# mixture, post = common.init(X, 3, 0)
# print(naive_em.estep(X, mixture))

# for i in range(0, 5):
#     mixture, post = common.init(X, 12, i)
#     mixture, post, ll = em.run(X, mixture, post)
#     print(i, ll)

# Fit with 12 components and seed 1.
mixture, post = common.init(X, 12, 1)
mixture, post, ll = em.run(X, mixture, post)

# Fill the *incomplete* matrix. The original passed X_gold here, which hands
# the ground truth to the model and makes the error below meaningless.
X_pred = em.fill_matrix(X, mixture)

n, d = X.shape

# RMSE restricted to the entries that were missing in X (encoded as 0),
# computed with a boolean mask instead of an explicit double loop.
missing = X == 0
error = np.sum((X_gold[missing] - X_pred[missing]) ** 2)
count = np.count_nonzero(missing)
print(np.sqrt(error / count))
import em
import embak3

# NOTE(review): `np` and `common` are used below but not imported in the
# visible part of this file -- confirm they are imported above.
X = np.loadtxt("toy_data.txt")
# X = np.loadtxt("netflix_incomplete.txt")
#X = np.loadtxt("incomplete_mine.txt")

# TODO: Your code here
# for K in 1,12:
#     for seed in range(0,5):
#         print("K = {}, seed = {}".format(K, seed))
# K = 1
# seed = 0
# mixture, post = common.init(X, K, seed)
# mixture, post, cost = kmeans.run(X, mixture, post)
# title = "K = {}, seed = {}, cost = {} plot.png".format(K, seed, int(cost))
# common.plot(X, mixture, post, title)

K, seed = 3, 0
mixture, post = common.init(X, K, seed)
# mixture, post, l = naive_em.run(X, mixture, post)
mixture, post, l = em.run(X, mixture, post)
# mixture, post, l = embak3.run(X, mixture, post)
# bic = common.bic(X, mixture, l)
# print("bic = ", bic)
# title = "K = {}, seed = {}, log likelyhood = {} plot.png".format(K, seed, int(l))
# common.plot(X, mixture, post, title)

# Show the matrix completed with the fitted mixture.
completed = em.fill_matrix(X, mixture)
print(completed)
# print(n_clusters[np.argmax(bics)])
##=============================================
# running em
n_clusters = np.array([12])
seeds = np.array([0, 1, 2, 3, 4])

# NOTE(review): nesting reconstructed from a whitespace-collapsed source; the
# selection and scoring steps are assumed to run once per cluster count.
for n_cluster in n_clusters:
    log_lhs = np.empty(len(seeds))
    mixtures, posts = [], []
    for i, seed in enumerate(seeds):
        mixture, post, log_lh = em.run(X, *common.init(X, n_cluster, seed))
        log_lhs[i] = log_lh
        mixtures.append(mixture)
        posts.append(post)

    # argmax, not argmin: these are log-likelihoods, so larger is better
    idx_max_seed = np.argmax(log_lhs)
    #common.plot(X, mixtures[idx_min_seed], posts[idx_min_seed], str(n_cluster))
    print(log_lhs[idx_max_seed])

    best_mixture = mixtures[idx_max_seed]
    X_pred = em.fill_matrix(X, best_mixture)
    print(common.rmse(X_gold, X_pred))
def run_matrix_completion():
    """Complete the module-level ``X`` (12 components, seed 1) and report.

    Prints the value of ``common.rmse`` between ``netflix_complete.txt`` and
    the matrix filled from the fitted mixture.
    """
    start_mixture, start_post = common.init(X, 12, 1)
    fitted, _, _ = em.run(X, start_mixture, start_post)
    completed = em.fill_matrix(X, fitted)
    gold = np.loadtxt('netflix_complete.txt')
    print("root mean squared error:", common.rmse(gold, completed))
print("K =", K+1, " cost =", cost_min) print() print("E-M") """ best_K = None best_bic = float('-inf') for K in [0, 11]: ll_max = float('-inf') best_seed = None best_mixture = None for seed in range(5): mixture, post = common.init(X, K + 1, seed) mixture, post, ll = em.run(X, mixture, post) full_matrix = em.fill_matrix(X, mixture) #common.plot(X, mixture, post, "E-M, K="+str(K)+" seed="+str(seed)) if ll > ll_max: best_seed = seed ll_max = ll best_mixture = mixture """ bic = common.bic(X, best_mixture, ll_max) if bic > best_bic: best_K = K+1 best_bic = bic """ print("K =", K + 1, " LL =", ll) #print("full_matrix =") #print(full_matrix[4,:]) #print("Best K=", best_K, " Best BIC=", best_bic)
import em
import common

# NOTE(review): `np` is used below but not imported in the visible part of
# this file -- confirm it is imported above.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")
X_gold_netflix = np.loadtxt("netflix_complete.txt")
X_netflix =np.loadtxt("netflix_incomplete.txt")

K = 12
n, d = X.shape
seed = [0,1,2,3,4]

# TODO: Your code here
# NOTE(review): nesting reconstructed from a whitespace-collapsed source; the
# fill/score/print steps are assumed to run once per seed.
for i, current_seed in enumerate(seed):
    print(current_seed)
    init_model = common.init(X_netflix, K, current_seed)
    start_mixture, start_post = init_model[0], init_model[1]
    mixture, post, cost = em.run(X_netflix, start_mixture, start_post)
    X_pred = em.fill_matrix(X_netflix, mixture)
    rmse = common.rmse(X_gold_netflix,X_pred)
    print(cost)
    print(rmse)

# K= 4
# n,d = X.shape
# seed =0
# init_model = common.init(X, K, seed)
# mixture, post, cost = em.run(X, init_model[0], init_model[1])
# # print(mixture)
# X_pred = em.fill_matrix(X,mixture)
# print(X_pred)
print('K=', k, 'seed=', seed, 'logloss=', LL) best_seed = np.argmax(logloss) logloss = logloss[best_seed] mixture = mixtures[best_seed] post = posts[best_seed] current_bic = common.bic(X, mixture, logloss) bic[j] = current_bic print(f'K={k}', f'Best seed={best_seed}', f'logloss={logloss}', f'BIC={current_bic}') best_K_ix = np.argmax(bic) best_K = K[best_K_ix] best_bic = bic[best_K_ix] print(f"Best K={best_K}", f"BIC={best_bic}") # ----------------------------------- # EM Algorithm for Matrix Completion # ----------------------------------- X_gold = np.loadtxt('netflix_complete.txt') X_pred = em.fill_matrix(X, mixture) rmse = common.rmse(X_gold, X_pred) print(f"RMSE= {rmse}") print(X) print(X_pred)
import numpy as np
import em
import common

# %% Testing implementation of EM algorithm
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")

n, d = X.shape
K, seed = 4, 0

# Fit on the toy data, fill the matrix and score it against the reference.
mix_conv, post_conv, log_lh_conv = em.run(X, *common.init(X, K, seed))
X_predict = em.fill_matrix(X, mix_conv)
rmse = common.rmse(X_gold, X_predict)

# %% Begin: Comparison of EM for matrix completion with K = 1 and 12
import time

X = np.loadtxt("netflix_incomplete.txt")
# NOTE(review): the netflix reference load below is commented out, so X_gold
# still holds the toy reference from above -- confirm this is intended.
# X_gold = np.loadtxt("netflix_complete.txt")

K = [1, 12]  # Clusters to try
log_lh = [0] * 5  # Log likelihoods for different seeds
# Best seed for cluster based on highest log likelihoods
best_seed = [0] * 2