Exemple #1
0
def test_em():
    """Run EM once on the module-level ``X`` and print how the run scored.

    ``common.init`` is called with the module-level ``K`` and ``seed``; its
    result is handed to ``em.run``.  The third value returned by ``em.run``
    is printed first, followed by ``common.rmse`` of the matrix produced by
    ``em.fill_matrix`` against ``X_gold``.
    """
    start_mixture, start_post = common.init(X, K, seed)
    fitted_mixture, _, run_score = em.run(X, start_mixture, start_post)

    completed = em.fill_matrix(X, fitted_mixture)
    print(run_score)
    error = common.rmse(completed, X_gold)
    print(error)
def run_matrix_completion():
    """Fit EM on the module-level ``X`` and print the completion RMSE.

    ``common.init`` is called with K = 12 and seed = 1.  The mixture returned
    by ``em.run`` is passed to ``em.fill_matrix``, and the result is compared
    with the matrix loaded from ``netflix_complete.txt`` via ``common.rmse``.
    """
    n_components, rng_seed = 12, 1
    start_mixture, start_post = common.init(X, n_components, rng_seed)
    fitted_mixture, _, _ = em.run(X, start_mixture, start_post)
    completed = em.fill_matrix(X, fitted_mixture)
    reference = np.loadtxt('netflix_complete.txt')
    print("RMSE:", common.rmse(reference, completed))
Exemple #3
0
def test_k12():
    """Try seeds 0-4 with 12 passed as K and score the best-scoring run.

    For every seed the seed is printed, then ``common.init`` and ``em.run``
    are called on the module-level ``X``.  The run whose last returned
    element is largest is selected and its mixture is given to
    ``em.fill_matrix``.

    Returns:
        The value of ``common.rmse(prediction, X_gold)`` for that run.
    """
    runs = []
    for s in range(5):
        print(s)
        start_mixture, start_post = common.init(X, 12, s)
        runs.append(em.run(X, start_mixture, start_post))

    def final_element(run):
        # em.run's last returned element is the ranking key.
        return run[-1]

    best_mixture, _, _ = max(runs, key=final_element)
    completed = em.fill_matrix(X, best_mixture)
    return common.rmse(completed, X_gold)
Exemple #4
0
def run_matrix_completion():
    """Fit EM on the module-level ``X`` and print the completion MAE.

    ``common.init`` is called with K = 12 and seed = 1.  The first value
    returned by ``em.run`` is unpacked into ``(mu, var, p)`` and rebuilt as a
    ``common.GaussianMixture`` before being passed to ``em.fill_matrix``.
    The filled matrix is compared with ``netflix_complete.txt`` through
    ``common.mae``.
    """
    n_components = 12
    rng_seed = 1
    start_mixture, start_post = common.init(X, n_components, rng_seed)
    fitted, _, _ = em.run(X, start_mixture, start_post)
    mu, var, p = fitted
    rebuilt = common.GaussianMixture(mu, var, p)
    completed = em.fill_matrix(X, rebuilt)
    reference = np.loadtxt('netflix_complete.txt')
    print("MAE:", common.mae(reference, completed))
Exemple #5
0
from operator import pos
import numpy as np
import em
import common
import naive_em

# Load the matrix to complete and the reference matrix it is compared with.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")

# K and seed are the arguments handed to common.init below; n and d are the
# two dimensions of X.
K = 4
n, d = X.shape
seed = 0

# TODO: Your code here
# mixtures , post = common.init(X , K , seed)
# new_mixture , new_post , new_ll = naive_em.run(X , mixtures , post)

# print(new_mixture)
# print(new_ll)

# NOTE(review): the mixture given to em.fill_matrix comes straight from
# common.init -- em.run is never called in this script. Confirm that
# filling from the initial mixture is intended.
mixtures, post = common.init(X, K, seed)
X_complete = em.fill_matrix(X, mixtures)
# Print the reference matrix and the filled matrix for visual comparison.
print(X_gold)
print(X_complete)
Exemple #6
0
import numpy as np
import em
import common

# Load the matrix to complete and the reference matrix it is scored against.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")

# K and seed are passed to common.init below; n and d are the dimensions of X.
K = 4
n, d = X.shape
seed = 0

# TODO: Your code here

# common.init's return value is unpacked directly into em.run's trailing
# positional arguments.
mix_conv, post_conv, log_lh_conv = em.run(X, *common.init(X, K, seed))

# Complete X using the mixture returned by em.run.
X_predict = em.fill_matrix(X, mix_conv)

# Score the completed matrix against the reference.
rmse = common.rmse(X_gold, X_predict)

#%% Begin: Comparison of EM for matrix completion with K = 1 and 12
import time

# From here on X and X_gold are rebound to the Netflix files, replacing the
# test matrices loaded at the top.
X = np.loadtxt("netflix_incomplete.txt")
X_gold = np.loadtxt("netflix_complete.txt")

# K is rebound from an int to a list at this point.
K = [1, 12]  # Clusters to try

log_lh = [0, 0, 0, 0, 0]  # Log likelihoods for different seeds

# Best seed for cluster based on highest log likelihoods
best_seed = [0, 0]
Exemple #7
0
#         gaussian, post, new_ll = kmeans.run(X, gaussian, post)
#         common.plot(X, gaussian, post, "K-means: number of classes{}, random seed {}".format(k, i))
#
# for k in range(1, 5, 1):
#     for i in range(1):
#         gaussian, post = common.init(X, k, seed=i)
#         gaussian, post, new_ll = naive_em.run(X, gaussian, post)
#         common.plot(X, gaussian, post, "EM: number of classes{}, random seed {}".format(k, i))

# Load the matrix to complete and the reference matrix it is scored against.
X = np.loadtxt("netflix_incomplete.txt")
X_gold = np.loadtxt('netflix_complete.txt')
# Disabled sweep over k in [1, 12] and seeds 0-4 that printed em.run's third
# return value for each combination:
# for k in [1, 12]:
#     for i in range(5):
#         gaussian, post = common.init(X, k, seed=i)
#         gaussian, post, new_ll = em.run(X, gaussian, post)
#         print("EM: number of classes {}, random seed {}:".format(k, i))
#         print(new_ll)

# Single run with 12 passed as K and seed=1, then fill X with the mixture
# returned by em.run and print common.rmse against the reference.
gaussian, post = common.init(X, 12, seed=1)
gaussian, post, new_ll = em.run(X, gaussian, post)
X_pred = em.fill_matrix(X, gaussian)
print(common.rmse(X_gold, X_pred))

# for k in range(1, 5, 1):
#     for i in range(5):
#         gaussian, post = common.init(X, k, seed=i)
#         gaussian, post, new_ll = naive_em.run(X, gaussian, post)
#         print("BIC = {} for K = {} and seed = {}".format(common.bic(X, gaussian, new_ll), k, i))
#
#
Exemple #8
0
# print('LL:' + str(ll))
# print()

# print("After first M-step:")
# One M-step from the current post/mixture. The unpacked mu, var, p are
# overwritten by the em.run call below before anything reads them.
# NOTE(review): X, post and mixture are defined before the visible part of
# this script -- confirm their origin.
mu, var, p = em.mstep(X, post, mixture)
# print('Mu:\n' + str(mu))
# print('Var: ' + str(var))
# print('P: ' + str(p))
# print()

# print("After a run")
# Full EM run; its first return value is unpacked into (mu, var, p).
(mu, var, p), post, ll = em.run(X, mixture, post)
# print('Mu:\n' + str(mu))
# print('Var: ' + str(var))
# print('P: ' + str(p))
# print('post:\n' + str(post))
# print('LL: ' + str(ll))
# Rebuild a GaussianMixture from the unpacked parts and use it to fill X.
X_pred = em.fill_matrix(X, common.GaussianMixture(mu, var, p))
# error = common.rmse(X_gold, X_pred)
# print("X_gold:\n" + str(X_gold))
# X_pred = np.round(X_pred)
# Dump the filled matrix as text: a "<n> <d>" header line, then one line per
# row of X_pred with every value rounded to 5 decimals and followed by a
# single space.  The context manager guarantees the handle is flushed and
# closed even if a write fails; the previous bare open() never closed it.
with open(
        '/home/animesh/WTA/movie_recommendation/recommender/trainer/test_file.txt',
        'w') as fil:
    fil.write(str(n) + ' ' + str(d) + '\n')
    for i in X_pred:
        for j in i:
            fil.write(str(round(j, 5)) + " ")
        fil.write("\n")
# print("RMSE: " + str(error))
Exemple #9
0
import numpy as np
import em
import common

# Matrix to complete and the reference it is scored against.
X = np.loadtxt("netflix_incomplete.txt")
X_gold = np.loadtxt("netflix_complete.txt")

K = 12

# One slot per seed (0-4) for each value returned by em.run.
log_lh = [0] * 5
mixtures = [0] * 5
posts = [0] * 5
best_seed = 0
rmse = 0.

# Run EM once per seed and store the three returned values by seed index.
for i in range(5):
    init_mixture, init_post = common.init(X, K, i)
    run_result = em.run(X, init_mixture, init_post)
    mixtures[i], posts[i], log_lh[i] = run_result

# Fill X with the mixture of the seed whose log_lh entry is largest.
best_seed = np.argmax(log_lh)
best_mixture = mixtures[best_seed]
Y = em.fill_matrix(X, best_mixture)
rmse = common.rmse(X_gold, Y)
print("RMSE for K = 12: {:.4f}".format(rmse))
Exemple #10
0
        em_mix, em_post, em_ll = naive_em.run(X, init_mix, init_post)
        if k_cost < k_best_cost:
            k_best_mix, k_best_post, k_best_cost = k_mix, k_post, k_cost
        if em_ll > em_best_ll:
            em_best_mix, em_best_post, em_best_ll = em_mix, em_post, em_ll
    BICs[i] = common.bic(X, em_best_mix, em_best_ll)
    common.plot(X, k_best_mix, k_best_post, "K-means K={}".format(K))
    common.plot(X, em_best_mix, em_best_post, "EM K={}".format(K))

print("BICs: ", BICs)
print("Best BIC: ", np.max(BICs))
print("Best K: ", Ks[np.argmax(BICs)])

# Matrix-completion run: call em.run with K = 12 on the incomplete Netflix
# matrix once per seed and keep the run with the largest em_ll (the third
# value returned by em.run).
X = np.loadtxt("netflix_incomplete.txt")

K = 12
seeds = [0, 1, 2, 3, 4]

# -inf guarantees the first finite em_ll replaces the placeholder.
em_best_mix, em_best_post, em_best_ll = None, None, -np.inf
for seed in seeds:
    init_mix, init_post = common.init(X, K, seed)
    em_mix, em_post, em_ll = em.run(X, init_mix, init_post)
    if em_ll > em_best_ll:
        em_best_mix, em_best_post, em_best_ll = em_mix, em_post, em_ll
print("K = {}, LL = {}".format(K, em_best_ll))

# Fill X using the best mixture found above.
X_fill_pred = em.fill_matrix(X, em_best_mix)
# The reference matrix is a whitespace-delimited text file, like the
# incomplete one, so it is read with np.loadtxt.  np.load only reads
# .npy/.npz/pickle files, and the previous path also lacked the ".txt"
# extension.
X_fill = np.loadtxt("netflix_complete.txt")

print("X_filled Error:", common.rmse(X_fill_pred, X_fill))
# NOTE(review): this section reads mixtures, log_lh, best_seed, K (as a
# list), X, X_gold and time, none of which are defined in the lines directly
# above -- confirm they are set up earlier in the script.

# Posterior probs., one slot per seed
posts = [0, 0, 0, 0, 0]

# RMS Error, one slot per entry of K
rmse = [0., 0.]

start_time = time.perf_counter()

for k in range(len(K)):
    for i in range(5):

        # Run EM for seed i with K[k] passed as the cluster count; the three
        # returned values overwrite slot i from the previous K.
        mixtures[i], posts[i], log_lh[i] = \
        em.run(X, *common.init(X, K[k], i))

    # Report the highest log likelihood over the five seeds
    print("=============== Clusters:", K[k], "======================")
    print("Highest log likelihood using EM is:", np.max(log_lh))

    # Remember which seed gave the highest log likelihood for this K
    best_seed[k] = np.argmax(log_lh)

    # Use that seed's mixture to fill the prediction matrix and score it
    X_pred = em.fill_matrix(X, mixtures[best_seed[k]])
    rmse[k] = common.rmse(X_gold, X_pred)

print("===================================================")
# rmse[1] is the entry for the second element of K.
print("RMS Error for K = 12 is: {:.4f}".format(rmse[1]))
end_time = time.perf_counter()
print("Time taken for this run: {:.4f} seconds".format(end_time - start_time))
seed = 0

# Initialise from the current K and seed.
mixture, post = common.init(X, K, seed)

# One explicit E-step followed by one M-step, then show the results.
post, L = em.estep(X, mixture)
mixture = em.mstep(X, post, mixture)
print(post)
print(L)
print(mixture)

# Full EM run starting from the state produced above.
mixture, post, L = em.run(X, mixture, post)
print(post)
print(L)
print(mixture)

# Fill X with the resulting mixture and score it against X_gold.
X_prep = em.fill_matrix(X, mixture)
print(X_prep)

RMSE = common.rmse(X_gold, X_prep)
print(RMSE)

K = 4

# Repeat the run for seeds 0-4 with K = 4, printing em.run's third value.
for seed in range(5):
    mixture, post, L = em.run(X, *common.init(X, K, seed))
    print(L)

    X_prep = em.fill_matrix(X, mixture)
    RMSE = common.rmse(X_gold, X_prep)
Exemple #13
0
# mixture, post = common.init(X, K, i)
# title= "EM Model with K=" + str(K) + " and " + "Seed=" +str(i)
# mixture, post, ll = naive_em.run(X, mixture, post)
# print("EM output: ")
# common.plot(X, mixture, post, title)
# print(i, ll)
# mixture, post = common.init(X, K, i)
# mixture, post, cost = kmeans.run(X, mixture, post)
# print("K_means output: ")
# print(i, cost)
# title = "K-means Model with K=" + str(K) + " and " + "Seed=" + str(i)
# common.plot(X, mixture, post, title)

# mixture, post = common.init(X, 3, 0)
# print(naive_em.estep(X, mixture))
# for i in range(0, 5):
#     mixture, post = common.init(X, 12, i)
#     mixture, post, ll = em.run(X, mixture, post)
#     print(i, ll)
# Run EM with 12 passed as K and seed 1, then measure the completion error
# on the entries that are missing (equal to 0) in X.
mixture, post = common.init(X, 12, 1)
mixture, post, ll = em.run(X, mixture, post)
# Complete the *incomplete* matrix X.  Passing X_gold here, as the code did
# before, hands fill_matrix the very values being predicted, so the error
# computed below would not measure the model at all.
X_pred = em.fill_matrix(X, mixture)
n, d = X.shape
# Root of the mean squared error over the missing entries only.
# Assumes X, X_gold and X_pred are same-shape NumPy arrays -- TODO confirm.
missing = X == 0
count = int(missing.sum())
error = float(((X_gold[missing] - X_pred[missing]) ** 2).sum())
print(np.sqrt(error / count))
Exemple #14
0
import em
import embak3


# Data set in use; the alternatives below are disabled.
X = np.loadtxt("toy_data.txt")
# X = np.loadtxt("netflix_incomplete.txt")
#X = np.loadtxt("incomplete_mine.txt")

# TODO: Your code here
# Disabled K-means sweep over K in (1, 12) and seeds 0-4:
# for K in 1,12:
#     for seed in range(0,5):
#         print("K = {}, seed = {}".format(K, seed))
# K = 1
# seed = 0
        # mixture, post = common.init(X, K, seed)
        # mixture, post, cost = kmeans.run(X, mixture, post)
        # title = "K = {}, seed = {},  cost = {} plot.png".format(K, seed, int(cost))
        # common.plot(X, mixture, post, title)

# Active experiment: initialise with K = 3, seed = 0, run em.run, and print
# the matrix returned by em.fill_matrix.
K = 3
seed = 0
mixture, post = common.init(X, K, seed)
# mixture, post, l = naive_em.run(X, mixture, post)
mixture, post, l = em.run(X, mixture, post)
# mixture, post, l = embak3.run(X, mixture, post)
# bic = common.bic(X, mixture, l)
# print("bic = ", bic)
# title = "K = {}, seed = {},  log likelyhood = {} plot.png".format(K, seed, int(l))
# common.plot(X, mixture, post, title)   
print(em.fill_matrix(X, mixture))
             
Exemple #15
0
# print(n_clusters[np.argmax(bics)])

##=============================================
# running em

# Cluster counts to try and the seeds tried for each of them.
n_clusters = np.array([12])
seeds = np.array([0, 1, 2, 3, 4])

for n_cluster in n_clusters:
    # Per-seed results for this cluster count.
    mixtures, posts = [], []
    log_lhs = np.empty(len(seeds))

    for i, seed in enumerate(seeds):
        mixture, post = common.init(X, n_cluster, seed)
        mixture, post, log_lh = em.run(X, mixture, post)
        log_lhs[i] = log_lh
        mixtures.append(mixture)
        posts.append(post)

    # Largest value wins because the score is a log-likelihood.
    idx_max_seed = log_lhs.argmax()
    print(log_lhs[idx_max_seed])

# Uses the results left over from the last cluster count processed above.
best_mixture = mixtures[idx_max_seed]

X_pred = em.fill_matrix(X, best_mixture)
completion_error = common.rmse(X_gold, X_pred)
print(completion_error)
Exemple #16
0
def run_matrix_completion():
    """Fit EM on the module-level ``X`` and print the completion RMSE.

    ``common.init`` is called with 12 and 1 as its second and third
    arguments.  The mixture returned by ``em.run`` is passed to
    ``em.fill_matrix`` and the result is scored against the matrix loaded
    from ``netflix_complete.txt`` using ``common.rmse``.
    """
    start_mixture, start_post = common.init(X, 12, 1)
    fitted_mixture, _, _ = em.run(X, start_mixture, start_post)
    completed = em.fill_matrix(X, fitted_mixture)
    reference = np.loadtxt('netflix_complete.txt')
    score = common.rmse(reference, completed)
    print("root mean squared error:", score)
    print("K =", K+1, " cost =", cost_min)

print()
print("E-M")
"""
# best_K / best_bic are only updated by the BIC block that is currently
# disabled inside the string literal further down.
best_K = None
best_bic = float('-inf')
# K is one less than the value passed to common.init (K + 1 is used below).
for K in [0, 11]:
    ll_max = float('-inf')
    best_seed = None
    best_mixture = None
    for seed in range(5):
        mixture, post = common.init(X, K + 1, seed)
        mixture, post, ll = em.run(X, mixture, post)
        full_matrix = em.fill_matrix(X, mixture)
        #common.plot(X, mixture, post, "E-M, K="+str(K)+" seed="+str(seed))
        # Keep the seed whose run returned the largest ll.
        if ll > ll_max:
            best_seed = seed
            ll_max = ll
            best_mixture = mixture
    """
    bic = common.bic(X, best_mixture, ll_max)
    if bic > best_bic:
        best_K = K+1
        best_bic = bic
    """
    # NOTE(review): this prints ll from the last seed tried, not ll_max --
    # confirm which value is meant to be reported.
    print("K =", K + 1, " LL =", ll)
#print("full_matrix =")
#print(full_matrix[4,:])
#print("Best K=", best_K, " Best BIC=", best_bic)
Exemple #18
0
import em
import common

# Test matrices (incomplete / reference) and Netflix matrices
# (reference / incomplete).
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")
X_gold_netflix = np.loadtxt("netflix_complete.txt")
X_netflix = np.loadtxt("netflix_incomplete.txt")

K = 12
n, d = X.shape
seed = [0, 1, 2, 3, 4]


# TODO: Your code here
# For every seed: print it, run EM on the Netflix matrix, fill the matrix
# with the returned mixture, then print em.run's third value and the RMSE.
for i, current_seed in enumerate(seed):
    print(current_seed)
    init_model = common.init(X_netflix, K, current_seed)
    start_mixture, start_post = init_model[0], init_model[1]
    mixture, post, cost = em.run(X_netflix, start_mixture, start_post)
    X_pred = em.fill_matrix(X_netflix, mixture)
    rmse = common.rmse(X_gold_netflix, X_pred)
    print(cost)
    print(rmse)

# K= 4
# n,d = X.shape
# seed =0
# init_model = common.init(X, K, seed)
# mixture, post, cost = em.run(X, init_model[0], init_model[1])
# # print(mixture)
# X_pred = em.fill_matrix(X,mixture)
# print(X_pred)
Exemple #19
0
        print('K=', k, 'seed=', seed, 'logloss=', LL)

    best_seed = np.argmax(logloss)
    logloss = logloss[best_seed]
    mixture = mixtures[best_seed]
    post = posts[best_seed]

    current_bic = common.bic(X, mixture, logloss)
    bic[j] = current_bic

    print(f'K={k}', f'Best seed={best_seed}', f'logloss={logloss}', f'BIC={current_bic}')

# Pick the entry of K whose bic value is largest and report it.
best_K_ix = np.argmax(bic)
best_K = K[best_K_ix]
best_bic = bic[best_K_ix]
print(f"Best K={best_K}", f"BIC={best_bic}")

# -----------------------------------
# EM Algorithm for Matrix Completion
# -----------------------------------

X_gold = np.loadtxt('netflix_complete.txt')

# NOTE(review): `mixture` is whatever was last assigned before this point
# (the indented code above sets it per K), so it is presumably the mixture
# from the final K tried rather than the one for best_K -- confirm.
X_pred = em.fill_matrix(X, mixture)

rmse = common.rmse(X_gold, X_pred)

print(f"RMSE= {rmse}")

# Show the input matrix next to the filled matrix.
print(X)
print(X_pred)
import numpy as np
import em
import common

# %% Testing implementation of EM algorithm
# Matrix to complete and the reference it is scored against.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")

n, d = X.shape

K, seed = 4, 0

# Initialise, run EM, fill X with the returned mixture and score the result.
init_mixture, init_post = common.init(X, K, seed)
mix_conv, post_conv, log_lh_conv = em.run(X, init_mixture, init_post)

X_predict = em.fill_matrix(X, mix_conv)

rmse = common.rmse(X_gold, X_predict)

# %% Begin: Comparison of EM for matrix completion with K = 1 and 12
import time

# X is rebound to the Netflix matrix; X_gold keeps the test reference
# because its reload is disabled.
X = np.loadtxt("netflix_incomplete.txt")
# X_gold = np.loadtxt("netflix_complete.txt")

# Cluster counts to try.
K = [1, 12]

# One log-likelihood slot per seed.
log_lh = [0] * 5

# One best-seed slot per entry of K.
best_seed = [0] * 2