Esempio n. 1
0
def test_em():
    """Run EM once from the seeded initialisation and print two results.

    Reads the module-level ``X``, ``K``, ``seed`` and ``X_gold``. Prints the
    third value returned by ``em.run`` and then the RMSE between the matrix
    completed by ``em.fill_matrix`` and ``X_gold``.
    """
    start_mixture, start_post = common.init(X, K, seed)
    fitted_mixture, _fitted_post, run_value = em.run(
        X, start_mixture, start_post)

    completed = em.fill_matrix(X, fitted_mixture)
    print(run_value)
    print(common.rmse(completed, X_gold))
Esempio n. 2
0
def run_matrix_completion():
    """Complete the module-level ``X`` with a 12-component EM fit (seed 1).

    The completed matrix is compared with the contents of
    ``netflix_complete.txt`` and the RMSE is printed.
    """
    n_components, init_seed = 12, 1
    start_mixture, start_post = common.init(X, n_components, init_seed)
    fitted_mixture, _post, _ll = em.run(X, start_mixture, start_post)
    completed = em.fill_matrix(X, fitted_mixture)
    reference = np.loadtxt('netflix_complete.txt')
    print("RMSE:", common.rmse(reference, completed))
    def test_run_test_solution(self):
        """Check that ``em.run`` on the ``ts`` fixture reproduces its cost.

        The fixture supplies the data, the starting mixture/posterior and the
        expected cost (``ts.ll_first_run``); the comparison uses
        ``np.isclose``.
        """
        inputs = (ts.X, ts.mixture_first_run, ts.post_first_run)
        expected_cost = ts.ll_first_run

        new_mixture, cost = em.run(*inputs)

        costs_match = np.isclose(cost, expected_cost)
        self.assertEqual(costs_match, True,
                         f'Cost: got {cost}, expected {expected_cost}')
def best_run_em(X):
    """Return the mixture of the best EM run over seeds 0-4 with K=12.

    The third value returned by ``em.run`` is treated as a log-likelihood,
    so the best run is the one with the HIGHEST value. This matches the
    other seed-selection code in this file, which uses ``max`` / ``>``.
    The previous implementation picked the lowest value.

    Args:
        X: data matrix handed to ``common.init`` and ``em.run``.

    Returns:
        The mixture produced by the run with the highest log-likelihood.
        If several runs tie, the earliest seed wins.
    """
    K = 12
    best_mixture = None
    best_ll = None
    for seed in range(5):
        mixture, post = common.init(X, K, seed)
        mixture, post, ll = em.run(X, mixture, post)
        # Track the winner directly instead of keying a dict by a float,
        # which silently drops runs whose likelihoods collide.
        if best_ll is None or ll > best_ll:
            best_mixture, best_ll = mixture, ll
    return best_mixture
def best_run_em(X):
    """Return ``(mixture, seed)`` of the best EM run over seeds 0-4, K=12.

    The third value returned by ``em.run`` is treated as a log-likelihood,
    so the best run is the one with the HIGHEST value. This matches the
    other seed-selection code in this file. The previous implementation
    picked the lowest value.

    Args:
        X: data matrix handed to ``common.init`` and ``em.run``.

    Returns:
        A ``(mixture, seed)`` tuple for the run with the highest
        log-likelihood. If several runs tie, the earliest seed wins.
    """
    K = 12
    best_run = None
    best_ll = None
    for seed in range(5):
        # Kept from the original: reseed NumPy's global RNG per trial.
        np.random.seed(seed)
        mixture, post = common.init(X, K, seed)
        mixture, post, ll = em.run(X, mixture, post)
        if best_ll is None or ll > best_ll:
            best_run, best_ll = (mixture, seed), ll
    return best_run
Esempio n. 6
0
def test_incomplete_em():
    """For K in (1, 12), run EM with seeds 0-4 on the module-level ``X``.

    Prints each ``(K, seed)`` pair before its run. After the five runs for a
    K, prints the largest last element among the ``em.run`` results.
    """
    for n_clusters in (1, 12):
        runs = []
        for trial_seed in range(5):
            print(n_clusters, trial_seed)
            start_mixture, start_post = common.init(
                X, n_clusters, trial_seed)
            runs.append(em.run(X, start_mixture, start_post))
        # Each run result is ranked by its final entry.
        top_run = max(runs, key=lambda run: run[-1])
        print(top_run[-1])
Esempio n. 7
0
def test_k12():
    """Return the RMSE of the best 12-component EM fit over seeds 0-4.

    Each seed is printed before its run. The run whose last returned
    element is largest supplies the mixture used to fill the module-level
    ``X``, and the result is scored against ``X_gold``.
    """
    runs = []
    for trial_seed in range(5):
        print(trial_seed)
        start_mixture, start_post = common.init(X, 12, trial_seed)
        runs.append(em.run(X, start_mixture, start_post))
    best_mixture, _best_post, _best_value = max(
        runs, key=lambda run: run[-1])
    completed = em.fill_matrix(X, best_mixture)
    return common.rmse(completed, X_gold)
def run_em(X):
    """Print, for K in (1, 12), the largest likelihood over seeds 0-4.

    Args:
        X: data matrix handed to ``common.init`` and ``em.run``.

    Returns:
        The string ``"Done"``.
    """
    def _likelihood_for(n_clusters, trial_seed):
        # One complete init + EM run; only the third returned value is kept.
        start_mixture, start_post = common.init(X, n_clusters, trial_seed)
        return em.run(X, start_mixture, start_post)[2]

    for n_clusters in (1, 12):
        scores = [_likelihood_for(n_clusters, s) for s in range(5)]
        print("The likelihood of {} cluster is".format(n_clusters),
              max(scores))
    return "Done"
Esempio n. 9
0
def test_em_seeds(X, K):
    """Run EM for seeds 0-4 with ``K`` components and print each likelihood.

    All five initialisations are created first, then all five EM runs are
    executed, then the five result lines are printed.

    Args:
        X: data matrix handed to ``common.init`` and ``em.run``.
        K: number of mixture components.
    """
    print("\n############## EM K=" + str(K) + " ###############")

    trial_seeds = range(5)
    starts = [common.init(X, K, s) for s in trial_seeds]
    costs = [em.run(X, start_mixture, start_post)[2]
             for start_mixture, start_post in starts]

    for s, cost in zip(trial_seeds, costs):
        print("K=" + str(K) + " seed=" + str(s) + " : likelihood=" + str(cost))
Esempio n. 10
0
def run_em_netflix():
    """For K in (1, 12), report the seed in 0-4 with the highest ``ll``.

    Works on the module-level ``X`` and prints one summary line per K.
    """
    for K in [1, 12]:
        outcomes = []
        for seed in range(0, 5):
            start_mixture, start_post = common.init(X, K, seed)
            _mixture, _post, ll = em.run(X, start_mixture, start_post)
            outcomes.append((seed, ll))

        # max() keeps the first maximal entry, i.e. the earliest seed on ties.
        best_seed, max_ll = max(outcomes, key=lambda pair: pair[1])
        print("EM for K={}, seed={}, ll={}".format(K, best_seed, max_ll))
Esempio n. 11
0
def run_matrix_completion():
    """Complete the module-level ``X`` with K=12, seed=1 and print the MAE.

    The fitted mixture is unpacked into ``(mu, var, p)`` and rebuilt as a
    ``common.GaussianMixture`` before filling. The completed matrix is
    scored against ``netflix_complete.txt`` with ``common.mae``.
    """
    n_components, init_seed = 12, 1
    start_mixture, start_post = common.init(X, n_components, init_seed)
    fitted, _post, _ll = em.run(X, start_mixture, start_post)
    mu, var, p = fitted
    rebuilt = common.GaussianMixture(mu, var, p)
    completed = em.fill_matrix(X, rebuilt)
    reference = np.loadtxt('netflix_complete.txt')
    print("MAE:", common.mae(reference, completed))
Esempio n. 12
0
def run_em(X, plot=False):
    """Sweep the module-level ``K`` and ``seed`` sequences with EM.

    For every K, each seed is tried and the seed with the highest
    log-likelihood is remembered. EM is then re-run from that seed and the
    BIC of the refit is computed. ``max_bic`` is a running maximum across
    the K values seen so far, so the printed "Max BIC" is the best so far
    rather than necessarily this K's own BIC.

    Args:
        X: data matrix handed to ``common.init`` and ``em.run``.
        plot: when true, every individual run is plotted via
            ``common.plot``.
    """
    max_bic = None
    for n_clusters in K:
        max_ln_like, best_seed = None, None
        for trial_seed in seed:
            mixture, post = common.init(X, n_clusters, trial_seed)
            mixture, post, ln_like = em.run(X, mixture, post)
            is_improvement = max_ln_like is None or ln_like > max_ln_like
            if is_improvement:
                max_ln_like, best_seed = ln_like, trial_seed
            if plot:
                plot_title = "K={}, seed={}".format(n_clusters, trial_seed)
                common.plot(X, mixture, post, plot_title)

        # Refit from the winning seed to obtain the mixture for the BIC.
        mixture, post = common.init(X, n_clusters, best_seed)
        mixture, post, ln_like = em.run(X, mixture, post)
        bic = common.bic(X, mixture, ln_like)
        if max_bic is None or bic > max_bic:
            max_bic = bic
        summary = "K = {}, Max ln(likelihood) = {}, Best seed = {}, Max BIC = {}"
        print(summary.format(n_clusters, max_ln_like, best_seed, max_bic))
Esempio n. 13
0
    def train(self):
        """Train the model based on the provided data.

        Calls ``EM.run`` with the training data, the current means,
        covariances and weights, and ``self.K``. The first three items of
        the result are stored back as the means, covariances and weights.

        Raises:
            Exception: if ``self.means`` or ``self.cov`` is still ``None``.
        """
        if self.verbose:
            # A single pre-formatted argument prints the same text under the
            # Python 2 print statement and the Python 3 print function; the
            # bare statement form is a SyntaxError on Python 3.
            print("# TRAINING model %s" % (self.label,))

        if self.means is None or self.cov is None:
            err = "Gaussian Mixture Model should be init before trained"
            raise Exception(err)

        params = EM.run(self.trainingData, self.means, self.cov, self.weights,
                        self.K)
        self.means = params[0]
        self.cov = params[1]
        self.weights = params[2]
Esempio n. 14
0
    def train(self):
        """Train the model based on the provided data.

        Calls ``EM.run`` with the training data, the current means,
        covariances and weights, and ``self.K``. The first three items of
        the result are stored back as the means, covariances and weights.

        Raises:
            Exception: if ``self.means`` or ``self.cov`` is still ``None``.
        """
        if self.verbose:
            # A single pre-formatted argument prints the same text under the
            # Python 2 print statement and the Python 3 print function; the
            # bare statement form is a SyntaxError on Python 3.
            print("# TRAINING model %s" % (self.label,))

        if self.means is None or self.cov is None:
            err = "Gaussian Mixture Model should be init before trained"
            raise Exception(err)

        params = EM.run(self.trainingData, self.means, self.cov,
                        self.weights, self.K)
        self.means = params[0]
        self.cov = params[1]
        self.weights = params[2]
Esempio n. 15
0
def run_EM_Netflix():
    """Runs the EM algorithm on the incomplete data matrix from Netflix ratings.

    For K in (1, 12), seeds 0-4 are tried on the module-level ``X``. The
    seed with the highest ``ll`` and that value are printed.
    """
    trial_seeds = range(5)
    for K in [1, 12]:
        lls = []
        for seed in trial_seeds:
            start_mixture, start_post = common.init(X, K, seed)
            lls.append(em.run(X, start_mixture, start_post)[2])

        # The first seed reaching the maximum wins, as with a strict ">" scan.
        best_seed = max(trial_seeds, key=lambda s: lls[s])
        max_ll = lls[best_seed]

        print("EM for K = {}, seed = {}, ll = {}".format(
            K, best_seed, max_ll))
Esempio n. 16
0
import numpy as np
import em
import common

# Incomplete ratings to fit and the complete ratings to score against.
X = np.loadtxt("netflix_incomplete.txt")
X_gold = np.loadtxt("netflix_complete.txt")

K = 12

# One slot per seed (0-4); placeholders are overwritten by the runs below.
log_lh = [0] * 5
best_seed = 0
mixtures = [0] * 5
posts = [0] * 5
rmse = 0.

# Fit EM once per seed and keep every result.
for i in range(5):
    init_mixture, init_post = common.init(X, K, i)
    mixtures[i], posts[i], log_lh[i] = em.run(X, init_mixture, init_post)

# Fill the matrix with the mixture from the highest-likelihood seed.
best_seed = np.argmax(log_lh)
Y = em.fill_matrix(X, mixtures[best_seed])
rmse = common.rmse(X_gold, Y)
print("RMSE for K = 12: {:.4f}".format(rmse))
Esempio n. 17
0
        em_mix, em_post, em_ll = naive_em.run(X, init_mix, init_post)
        if k_cost < k_best_cost:
            k_best_mix, k_best_post, k_best_cost = k_mix, k_post, k_cost
        if em_ll > em_best_ll:
            em_best_mix, em_best_post, em_best_ll = em_mix, em_post, em_ll
    BICs[i] = common.bic(X, em_best_mix, em_best_ll)
    common.plot(X, k_best_mix, k_best_post, "K-means K={}".format(K))
    common.plot(X, em_best_mix, em_best_post, "EM K={}".format(K))

print("BICs: ", BICs)
print("Best BIC: ", np.max(BICs))
print("Best K: ", Ks[np.argmax(BICs)])

# Fit EM on the incomplete Netflix ratings, keep the best of five seeds and
# score the completed matrix against the full ratings.
X = np.loadtxt("netflix_incomplete.txt")

K = 12
seeds = [0, 1, 2, 3, 4]

# Start at -inf so the first run always becomes the incumbent.
em_best_mix, em_best_post, em_best_ll = None, None, -np.inf
for seed in seeds:
    init_mix, init_post = common.init(X, K, seed)
    em_mix, em_post, em_ll = em.run(X, init_mix, init_post)
    if em_ll > em_best_ll:
        em_best_mix, em_best_post, em_best_ll = em_mix, em_post, em_ll
print("K = {}, LL = {}".format(K, em_best_ll))

X_fill_pred = em.fill_matrix(X, em_best_mix)
# The complete ratings are read as a text file with np.loadtxt elsewhere in
# this file; np.load expects a .npy/.npz/pickle file, and the name was also
# missing its ".txt" extension.
X_fill = np.loadtxt("netflix_complete.txt")

print("X_filled Error:", common.rmse(X_fill_pred, X_fill))
# Per-seed mixtures; placeholders are replaced for every K below.
mixtures = [0] * 5

# Posterior probs. per seed
posts = [0] * 5

# RMS error, one entry per value in K
rmse = [0., 0.]

start_time = time.perf_counter()

for k, n_clusters in enumerate(K):
    # Run EM for seeds 0-4 with this number of clusters.
    for i in range(5):
        init_mixture, init_post = common.init(X, n_clusters, i)
        mixtures[i], posts[i], log_lh[i] = em.run(X, init_mixture, init_post)

    # Report the best likelihood found for this K.
    print("=============== Clusters:", n_clusters, "======================")
    print("Highest log likelihood using EM is:", np.max(log_lh))

    # Remember the winning seed, then score its completed matrix.
    best_seed[k] = np.argmax(log_lh)
    X_pred = em.fill_matrix(X, mixtures[best_seed[k]])
    rmse[k] = common.rmse(X_gold, X_pred)

print("===================================================")
print("RMS Error for K = 12 is: {:.4f}".format(rmse[1]))
end_time = time.perf_counter()
Esempio n. 19
0
import numpy as np
import em
import common

# Small test matrices: the incomplete input and its complete reference.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")

K = 4
n, d = X.shape
seed = 0

# Initialise, run EM, fill the matrix and score it against the reference.
init_mixture, init_post = common.init(X, K, seed)
mix_conv, post_conv, log_lh_conv = em.run(X, init_mixture, init_post)

X_predict = em.fill_matrix(X, mix_conv)

rmse = common.rmse(X_gold, X_predict)

#%% Begin: Comparison of EM for matrix completion with K = 1 and 12
import time

X = np.loadtxt("netflix_incomplete.txt")
X_gold = np.loadtxt("netflix_complete.txt")

# Cluster counts to compare.
K = [1, 12]

# Log likelihoods, one slot per seed.
log_lh = [0] * 5

# Index of the highest-likelihood seed, one slot per cluster count.
best_seed = [0 for _ in K]
#     # mixture, post, cost = naive_em.run(X, mixture, post)
#     # common.plot(X, mixture, post, f"EM for K={k}")

# EM for collaborative filtering
# X = np.loadtxt("netflix_incomplete.txt")
# k = [1, 12]
# best_seed = np.zeros(2, dtype=np.int)
# for j in range(2):
#     best_cost = -np.inf
#     for seed in range(0, 5):
#         mixture, post = common.init(X, k[j], seed)
#         mixture, post, cost = em.run(X, mixture, post)
#         # cost = common.bic(X, mixture, cost)
#         if cost > best_cost:
#             best_cost = cost
#             best_seed[j] = seed
#     # import pdb; pdb.set_trace()
#         print(f'Cost at k = {k[j]} with seed = {best_seed} is {best_cost}')
#     print(f'Best Cost at k = {k[j]} with seed = {best_seed} is {best_cost}')

# RMSE error with complete data
# Fixed configuration for the RMSE check: 12 components, seed 1.
seed, k = 1, 12
X = np.loadtxt("netflix_incomplete.txt")

# The initial mixture/posterior are replaced by the fitted ones.
mixture, post = common.init(X, k, seed)
mixture, post, cost = em.run(X, mixture, post)

# Fill the missing entries and compare with the complete matrix.
X_pred = em.fill_matrix(X, mixture)
X_gold = np.loadtxt("netflix_complete.txt")
rmse = common.rmse(X_pred, X_gold)
print(f'RMSE = {rmse}')
Esempio n. 21
0
import numpy as np
import common
import em
from scipy.special import logsumexp

### Collaborative filtering with EM

# Input data. X_test and X_experiment are loaded but not used in this part.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")
X_test = np.loadtxt("test_incomplete.txt")
X_experiment = np.loadtxt("toy_data.txt")

# Initialise with 12 components (seed 1), then fit with EM.
mixture, post = common.init(X, K=12, seed=1)
fit_result = em.run(X, mixture, post)
mixture, post, loglike = fit_result

# Complete the matrix, then report the RMSE and the fitted mixture.
X_pred = em.fill_matrix(X, mixture)
print(common.rmse(X_gold, X_pred))
print(mixture)
#print(em.fill_matrix(X_test
### get the best seed and the best k size that minimizes the cost

## Best seed
# Get the lowest cost
#optimal_seed_cost = em_total_likelihood_dict[0]
#for k, v in em_total_likelihood_dict.items():
#    if v > optimal_seed_cost:
#        optimal_seed_cost = v
#    else:
Esempio n. 22
0
def run_matrix_completion():
    """Fill the module-level ``X`` with a K=12, seed=1 EM fit.

    Prints the root mean squared error against ``netflix_complete.txt``.
    """
    start_mixture, start_post = common.init(X, 12, 1)
    fitted_mixture, _post, _ll = em.run(X, start_mixture, start_post)
    completed = em.fill_matrix(X, fitted_mixture)
    reference = np.loadtxt('netflix_complete.txt')
    rmse_value = common.rmse(reference, completed)
    print("root mean squared error:", rmse_value)
Esempio n. 23
0
import numpy as np
import em
import common

# Small test matrices: the incomplete input and its complete reference.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")

K = 4
n, d = X.shape
seed = 0

# Fit EM from the seeded initialisation and show the resulting mixture.
mixture, post = common.init(X, K, seed)
fit_result = em.run(X, mixture, post)
mixture, post, ln_like = fit_result

print(mixture)
Esempio n. 24
0
# Reporting log likelihood values on Netflix data
# =============================================================================

X = np.loadtxt("netflix_incomplete.txt")

# One manual E-step and M-step from the K=1, seed=0 initialisation.
mixture, post = common.init(X, K=1, seed=0)
post, log_likelihood = em.estep(X, mixture)
mixtured = em.mstep(X, post, mixture)

Ks = [1, 12]
seeds = [0, 1, 2, 3, 4]

# Full EM run for every (K, seed) pair, printing the resulting likelihood.
for K in Ks:
    for seed in seeds:
        mixture, post = common.init(X, K=K, seed=seed)
        fit_result = em.run(X, mixture, post)
        mixture, post, log_likelihood = fit_result
        print(K, seed, log_likelihood)

# =============================================================================
# Completing missing entries
# =============================================================================

X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")

# Fit four components from seed 0, then fill in the missing entries.
mixture, post = common.init(X, K=4, seed=0)
fit_result = em.run(X, mixture, post)
mixture, post, log_likelihood = fit_result
X_pred = em.fill_matrix(X, mixture)

# Score the completed matrix against the reference and show both.
RMSE = common.rmse(X_gold, X_pred)
print(X_pred, RMSE)
Esempio n. 25
0
# Walk through one E-step, one M-step and a full EM run, then complete the
# matrix. `X` and `mixture` come from earlier in the script (not shown here).
# The commented-out prints are debug output for each stage.

# print("After first E-step:")
# Single E-step from the starting mixture.
post, ll = em.estep(X, mixture)
# print('post:\n' + str(post))
# print('LL:' + str(ll))
# print()

# print("After first M-step:")
# Single M-step; mu/var/p are overwritten by the full run below.
mu, var, p = em.mstep(X, post, mixture)
# print('Mu:\n' + str(mu))
# print('Var: ' + str(var))
# print('P: ' + str(p))
# print()

# print("After a run")
# The full run starts from the original `mixture` (not the M-step result)
# together with the posterior produced by the E-step above.
(mu, var, p), post, ll = em.run(X, mixture, post)
# print('Mu:\n' + str(mu))
# print('Var: ' + str(var))
# print('P: ' + str(p))
# print('post:\n' + str(post))
# print('LL: ' + str(ll))
# Rebuild a GaussianMixture from the fitted parameters and fill the matrix.
X_pred = em.fill_matrix(X, common.GaussianMixture(mu, var, p))
# error = common.rmse(X_gold, X_pred)
# print("X_gold:\n" + str(X_gold))
# X_pred = np.round(X_pred)
fil = open(
    '/home/animesh/WTA/movie_recommendation/recommender/trainer/test_file.txt',
    'w')
fil.write(str(n) + ' ' + str(d) + '\n')
for i in X_pred:
    for j in i:
Esempio n. 26
0
        cost_min = np.min([cost_min, cost])

    print("K =", K+1, " cost =", cost_min)

print()
print("E-M")
"""
# E-M sweep: for K + 1 in (1, 12), run seeds 0-4 and keep the best run.
# best_K / best_bic are only updated by the disabled BIC step below.
best_K = None
best_bic = float('-inf')
for K in [0, 11]:
    ll_max = float('-inf')
    best_seed = None
    best_mixture = None
    for seed in range(5):
        mixture, post = common.init(X, K + 1, seed)
        mixture, post, ll = em.run(X, mixture, post)
        full_matrix = em.fill_matrix(X, mixture)
        #common.plot(X, mixture, post, "E-M, K="+str(K)+" seed="+str(seed))
        if ll > ll_max:
            best_seed = seed
            ll_max = ll
            best_mixture = mixture
    # Disabled BIC-based model selection (previously parked in a no-op
    # string literal inside the loop):
    # bic = common.bic(X, best_mixture, ll_max)
    # if bic > best_bic:
    #     best_K = K+1
    #     best_bic = bic
    # Report the best log-likelihood over the seeds. `ll` only holds the
    # value from whichever seed happened to run last.
    print("K =", K + 1, " LL =", ll_max)
#print("full_matrix =")
#print(full_matrix[4,:])
import em
import common

X = np.loadtxt("netflix_incomplete.txt")
# NOTE(review): the input is the Netflix matrix but the reference is the small
# test matrix -- confirm this pairing is intended before comparing the two.
X_gold = np.loadtxt("test_complete.txt")

K = 12
n, d = X.shape
seed = 0

# loglikelihoods gets one inner list per K in (1, 12); each inner list holds
# the value returned by em.run for seeds 0-4, in seed order.
loglikelihoods = []
for k in [1, 12]:
    log_likelihood_ = []
    for seed in range(5):
        gauss_mixture, post = common.init(X=X, K=k, seed=seed)
        fit_result = em.run(X, gauss_mixture, post)
        gauss_mixture_em, post_em, loglikelihood = fit_result
        log_likelihood_.append(loglikelihood)

    loglikelihoods.append(log_likelihood_)
Esempio n. 28
0
import numpy as np
import em
import common

# X = np.loadtxt("test_incomplete.txt")
X = np.loadtxt("netflix_incomplete.txt")

X_gold = np.loadtxt("netflix_complete.txt")

K = 12
n, d = X.shape
# Only seed 1 is evaluated here.
seeds = [1]
for seed in seeds:
    mixture, post = common.init(X, K, seed)
    fit_result = em.run(X, mixture, post)
    em_mixture, em_post, em_cost = fit_result

    # Complete the matrix and score it against the full ratings.
    X_pred = em.fill_matrix(X, em_mixture)
    rmse = common.rmse(X_gold, X_pred)
    print(f'RMSE is {rmse}')

    title = f"K is {K}, seed is {seed}, em_cost is {em_cost}"
    print(title)
# TODO: Your code here
Esempio n. 29
0
import em
import common

# Test matrices and Netflix matrices; only the Netflix pair is used below.
X = np.loadtxt("test_incomplete.txt")
X_gold = np.loadtxt("test_complete.txt")
X_gold_netflix = np.loadtxt("netflix_complete.txt")
X_netflix = np.loadtxt("netflix_incomplete.txt")

K = 12
n, d = X.shape
seed = [0, 1, 2, 3, 4]

# For every seed: print it, fit EM on the Netflix data, then print the value
# returned by em.run followed by the RMSE of the completed matrix.
for i in range(len(seed)):
    print(seed[i])
    init_model = common.init(X_netflix, K, seed[i])
    start_mixture, start_post = init_model[0], init_model[1]
    mixture, post, cost = em.run(X_netflix, start_mixture, start_post)
    X_pred = em.fill_matrix(X_netflix, mixture)
    rmse = common.rmse(X_gold_netflix, X_pred)
    print(cost)
    print(rmse)

# K= 4
# n,d = X.shape
# seed =0
# init_model = common.init(X, K, seed)
# mixture, post, cost = em.run(X, init_model[0], init_model[1])
# # print(mixture)
# X_pred = em.fill_matrix(X,mixture)
# print(X_pred)
Esempio n. 30
0
seeds = [0, 1, 2, 3, 4]
K = [1, 12]
# One BIC value per entry of K, filled in below.
bic = np.zeros(len(K))

for j, k in enumerate(K):
    mixtures = []
    posts = []
    logloss = np.empty(len(seeds))

    for i, seed in enumerate(seeds):
        # Seeded initialisation followed by a full EM run.
        mixture, post = common.init(X, K=k, seed=seed)
        fit_result = em.run(X, mixture=mixture, post=post)
        mixture, post, LL = fit_result

        mixtures.append(mixture)
        posts.append(post)
        logloss[i] = LL
        print('K=', k, 'seed=', seed, 'logloss=', LL)

    # best_seed is a position in `seeds`. From here on `logloss`, `mixture`
    # and `post` refer to the winning run only.
    best_seed = np.argmax(logloss)
    logloss = logloss[best_seed]
    mixture = mixtures[best_seed]
    post = posts[best_seed]

    current_bic = common.bic(X, mixture, logloss)
    bic[j] = current_bic

    print(f'K={k}', f'Best seed={best_seed}', f'logloss={logloss}', f'BIC={current_bic}')
Esempio n. 31
0
#         gaussian, post, new_ll = kmeans.run(X, gaussian, post)
#         common.plot(X, gaussian, post, "K-means: number of classes{}, random seed {}".format(k, i))
#
# for k in range(1, 5, 1):
#     for i in range(1):
#         gaussian, post = common.init(X, k, seed=i)
#         gaussian, post, new_ll = naive_em.run(X, gaussian, post)
#         common.plot(X, gaussian, post, "EM: number of classes{}, random seed {}".format(k, i))

X = np.loadtxt("netflix_incomplete.txt")
X_gold = np.loadtxt('netflix_complete.txt')
# Disabled sweep over K in (1, 12) and seeds 0-4:
# for k in [1, 12]:
#     for i in range(5):
#         gaussian, post = common.init(X, k, seed=i)
#         gaussian, post, new_ll = em.run(X, gaussian, post)
#         print("EM: number of classes {}, random seed {}:".format(k, i))
#         print(new_ll)

# Single fit with 12 components and seed 1, scored against the full ratings.
gaussian, post = common.init(X, 12, seed=1)
fit_result = em.run(X, gaussian, post)
gaussian, post, new_ll = fit_result
X_pred = em.fill_matrix(X, gaussian)
print(common.rmse(X_gold, X_pred))

# for k in range(1, 5, 1):
#     for i in range(5):
#         gaussian, post = common.init(X, k, seed=i)
#         gaussian, post, new_ll = naive_em.run(X, gaussian, post)
#         print("BIC = {} for K = {} and seed = {}".format(common.bic(X, gaussian, new_ll), k, i))
#
#