Exemple #1
0
def test_seeds(X, K):
    """Run K-means and naive EM on X with K clusters for seeds 0-4.

    Prints the final K-means cost and the final EM log-likelihood for each
    seed.  Replaces five copy-pasted per-seed variable sets with a loop.
    """
    print("\n############## KMEAN K=" + str(K) + " ###############")

    # One (mixture, post) initialisation per seed, shared by both algorithms
    # exactly as the per-seed variables were in the original.
    inits = [common.init(X, K, seed) for seed in range(5)]

    # K-means: report the final cost per seed.
    for seed, (mixture, post) in enumerate(inits):
        cost = kmeans.run(X, mixture, post)[2]
        print("K=" + str(K) + " seed=" + str(seed) + " : cost=" + str(cost))

    # Naive EM: report the final log-likelihood per seed.
    for seed, (mixture, post) in enumerate(inits):
        ll = naive_em.run(X, mixture, post)[2]
        print("K=" + str(K) + " seed=" + str(seed) + " : likelihood=" + str(ll))
def run_naive_em(X):
    """For each K in 1..4, run naive EM from seeds 0-4, print the best
    log-likelihood and plot the best-seed solution.

    Returns the string "Done" (kept for backward compatibility).
    """
    for K in [1, 2, 3, 4]:
        likelihood_ls = []
        for seed in range(5):
            mixture, post = common.init(X, K, seed)
            mixture, post, LL = naive_em.run(X, mixture, post)
            likelihood_ls.append(LL)

        print("The likelihood of {} cluster is".format(K), max(likelihood_ls))

        # Re-run EM from the winning seed so the plot shows that solution.
        # (The original wrapped this in a pointless one-element `for` loop.)
        best_seed = int(np.argmax(likelihood_ls))
        mixture, post = common.init(X, K, best_seed)
        mixture, post, LL = naive_em.run(X, mixture, post)
        common.plot(X, mixture, post, "{} mixtures with seed{}".format(K, best_seed))
    return "Done"
Exemple #3
0
def run_naive_em():
    """Run naive EM on the module-level data X for K = 1..4, keeping the
    seed (0-4) with the highest log-likelihood, then print and plot the
    winning solution for each K."""
    for num_clusters in range(1, 5):
        best_ll, winning_seed = None, None
        for trial_seed in range(5):
            gm, resp = common.init(X, num_clusters, trial_seed)
            gm, resp, trial_ll = naive_em.run(X, gm, resp)
            if best_ll is None or trial_ll > best_ll:
                best_ll, winning_seed = trial_ll, trial_seed

        # Reproduce the winning run (deterministic) for reporting/plotting.
        gm, resp = common.init(X, num_clusters, winning_seed)
        gm, resp, final_ll = naive_em.run(X, gm, resp)
        caption = "EM for K={}, seed={}, ll={}".format(num_clusters, winning_seed, final_ll)
        print(caption)
        common.plot(X, gm, resp, caption)
def select_best_bic(X):
    """For each K in 1..4, score the EM fit of every seed (0-4) with BIC and
    keep the best; then report the K with the overall highest BIC.

    Fixes: drops the unused `likelihood_ls` accumulator and avoids re-running
    EM a second time for the winning seed (the re-run was deterministic and
    produced the same BIC already computed in the seed loop).

    Returns the string "Done" (kept for backward compatibility).
    """
    bic_ls = []
    for K in [1, 2, 3, 4]:
        best_bic_for_k = None  # highest BIC over the five seeds for this K
        for seed in range(5):
            mixture, post = common.init(X, K, seed)
            mixture, post, LL = naive_em.run(X, mixture, post)
            bic = common.bic(X, mixture, LL)
            if best_bic_for_k is None or bic > best_bic_for_k:
                best_bic_for_k = bic
        bic_ls.append(best_bic_for_k)
    print("The best K is {} with bic {}".format(np.argmax(bic_ls)+1, max(bic_ls)))
    return "Done"
def run_naive_em_with_bic():
    """Fit naive EM for K = 1..4 (best of seeds 0-4 by log-likelihood),
    print/plot each winner together with its BIC, and track the running
    maximum BIC across all K."""
    max_bic = None
    for n_components in range(1, 5):
        # Seed search: (log-likelihood, seed) of the best run so far.
        best = (None, None)
        for s in range(5):
            gm, resp = common.init(X, n_components, s)
            gm, resp, loglike = naive_em.run(X, gm, resp)
            if best[0] is None or loglike > best[0]:
                best = (loglike, s)

        # Re-fit from the winning seed (deterministic) and score with BIC.
        gm, resp = common.init(X, n_components, best[1])
        gm, resp, loglike = naive_em.run(X, gm, resp)
        score = common.bic(X, gm, loglike)
        if max_bic is None or score > max_bic:
            max_bic = score
        title = "EM for K={}, seed={}, ll={}, bic={}".format(
            n_components, best[1], loglike, score)
        print(title)
        common.plot(X, gm, resp, title)
Exemple #6
0
def test_naive_em():
    """Run naive EM for K = 1..4 over seeds 0-4, plot the best run per K,
    and return the (mixture, post) of the best run for the last K (K=4)."""
    for k in [1, 2, 3, 4]:
        runs = [naive_em.run(X, *common.init(X, k, s)) for s in [0, 1, 2, 3, 4]]
        # Each run is (mixture, post, log-likelihood); the highest ll wins.
        best_mix, best_post, _ = max(runs, key=lambda run: run[2])
        common.plot(X, best_mix, best_post,
                    'EM on toy data with {k}'.format(k=k))
    return best_mix, best_post
def run_naive_em(X, plot=False):
    """Run naive EM for every value in the module-level list `K`, trying
    every value in the module-level list `seed`.  Per K, prints the best
    log-likelihood, its seed, and the running max BIC; if `plot` is True,
    every (K, seed) run is plotted."""
    max_bic = None
    for k_val in K:
        best_ll, best_seed = None, None
        for s in seed:
            mixture, post = common.init(X, k_val, s)
            mixture, post, ll = naive_em.run(X, mixture, post)
            if best_ll is None or ll > best_ll:
                best_ll, best_seed = ll, s
            if plot:
                common.plot(X, mixture, post,
                            "K={}, seed={}".format(k_val, s))

        # Re-fit with the winning seed before scoring BIC.
        mixture, post = common.init(X, k_val, best_seed)
        mixture, post, ll = naive_em.run(X, mixture, post)
        bic = common.bic(X, mixture, ll)
        if max_bic is None or bic > max_bic:
            max_bic = bic
        print("K = {}, Max ln(likelihood) = {}, Best seed = {}, Max BIC = {}".
              format(k_val, best_ll, best_seed, max_bic))
Exemple #8
0
def select_k_em():
    """Select the best K based on BIC.

    For each K in 1..4, fits naive EM from seeds 0-4 and prints the BIC of
    the highest-likelihood fit.
    """
    for k in [1, 2, 3, 4]:
        # One EM fit per seed; each fit is (mixture, post, log-likelihood).
        fits = [naive_em.run(X, *common.init(X, k, s)) for s in [0, 1, 2, 3, 4]]
        best_fit = max(fits, key=lambda fit: fit[2])
        print(common.bic(X, best_fit[0], best_fit[2]))
Exemple #9
0
# 2dgaussian
# Sanity-check the Gaussian pdf helper on a single-component mixture.
mixture, post = common.init(X, 1)
mu, var, p = mixture
test_2dgaussian_pdf(X, mu, var)

# E_step
# One E-step on a 3-component mixture: soft assignments + log-likelihood.
mixture, post = common.init(X, 3, seed=0)
mu, var, p = mixture
post, log_likelihood = naive_em.estep(X, mixture)

# M_step
# One M-step: refit mixture parameters from the soft counts above.
mixture = naive_em.mstep(X, post)

# RUN
# Full EM run to convergence from a fresh seed-0 initialisation.
mixture, post = common.init(X, 3, seed=0)
mixture, post, log_likelihood = naive_em.run(X, mixture, post)

# =============================================================================
# 4. Comparing K-means and EM
# =============================================================================

# Fit EM for every (K, seed) pair; `Ks` and `seeds` are defined elsewhere in
# the file.  Each run is plotted and its log-likelihood printed.
for K in Ks:
    for seed in seeds:
        mixture, post = common.init(X, K=K, seed=seed)  # Initialize K-means
        mixture, post, log_likelihood = naive_em.run(X, mixture, post)
        common.plot(X, mixture, post, [K, seed])
        print(K, seed, log_likelihood)

# =============================================================================
# 5. Bayesian Information Criterion
# Picking the best K
    def run_full_em(self, X, K, seed, expected_cost):
        """Initialise a K-component mixture with `seed`, run naive EM to
        convergence, and assert the final log-likelihood equals
        `expected_cost` within np.isclose tolerance."""
        mixture, post = common.init(X, K, seed)
        new_mixture, soft_counts, cost = naive_em.run(X, mixture, post)

        self.assertEqual(np.isclose(cost, expected_cost), True,
                         f'Cost: got {cost}, expected {expected_cost}')
Exemple #11
0
    # NOTE(review): these three prints are indented — they belong to a loop
    # whose header is not part of this fragment.
    print("----- Clusters", k + 1, " -----")
    print("Lowest cost: ", np.min(costs_kMeans))
    print("Best seed: ", np.argmin(costs_kMeans))

print("******* End of section 2 *******\n ")

######### Section 4: Comparing K-means and EM ############
print("******* Section 4 *******\n ")
# Per-seed EM results for the current K (one slot per seed).
costs_EM = [0, 0, 0, 0, 0]
mixtures_EM = [0, 0, 0, 0, 0]  # Mixtures for best seed
bic = [0., 0., 0., 0.]  # BIC for best cluster

# `K` and `seeds` are defined elsewhere in the file.
for k in range(len(K)):
    for i in range(len(seeds)):
        mixtures_EM[i], _, costs_EM[i] = naive_em.run(
            X, *common.init(X, K[k], seeds[i]))

    # BIC of the highest-likelihood seed for this K.
    bic[k] = common.bic(X, mixtures_EM[np.argmax(costs_EM)], np.max(costs_EM))

    print("----- Mixture ", k + 1, " -----")
    print("Highest log: ", np.max(costs_EM))
    print("Best seed: ", np.argmax(costs_EM))

print("******* End of section 4 *******\n ")

######### Section 5: Bayesian Information Criterion ############
print("******* Section 5 *******\n ")
# Highest BIC wins; +1 converts the 0-based index back to the K value.
print("Best K: ", np.argmax(bic) + 1)
print("BIC for the best K: ", np.max(bic))

print("******* End of section 5 *******\n ")
Exemple #12
0

X = np.loadtxt("toy_data.txt")
K = [1, 2, 3, 4]
# For every K: run both K-means and naive EM from each seed and collect the
# per-seed costs, log-likelihoods and BIC scores.
# NOTE(review): only seeds 0-3 are tried (range(4)); other fragments in this
# file use five seeds (0-4) — confirm whether four is intentional.
costs = []
loglikelihoods = []
bics = []
for k in K:
    cost_seeds_ = []
    log_likelihood_ = []
    bic_ = []
    for seed in range(4):
        gauss_mixture, post = common.init(X=X, K=k, seed=seed)
        gauss_mixture_kmeans, post_kmeans, cost = kmeans.run(
            X=X, mixture=gauss_mixture, post=post)
        #print('for k =',k, "and seed=",seed, end=" ")
        #print("cost=",cost)
        gauss_mixture_em, post_em, loglikelihood = naive_em.run(
            X, gauss_mixture, post)
        bic_.append(common.bic(X, gauss_mixture_em, loglikelihood))
        log_likelihood_.append(loglikelihood)
        cost_seeds_.append(cost)
#        plot_points(X,post_kmeans,
#                    title="kmeans with k:"+str(k)+" seed:"+str(seed))
#        plot_points(X,post_em,
#                    title="em with k:"+str(k)+" seed:"+str(seed))
    bics.append(bic_)
    costs.append(cost_seeds_)
    loglikelihoods.append(log_likelihood_)
Exemple #13
0
#Mixture for Best Seed for Algo
mixture_kmeans = [0, 0, 0, 0, 0]
mixture_EM = [0, 0, 0, 0, 0]

# Posterior probs. for best seeds
post_kmeans = [0, 0, 0, 0, 0]
post_EM = [0, 0, 0, 0, 0]

# BIC score of cluster
bic = [0., 0., 0., 0.]

# `K`, `seeds`, `cost_kmeans`, `cost_EM`, `bestseed_kmeans` and `bestseed_EM`
# are defined elsewhere in the file; this fragment fills the per-seed slots.
for k in range(len(K)):
    for i in range(len(seeds)):
        mixture_kmeans[i], post_kmeans[i], cost_kmeans[i] = kmeans.run(
            X, *common.init(X, K[k], seeds[i]))
        mixture_EM[i], post_EM[i], cost_EM[i] = naive_em.run(
            X, *common.init(X, K[k], seeds[i]))

    print("=============== Clusters:", k + 1, "======================")
    print("Lowest cost using kMeans is:", np.min(cost_kmeans))
    # NOTE(review): the label says "Lowest cost" but the value printed is the
    # MAXIMUM EM log-likelihood — the label looks wrong; confirm.
    print("Lowest cost using EM is:", np.max(cost_EM))

    #Save best seed for plotting
    bestseed_kmeans[k] = np.argmin(cost_kmeans)
    bestseed_EM[k] = np.argmax(cost_EM)

    common.plot(X,
                mixture_kmeans[bestseed_kmeans[k]],
                post_kmeans[bestseed_kmeans[k]],
                title="kmeans")

    common.plot(X,
Exemple #14
0
    # NOTE(review): fragment of a per-k loop — the enclosing `for` header is
    # not part of this view; `k` and `seeds` come from outside.
    mixtures, posts, costs = [], [], []
    for seed_i in range(seeds.shape[0]):
        mixture, post = common.init(X, k, seeds[seed_i])
        mixture, post, cost = kmeans.run(X, mixture, post)
        mixtures.append(mixture)
        posts.append(post)
        costs.append(cost)
        # NOTE(review): this compares only against the *previous* seed's
        # cost, not the running minimum, so min_cost_seed_i may not be the
        # true argmin — confirm intent (same pattern is fixed in the EM loop
        # below).
        if seed_i > 0 and cost < costs[seed_i - 1]:
            min_cost_seed_i = seed_i

    common.plot(X, mixtures[min_cost_seed_i], posts[min_cost_seed_i],
                "k-mean k:" + str(k) + " seed:" + str(min_cost_seed_i))
    print(k, cost, min_cost_seed_i)

# For each K, run naive EM with seeds 0-4 and plot the run with the highest
# log-likelihood.
for k in K:
    seeds = np.array([0, 1, 2, 3, 4])
    best_seed_i = 0  # index of the highest-log-likelihood seed so far
    mixtures, posts, costs = [], [], []
    for seed_i in range(seeds.shape[0]):
        mixture, post = common.init(X, k, seeds[seed_i])
        mixture, post, cost = naive_em.run(X, mixture, post)
        mixtures.append(mixture)
        posts.append(post)
        costs.append(cost)
        # BUG FIX: the original compared only against the immediately
        # preceding seed's value (costs[seed_i - 1]), so a seed worse than an
        # earlier one could still be selected.  Compare against the running
        # best instead.
        if cost > costs[best_seed_i]:
            best_seed_i = seed_i

    common.plot(X, mixtures[best_seed_i], posts[best_seed_i],
                "EM k:" + str(k) + " seed:" + str(best_seed_i))
    print(k, cost, best_seed_i)
Exemple #15
0
    print("K=" + str(K) + " seed=2 : likelihood=" + str(cost2))
    print("K=" + str(K) + " seed=3 : likelihood=" + str(cost3))
    print("K=" + str(K) + " seed=4 : likelihood=" + str(cost4))


# K mean initialization

# Compare seeds 0-4 for K = 1..4 (`toy_X` and `test_seeds` are defined
# elsewhere in the file).
test_seeds(toy_X, 1)
test_seeds(toy_X, 2)
test_seeds(toy_X, 3)
test_seeds(toy_X, 4)

# EM algo
print("############## EM Algorythme implemented ###############")
# Single reference run: K=3, seed 0; run() returns (mixture, post, ll).
mixture, post = common.init(toy_X, 3, 0)
naive_em_estimate = naive_em.run(toy_X, mixture, post)[2]
print("naive EM log likelihood : " + str(naive_em_estimate))

print("############## Some Tests ######################")
# Converged EM solutions for K = 1..4, all initialised with seed 0.
initialMixture, initialPost = common.init(toy_X, 1, 0)
mixtureEM1, postEM1, ll1 = naive_em.run(toy_X, initialMixture, initialPost)

initialMixture, initialPost = common.init(toy_X, 2, 0)
mixtureEM2, postEM2, ll2 = naive_em.run(toy_X, initialMixture, initialPost)

initialMixture, initialPost = common.init(toy_X, 3, 0)
mixtureEM3, postEM3, ll3 = naive_em.run(toy_X, initialMixture, initialPost)

initialMixture, initialPost = common.init(toy_X, 4, 0)
mixtureEM4, postEM4, ll4 = naive_em.run(toy_X, initialMixture, initialPost)
Exemple #16
0
# Per-seed posterior slots for the current K (`K`, `seeds`, `mixtures_*`,
# `costs_*` and `best_seed_*` are defined elsewhere in the file).
posts_kMeans = [0, 0, 0, 0, 0]
posts_EM = [0, 0, 0, 0, 0]

# BIC score of cluster
bic = [0., 0., 0., 0.]

for k in range(len(K)):
    for i in range(len(seeds)):

        # Run kMeans
        mixtures_kMeans[i], posts_kMeans[i], costs_kMeans[i] = \
        kmeans.run(X, *common.init(X, K[k], seeds[i]))

        # Run Naive EM
        mixtures_EM[i], posts_EM[i], costs_EM[i] = \
        naive_em.run(X, *common.init(X, K[k], seeds[i]))

    # Print lowest cost
    print("=============== Clusters:", k + 1, "======================")
    print("Lowest cost using kMeans is:", np.min(costs_kMeans))
    print("Highest log likelihood using EM is:", np.max(costs_EM))

    # Save best seed for plotting
    best_seed_kMeans[k] = np.argmin(costs_kMeans)
    best_seed_EM[k] = np.argmax(costs_EM)

    # Plot kMeans and EM results
    common.plot(X,
                mixtures_kMeans[best_seed_kMeans[k]],
                posts_kMeans[best_seed_kMeans[k]],
                title="kMeans")
Exemple #17
0
def test_step():
    """Run naive EM once (K=3, seed 0) on X and report the log-likelihood."""
    gm, resp = common.init(X, 3, 0)
    gm, resp, loglike = naive_em.run(X, gm, resp)
    print("Log-likelihood: {}".format(loglike))
Exemple #18
0
import numpy as np
import em
import naive_em
import common

# X = np.loadtxt("test_incomplete.txt")
# X_gold = np.loadtxt("test_complete.txt")

# Which experiment to run: 1 = naive EM on the toy data,
# 2 = full EM on the incomplete Netflix matrix.
testcase = 2

if testcase == 1:  # for naive_em
    X = np.loadtxt("toy_data.txt")
    K = 3
    seed = 0
    n, d = X.shape
    mixture, post = common.init(X, K, seed)
    mixture, post, ll = naive_em.run(X, mixture, post)
    result = "with naive_em, ll = {}".format(ll)
    print(result)

if testcase == 2:
    X = np.loadtxt("netflix_incomplete.txt")
    # X = np.loadtxt("toy_data.txt")
    n, d = X.shape
    # Compare a single component against K=12, keeping the best of 5 seeds.
    for K in [1, 12]:
        max_ll = None
        for seed in range(5):
            mixture, post = common.init(X, K, seed)
            mixture, post, ll = em.run(X, mixture, post)
            max_ll = ll if max_ll is None else max(max_ll, ll)
Exemple #19
0
import numpy as np
import kmeans
import common
import naive_em
import em

# Netflix data with K = 12 components: print the BIC of each seeded EM run.
X = np.loadtxt("netflix_complete.txt")
K = 12

for seed in range(5):
    mixture_init, post_init = common.init(X, K, seed)

    # m, p, cost = kmeans.run(X , mixtures , post)
    # print (cost)
    # common.plot(X , mixtures , post , "Title")

    fitted, responsibilities, log_like = naive_em.run(X, mixture_init, post_init)
    print(common.bic(X, fitted, log_like))
    # common.plot(X , mixtures , post , "Title")

Exemple #20
0
import naive_em
import em

X = np.loadtxt("toy_data.txt")

Ks = [1, 2, 3, 4]
seeds = [0, 1, 2, 3, 4]
BICs = np.empty(len(Ks))

for i, K in enumerate(Ks):
    # Best k-means run (lowest cost) and best EM run (highest ll) over all
    # seeds for this K, each stored as a (mixture, post, score) triple.
    k_best = (None, None, np.inf)
    em_best = (None, None, -np.inf)
    for seed in seeds:
        init_mix, init_post = common.init(X, K, seed)
        k_result = kmeans.run(X, init_mix, init_post)
        em_result = naive_em.run(X, init_mix, init_post)
        if k_result[2] < k_best[2]:
            k_best = k_result
        if em_result[2] > em_best[2]:
            em_best = em_result
    BICs[i] = common.bic(X, em_best[0], em_best[2])
    common.plot(X, k_best[0], k_best[1], "K-means K={}".format(K))
    common.plot(X, em_best[0], em_best[1], "EM K={}".format(K))

print("BICs: ", BICs)
print("Best BIC: ", np.max(BICs))
print("Best K: ", Ks[np.argmax(BICs)])

X = np.loadtxt("netflix_incomplete.txt")

K = 12
Exemple #21
0
def naive_em_function(X, K, seed):
    """Initialise a K-component mixture with `seed` and run naive EM on X.

    Returns the fitted (mixture, post, log-likelihood) triple from run()."""
    mixture_init, post_init = common.init(X, K, seed)
    return naive_em.run(X, mixture_init, post_init)
import numpy as np
import kmeans
import common
import naive_em
import em

X = np.loadtxt("datas/toy_data.txt")

K = [1, 2, 3, 4]
seeds = [0, 1, 2, 3, 4]

for k in K:
    # Best runs so far as (mixture, post, score) triples: k-means keeps the
    # lowest cost, EM keeps the highest log-likelihood.
    km_winner = (None, None, np.inf)
    em_winner = (None, None, -np.inf)
    for seed in seeds:
        init_mixture, init_post = common.init(X, k, seed)
        # K-means model
        km_run = kmeans.run(X, init_mixture, init_post)
        if km_run[2] < km_winner[2]:
            km_winner = km_run
        # EM model
        em_run = naive_em.run(X, init_mixture, init_post)
        if em_run[2] > em_winner[2]:
            em_winner = em_run
    common.plot(X, km_winner[0], km_winner[1], f"K-means K={k}")
    common.plot(X, em_winner[0], em_winner[1], f"EM K={k}")
Exemple #23
0
import kmeans
import common
import naive_em
import em

X = np.loadtxt("toy_data.txt")
seeds = [0, 1, 2, 3, 4]
K = [1, 2, 3, 4]

# Best K overall, scored by the BIC of the best-seed EM fit for each K.
kbest = 1
bestbic = -float("inf")

for k in K:
    # BUG FIX: the original kept the seed with the LOWEST log-likelihood
    # (`if best > tupl[2]` starting from +1e8); for EM the best seed is the
    # one with the HIGHEST log-likelihood.
    best = -float("inf")
    seed_best = 0
    for seed in seeds:
        mixtures, post = common.init(X, k, seed)
        # NOTE(review): `post=None` is forwarded to naive_em.run —
        # presumably run() recomputes it in its first E-step; confirm.
        tupl = naive_em.run(X, mixtures, None)
        if tupl[2] > best:
            best = tupl[2]
            seed_best = seed
    mixtures, post = common.init(X, k, seed_best)
    tupl = naive_em.run(X, mixtures, None)
    # BUG FIX: score BIC with the FITTED mixture (tupl[0]); the original
    # passed the fresh, unfitted initialisation `mixtures`.
    bi = common.bic(X, tupl[0], tupl[2])
    if bi > bestbic:
        bestbic = bi
        kbest = k

print(kbest)
print(bestbic)
    # NOTE(review): fragment of a per-K loop — `k`, `seeds`, `mixtures`,
    # `posts` and `costs` are defined by code outside this view.
    mixtures_em = []
    posts_em = []
    costs_em = np.empty(len(seeds))
    logloss = np.empty(len(seeds))

    for i, seed in enumerate(seeds):
        # initialize mixture model with random points
        # init(X,K) returns a K-component mixture model with means, variances and mixing proportions.
        mixture, post = common.init(X, K=k, seed=seed)
        mixture_em, post_em = common.init(X, K=k, seed=seed)  # For EM algorithm initialisation

        # run k-means function
        mixture, post, cost = kmeans.run(X, mixture=mixture, post=post)

        # run EM Algo function
        mixture_em, post_em, ll = naive_em.run(X, mixture=mixture_em, post=post_em)

        # Update k-means values
        mixtures.append(mixture)
        posts.append(post)
        costs[i] = cost
        # print(k, seed, costs)

        # Update EM values
        mixtures_em.append(mixture_em)
        posts_em.append(post_em)
        logloss[i] = ll
        # print(k, seed, costs_em)


    # Finding the best/min cost of k-means
Exemple #25
0
import numpy as np
import kmeans
import common
import naive_em
import em

# Toy data with K fixed at 4: fit naive EM from each seed and report the
# converged log-likelihood together with its BIC.
X = np.loadtxt("toy_data.txt")
K = 4
seeds = [0, 1, 2, 3, 4]

for seed in seeds:
    mixture, post = common.init(X, K, seed)
    # kmixture, kpost, kcost = kmeans.run(X, mixture, post)
    # title = f"K is {K}, seed is {seed}, cost is {kcost}"
    fitted_mixture, fitted_post, final_ll = naive_em.run(X, mixture, post)
    bic_score = common.bic(X, fitted_mixture, final_ll)
    title = f"K is {K}, seed is {seed}, em_cost is {final_ll}, with_bic is {bic_score}"
    print(title)
    common.plot(X, fitted_mixture, fitted_post, title)

# TODO: Your code here
Exemple #26
0
seeds = [0, 1, 2, 3, 4]
K = [1, 2, 3, 4]
bic = np.zeros(len(K))

# For each K: fit EM from every seed, keep the highest-likelihood run, and
# score that run with BIC.
for j, k in enumerate(K):
    fitted_mixtures = []
    fitted_posts = []
    logloss = np.empty(len(seeds))

    for i, seed in enumerate(seeds):
        # Fresh random initialisation, then EM to convergence.
        mixture, post = common.init(X, K=k, seed=seed)
        mixture, post, LL = naive_em.run(X, mixture=mixture, post=post)

        fitted_mixtures.append(mixture)
        fitted_posts.append(post)
        logloss[i] = LL

    best_seed = np.argmax(logloss)
    logloss = logloss[best_seed]
    mixture = fitted_mixtures[best_seed]
    post = fitted_posts[best_seed]

    current_bic = common.bic(X, mixture, logloss)
    bic[j] = current_bic

    print(f'K={k}', f'Best seed={best_seed}', f'logloss={logloss}', f'BIC={current_bic}')
Exemple #27
0
import numpy as np
import kmeans
import common
import naive_em
import em

X = np.loadtxt("toy_data.txt")

# EM over every (K, seed) pair with K = 1..4 and seeds 0-4; each run is
# plotted and its log-likelihood and BIC printed.
for i in range(4):
    for j in range(5):
        initial_mixture, post = common.init(X, i + 1, j)
        #M, L, cost_final = kmeans.run(X, initial_mixture, post)
        #title = "K means for K "+str(i+1)+" seed " +str(j)
        #common.plot(X, M, L, title)
        #print("For K "+ str(i+1) + " seed " + str(j) +" cost is " + str(cost_final))

        fitted, resp, likelihood = naive_em.run(X, initial_mixture, post)
        bic = common.bic(X, fitted, likelihood)

        title = "EM for K " + str(i + 1) + " seed " + str(j)
        common.plot(X, fitted, resp, title)
        print("For K " + str(i + 1) + " seed " + str(j) + " likelihood is " +
              str(likelihood) + " bic is " + str(bic))