Exemple #1
0
def em_clustering(data, nclust, maxiter, epsilon):
    """Fit cluster means, covariances and priors with an EM loop.

    The means start at the first ``nclust`` rows of ``data``, every
    covariance starts as the identity matrix, and the priors start uniform.
    Each iteration calls ``EM.e_step`` followed by ``EM.m_step`` and stops
    early once the summed Euclidean shift of the cluster means (rounded to
    4 decimals) is at most ``epsilon``.

    Args:
        data: 2-D array of shape ``(n, d)``; one row per data point.
        nclust: Number of clusters; must satisfy ``1 <= nclust <= n``.
        maxiter: Maximum number of EM iterations. With ``0`` the initial
            parameters are returned unchanged.
        epsilon: Stopping threshold on the total movement of the means.
            NOTE: the movement is rounded to 4 decimals before the
            comparison, so thresholds below 5e-5 behave like 5e-5.

    Returns:
        Tuple ``(mu, sigma, prior)``. Initially these have shapes
        ``(nclust, d)``, ``(nclust, d, d)`` and ``(nclust,)``; after at
        least one iteration they are whatever ``EM.m_step`` returned
        (presumably the same shapes -- confirm against ``EM.m_step``).

    Raises:
        ValueError: If ``nclust`` is not in the range ``1..n``.
    """
    n, d = data.shape
    # Fail early with a clear message instead of an IndexError (nclust > n)
    # or a ZeroDivisionError (nclust == 0) further down.
    if not 1 <= nclust <= n:
        raise ValueError(
            "nclust must be between 1 and the number of data points "
            "(%d), got %r" % (n, nclust)
        )

    # Initialisation: np.array copies, so later updates never touch `data`.
    mu = np.array(data[:nclust], dtype=float)
    sigma = np.tile(np.identity(d), (nclust, 1, 1))
    prior = np.full(nclust, 1.0 / nclust, dtype=float)

    for _ in range(maxiter):
        # Snapshot the means; m_step receives `mu` and may update it in place.
        mu_old = np.array(mu, dtype=float)
        W = EM.e_step(data, mu, sigma, prior, nclust)
        mu, sigma, prior = EM.m_step(data, W, mu, sigma, nclust)

        # Stopping criterion: sum over clusters of ||mu_j - mu_old_j||_2.
        shift = np.linalg.norm(np.asarray(mu) - mu_old, axis=1).sum()
        if round(float(shift), 4) <= epsilon:
            break
    return mu, sigma, prior
def model_eval(dataFile, nclust, maxiter, epsilon):
    """Train EM clustering on a data file and print timing and accuracy.

    Loads the data with ``readingData.dataPrep``, times a single call to
    ``EM.em_clustering``, runs one more ``EM.e_step`` with the fitted
    parameters, and scores the result with ``testing.test``. Nothing is
    returned; the two results are printed.

    Args:
        dataFile: Passed straight to ``readingData.dataPrep``.
        nclust: Number of clusters, forwarded to the EM routines.
        maxiter: Maximum number of EM iterations.
        epsilon: Stopping threshold forwarded to ``EM.em_clustering``.
    """
    # dataPrep yields the data and the class attribute.
    features, labels = readingData.dataPrep(dataFile)

    # Only the training call is timed (wall-clock seconds, 3 decimals).
    began = time.time()
    mu, sigma, prior = EM.em_clustering(features, nclust, maxiter, epsilon)
    elapsed = round(time.time() - began, 3)

    # Evaluate on the same data the model was trained on.
    weights = EM.e_step(features, mu, sigma, prior, nclust)
    score = testing.test(labels, weights, features)
    percent = int(round(score * 100))  # reported as a whole percentage

    print(" Traning running time :%s seconds " % elapsed)
    print("accuracy:%s%%" % percent)