def em_clustering(data, nclust, maxiter, epsilon):
    """Run EM clustering on ``data`` and return the fitted parameters.

    The cluster means are seeded with the first ``nclust`` rows of ``data``,
    every covariance starts as the identity matrix, and the priors start
    uniform (``1 / nclust``).  ``EM.e_step`` and ``EM.m_step`` are then
    alternated until the means stop moving or ``maxiter`` iterations have
    been performed.

    Args:
        data: 2-D array of shape ``(n, d)``, one data point per row.
        nclust: Number of clusters; must not exceed ``n`` because each mean
            is seeded from its own data row.
        maxiter: Maximum number of EM iterations.  With ``0`` the initial
            parameters are returned unchanged.
        epsilon: Convergence threshold.  Iteration stops once the sum over
            clusters of the Euclidean distance between the old and new mean
            is ``<= epsilon``.

    Returns:
        Tuple ``(mu, sigma, prior)``.  Before any iteration these are arrays
        of shape ``(nclust, d)``, ``(nclust, d, d)`` and ``(nclust,)``;
        afterwards they are whatever the last ``EM.m_step`` call returned.

    Raises:
        IndexError: If ``nclust`` is larger than the number of data points.
    """
    n, d = data.shape
    if nclust > n:
        # Same exception type the row-by-row seeding would have raised,
        # but with a message that names the actual problem.
        raise IndexError(
            "nclust (%d) exceeds the number of data points (%d)" % (nclust, n)
        )

    # Initialization of mean, covariance, and prior.
    mu = np.zeros((nclust, d), dtype=float)
    mu[:] = data[:nclust]  # copies the rows, so `data` is never aliased
    sigma = np.zeros((nclust, d, d), dtype=float)
    sigma[:] = np.identity(d)  # broadcast one identity matrix per cluster
    prior = np.full(nclust, 1.0 / nclust, dtype=float)  # one prior per cluster

    for _ in range(maxiter):
        # Snapshot the means; a real copy is required in case the M-step
        # updates `mu` in place.
        mu_old = np.array(mu)

        W = EM.e_step(data, mu, sigma, prior, nclust)
        mu, sigma, prior = EM.m_step(data, W, mu, sigma, nclust)

        # Stopping criterion: total Euclidean distance moved by the means.
        # The value is compared unrounded so that small `epsilon` values
        # (below 5e-5) are honored instead of being rounded away.
        shift = np.linalg.norm(np.asarray(mu) - mu_old, axis=1).sum()
        if shift <= epsilon:
            break

    return mu, sigma, prior
def model_eval(dataFile, nclust, maxiter, epsilon):
    """Train the EM model on a data file and print timing and accuracy.

    Loads the data with ``readingData.dataPrep``, fits the model with
    ``EM.em_clustering`` while timing that call, re-runs ``EM.e_step`` on the
    same data with the fitted parameters, and scores the result with
    ``testing.test``.  Nothing is returned; the two results are printed.

    Args:
        dataFile: Data source handed to ``readingData.dataPrep``.
        nclust: Number of clusters, forwarded to the EM routines.
        maxiter: Maximum number of EM iterations.
        epsilon: Convergence threshold for the EM iterations.
    """
    # Reading data: the samples and their class attribute.
    samples, labels = readingData.dataPrep(dataFile)

    # Training -- only the em_clustering call is timed.
    began = time.time()
    mu, sigma, prior = EM.em_clustering(samples, nclust, maxiter, epsilon)
    elapsed_seconds = round(time.time() - began, 3)

    # Testing: evaluate on the same data the model was trained on.
    responsibilities = EM.e_step(samples, mu, sigma, prior, nclust)
    score = testing.test(labels, responsibilities, samples)
    # NOTE(review): presumably `score` is a fraction in [0, 1]; it is scaled
    # to a whole-number percentage here -- confirm against testing.test.
    accuracy_percent = int(round(score * 100))

    print(" Traning running time :%s seconds " % elapsed_seconds)
    print("accuracy:%s%%" % accuracy_percent)