def main():
    """Entry point: dispatch on ``sys.argv[1]`` to one of three tasks.

    Tasks:
        ``kmean``  -- run the external K-means binary (``./a.out``), then plot
                      the cluster assignments for every recorded iteration.
        ``gmm``    -- seed a GMM from the final K-means clusters, train it,
                      and print/plot the learned vs. true Gaussian parameters.
        ``saving`` -- dump the training data to a CSV file.

    Any other (or missing) argument prints an error message and returns.
    Reads ``GMM_dataset.txt``, ``./min_sse_pos.csv`` and
    ``./all_cluster_center<k>.csv`` from the working directory; writes PNG
    figures under ``./kmean_result/``.
    """
    # declare variables
    K_value = 3            # number of clusters / mixture components
    max_epoch = 1000       # EM iteration cap for the GMM
    repeat_num = 8         # restarts for the external K-means run
    repeat_num_gmm = 1     # restarts for the GMM
    name = 'GMM_dataset.txt'

    # get the training data
    training_data = data_loading(name)

    # Guard: the original indexed sys.argv[1] unconditionally and crashed
    # with IndexError when no task argument was supplied.
    if len(sys.argv) < 2:
        print('Error Input. Please re-choose the task!!')
        return

    if sys.argv[1] == 'kmean':
        # run the external K-mean program
        program_name = './a.out'
        parameter_line = ' ' + 'training_kmeans ' + str(K_value) + ' ' + str(
            repeat_num) + ' 0'
        print('Running K-mean')
        os.system(program_name + parameter_line)
        print('The Program is done.')

        # read the index of the restart that achieved the minimum SSE
        min_sse_pos = np.loadtxt('./min_sse_pos.csv', delimiter=',', skiprows=0)

        # read the per-iteration cluster centers produced by that restart;
        # rows are stacked K_value at a time, one group per iteration
        kmean_name = './all_cluster_center' + str(int(min_sse_pos)) + '.csv'
        kmean_clusters = np.loadtxt(kmean_name, delimiter=',', skiprows=0)

        for draw_ind in range(int(kmean_clusters.shape[0] / K_value)):
            # slice out this iteration's K_value cluster centers
            start_index = draw_ind * K_value
            iter_kmean_clusters = kmean_clusters[
                start_index:start_index + K_value, :]

            # assign each training point to its nearest center
            out_label, cov_list = label_assignment(iter_kmean_clusters,
                                                   training_data)

            # plot points colored by cluster, with centers overlaid
            fig, ax = plt.subplots()
            ax.scatter(training_data[:, 0], training_data[:, 1],
                       c=out_label, alpha=0.5)
            ax.scatter(iter_kmean_clusters[:, 0], iter_kmean_clusters[:, 1],
                       c='b', s=100, alpha=0.5)
            plt.xlabel('Feature: x1')
            plt.ylabel('Feature: x2')
            plt.title('K-mean Clustering')
            fig.savefig('./kmean_result/iter' + str(draw_ind) + '.png')
            # Close the figure to avoid leaking one open figure per
            # iteration (the original suppressed the warning via
            # figure.max_open_warning instead of closing).
            plt.close(fig)

    elif sys.argv[1] == 'gmm':
        # read the index of the restart with the minimum SSE
        min_sse_pos = np.loadtxt('./min_sse_pos.csv', delimiter=',', skiprows=0)

        # read the cluster centers from the K-mean run and take the LAST
        # iteration's K_value centers to seed the GMM
        kmean_name = './all_cluster_center' + str(int(min_sse_pos)) + '.csv'
        kmean_clusters = np.loadtxt(kmean_name, delimiter=',', skiprows=0)
        start_index = (int(kmean_clusters.shape[0] / K_value) - 1) * K_value
        iter_kmean_clusters = kmean_clusters[
            start_index:start_index + K_value, :]
        out_label, cov_list = label_assignment(iter_kmean_clusters,
                                               training_data)

        # call GMM class
        gmm = GMM(K_value, repeat_num_gmm, max_epoch, training_data)
        all_likelihood, parameters = gmm.model_training(
            iter_kmean_clusters, out_label, cov_list)
        true_mu, true_covariance = gmm.fit_true_model(training_data)

        # find the mu, covariance, and prior of the best restart
        best_likelihood, all_mu, all_cov, all_prior = find_the_best(
            all_likelihood, parameters)

        # prediction phase (uses the final epoch's parameters)
        prediction = gmm.model_predict(all_mu[-1], all_cov[-1],
                                       all_prior[-1], training_data)
        # NOTE(review): `labels` is never read below — label_GMM is kept for
        # any side effects it may have; confirm it can be dropped.
        labels = label_GMM(prediction)

        # drawing gaussian functions, one figure per training epoch
        for ind in range(len(all_mu)):
            out_para = drawing_Gaussian(all_mu[ind], all_cov[ind],
                                        training_data, all_mu[-1], 1, ind)
            if ind == (len(all_mu) - 1):
                # print out parameters of the final Gaussian functions
                for ind_2 in range(K_value):
                    print('Cluster:', ind_2)
                    print('Mu:')
                    print(out_para[str(ind_2)][0])
                    print('Covariance:')
                    print(out_para[str(ind_2)][1])
                    print('=====================')

        # drawing true gaussian functions (ground truth only known for K=3)
        if K_value == 3:
            out_param_true = drawing_Gaussian(true_mu, true_covariance,
                                              training_data, true_mu, 2, None)
            # print out parameters of the true Gaussian functions
            for ind_3 in range(K_value):
                print('Cluster:', ind_3)
                print('Actual Mu:')
                print(out_param_true[str(ind_3)][0])
                print('Actual Covariance:')
                print(out_param_true[str(ind_3)][1])
                print('=====================')

        # drawing log-likelihood values of the best restart
        drawing_Log_likelihood(best_likelihood)

    elif sys.argv[1] == 'saving':
        # save the data as a csv file
        save_data_csv_file(training_data)
    else:
        print('Error Input. Please re-choose the task!!')