Example #1
0
def _load_best_kmean_clusters():
    """Load the cluster-centre history of the best K-mean run.

    Reads the run index stored in ``./min_sse_pos.csv`` and uses it to pick
    ``./all_cluster_center<index>.csv``, whose contents are returned as a
    NumPy array.
    NOTE(review): both files are presumably written by the external K-mean
    program (``./a.out``) -- confirm against that program.
    """
    min_sse_pos = np.loadtxt('./min_sse_pos.csv', delimiter=',', skiprows=0)
    kmean_name = './all_cluster_center' + str(int(min_sse_pos)) + '.csv'
    return np.loadtxt(kmean_name, delimiter=',', skiprows=0)


def _print_gaussian_params(params, cluster_count, prefix=''):
    """Print the mu / covariance entry of each cluster in ``params``.

    ``params`` is indexed by the cluster number as a string; element 0 of
    each entry is printed as the mean and element 1 as the covariance.
    ``prefix`` is prepended to the 'Mu:' / 'Covariance:' headings.
    """
    for cluster in range(cluster_count):
        entry = params[str(cluster)]
        print('Cluster:', cluster)
        print(prefix + 'Mu:')
        print(entry[0])
        print(prefix + 'Covariance:')
        print(entry[1])
        print('=====================')


def _save_kmean_figures(kmean_clusters, training_data, K_value):
    """Save one scatter plot per group of ``K_value`` cluster-centre rows.

    ``kmean_clusters`` is consumed in consecutive groups of ``K_value`` rows;
    group ``i`` is plotted over the training data and written to
    ``./kmean_result/iter<i>.png``.
    """
    out_dir = './kmean_result'
    # savefig does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    # Loop-invariant, so set once; kept to leave the global matplotlib
    # configuration the same as before.
    plt.rcParams.update({'figure.max_open_warning': 0})

    for draw_ind in range(kmean_clusters.shape[0] // K_value):
        start_index = draw_ind * K_value
        iter_kmean_clusters = kmean_clusters[start_index:start_index +
                                             K_value, :]

        # label assignment
        out_label, _ = label_assignment(iter_kmean_clusters, training_data)

        fig, ax = plt.subplots()
        ax.scatter(training_data[:, 0],
                   training_data[:, 1],
                   c=out_label,
                   alpha=0.5)
        ax.scatter(iter_kmean_clusters[:, 0],
                   iter_kmean_clusters[:, 1],
                   c='b',
                   s=100,
                   alpha=0.5)
        plt.xlabel('Feature: x1')
        plt.ylabel('Feature: x2')
        plt.title('K-mean Clustering')
        fig.savefig(out_dir + '/iter' + str(draw_ind) + '.png')
        # Release the figure; otherwise every iteration's figure stays
        # alive in pyplot's registry until the process exits.
        plt.close(fig)


def main():
    """Run the task named by the first command-line argument.

    Supported tasks:
      * ``kmean``  -- run the external ``./a.out`` K-mean program, then save
        a scatter plot for every group of cluster centres it recorded.
      * ``gmm``    -- take the last group of recorded K-mean centres, pass
        them to ``GMM.model_training``, then draw the Gaussians and the
        log-likelihood values and print the resulting parameters.
      * ``saving`` -- pass the training data to ``save_data_csv_file``.

    A missing or unknown task prints an error message and returns without
    loading any data.
    """
    # declare variables
    K_value = 3
    max_epoch = 1000
    repeat_num = 8
    repeat_num_gmm = 1
    name = 'GMM_dataset.txt'

    # Validate the task before doing any work; a missing argument is
    # reported the same way as an unknown one instead of raising IndexError.
    task = sys.argv[1] if len(sys.argv) > 1 else None
    if task not in ('kmean', 'gmm', 'saving'):
        print('Error Input. Please re-choose the task!!')
        return

    # get the training data
    training_data = data_loading(name)

    if task == 'kmean':
        # run the K-mean program
        program_name = './a.out'
        parameter_line = ' ' + 'training_kmeans ' + str(K_value) + ' ' + str(
            repeat_num) + ' 0'
        print('Running K-mean')
        status = os.system(program_name + parameter_line)
        if status != 0:
            # Only a warning: the CSV files read below may be left over
            # from an earlier run if the program did not complete.
            print('Warning: ' + program_name +
                  ' returned a non-zero exit status: ' + str(status))
        print('The Program is done.')

        _save_kmean_figures(_load_best_kmean_clusters(), training_data,
                            K_value)

    elif task == 'gmm':
        kmean_clusters = _load_best_kmean_clusters()

        # Only the last group of K_value rows is used as the GMM seed.
        start_index = (kmean_clusters.shape[0] // K_value - 1) * K_value
        iter_kmean_clusters = kmean_clusters[start_index:start_index +
                                             K_value, :]
        out_label, cov_list = label_assignment(iter_kmean_clusters,
                                               training_data)

        # call GMM class
        gmm = GMM(K_value, repeat_num_gmm, max_epoch, training_data)
        all_likelihood, parameters = gmm.model_training(
            iter_kmean_clusters, out_label, cov_list)
        true_mu, true_covariance = gmm.fit_true_model(training_data)
        # find the mu, covariance, and prior
        best_likelihood, all_mu, all_cov, all_prior = find_the_best(
            all_likelihood, parameters)

        # prediction phase (the resulting labels are not used further here)
        prediction = gmm.model_predict(all_mu[-1], all_cov[-1], all_prior[-1],
                                       training_data)
        label_GMM(prediction)

        # drawing gaussian functions, one call per recorded parameter set
        out_para = None
        for ind in range(len(all_mu)):
            out_para = drawing_Gaussian(all_mu[ind], all_cov[ind],
                                        training_data, all_mu[-1], 1, ind)
        if out_para is not None:
            # print out parameters returned for the final parameter set
            _print_gaussian_params(out_para, K_value)

        # drawing true gaussian functions
        if K_value == 3:
            out_param_true = drawing_Gaussian(true_mu, true_covariance,
                                              training_data, true_mu, 2, None)
            _print_gaussian_params(out_param_true, K_value, 'Actual ')

        # drawing log-likelihood values
        drawing_Log_likelihood(best_likelihood)

    else:
        # task == 'saving': save the data as a csv file
        save_data_csv_file(training_data)