Ejemplo n.º 1
0
def hierarchical_control():
    """Interactive menu for hierarchical (agglomerative) clustering.

    Repeatedly asks the user to pick a vector space (Word2Vec or Tf-Idf)
    and hands over to ``_hierarchical_session`` for the chosen space.
    Entering ``0`` restores the indentation level raised on entry and
    returns; any other input is reported through ``invalid()``.

    Relies on the module-level names ``cm``, ``Printer``, ``print_line``
    and ``invalid`` being defined by the time the function is called.

    Returns:
        None
    """
    print_line()
    Printer.indent_right()
    while True:
        Printer.print("Select Vector Space Type:")
        Printer.indent_right()
        # The embedded spaces reproduce the original menu text exactly.
        Printer.print(
            "1. Word2Vec Space\n"
            "                2. Tf-Idf Space\n"
            "\n"
            "                0. Back"
        )
        Printer.indent_left()
        choice = input()
        if choice == '1':
            _hierarchical_session(
                "Word2Vec", cm.cluster_w2v_with_agglomerative_cluster)
        elif choice == '2':
            _hierarchical_session(
                "Tf-Idf", cm.cluster_tf_idf_with_agglomerative_cluster)
        elif choice == '0':
            Printer.indent_left()
            return
        else:
            invalid()


def _hierarchical_session(space_name, cluster):
    """Prompt for cluster counts and show a dendrogram for each one.

    Args:
        space_name: Label of the selected vector space, used in the
            "<space_name> Space Selected" banner.
        cluster: Callable taking the cluster count and returning a
            ``(labels, model)`` pair; only ``model`` is used here.

    The loop ends, after one ``Printer.indent_left()`` call, when the user
    enters a count of zero or less. Non-integer input is reported through
    ``invalid()`` and the prompt is shown again.
    """
    print_line()
    Printer.indent_right()
    Printer.print(f"{space_name} Space Selected")
    while True:
        print_line()
        Printer.print("Enter Cluster Count: (Enter 0 for ending)")
        try:
            count = int(input())
        except ValueError:
            # Only a malformed number is "invalid input". Ctrl-C and a
            # closed stdin must propagate instead of re-prompting forever.
            invalid()
            continue
        if count <= 0:
            Printer.indent_left()
            return
        _labels, model = cluster(count)
        Printer.print('Clustering Finished')
        plt.title('Hierarchical Clustering Dendrogram')
        plot_dendrogram(model, truncate_mode='lastp', p=count)
        plt.show()
Ejemplo n.º 2
0
def invalid():
    """Tell the user the last input was not accepted.

    Prints the "Invalid input!" message through ``Printer`` and then
    calls ``print_line()``.
    """
    Printer.print('Invalid input!')
    print_line()
Ejemplo n.º 3
0
def k_means_control():
    """Interactive menu for K-Means clustering.

    Repeatedly asks the user to pick a vector space (Word2Vec or Tf-Idf)
    and hands over to ``_k_means_session`` for the chosen space. Entering
    ``0`` restores the indentation level raised on entry and returns; any
    other input is reported through ``invalid()``.

    Relies on the module-level names ``cm``, ``Printer``, ``print_line``
    and ``invalid`` being defined by the time the function is called.

    Returns:
        None
    """
    print_line()
    Printer.indent_right()
    while True:
        Printer.print("Select Vector Space Type:")
        Printer.indent_right()
        # The embedded spaces (including the whitespace-only third line)
        # reproduce the original menu text exactly.
        Printer.print(
            "1. Word2Vec Space\n"
            "        2. Tf-Idf Space\n"
            "        \n"
            "        0. Back"
        )
        Printer.indent_left()
        choice = input()
        if choice == '1':
            _k_means_session("Word2Vec", cm.cluster_w2v_with_k_means)
        elif choice == '2':
            _k_means_session("Tf-Idf", cm.cluster_tf_idf_with_k_means)
        elif choice == '0':
            Printer.indent_left()
            return
        else:
            invalid()


def _k_means_session(space_name, cluster):
    """Prompt for cluster counts and plot inertia against cluster count.

    Args:
        space_name: Label of the selected vector space, used in the
            "<space_name> Space Selected" banner and in the plot title.
        cluster: Callable taking the cluster count and returning an
            ``(inertia, labels, distances)`` triple; only ``inertia`` is
            used here.

    Every run in the session is remembered, so each new plot shows all
    (count, inertia) pairs collected so far, ordered by count. The loop
    ends, after one ``Printer.indent_left()`` call, when the user enters a
    count of zero or less. Non-integer input is reported through
    ``invalid()`` and the prompt is shown again.
    """
    print_line()
    Printer.indent_right()
    Printer.print(f"{space_name} Space Selected")
    history = []
    while True:
        print_line()
        Printer.print("Enter Cluster Count: (Enter 0 for ending)")
        try:
            count = int(input())
        except ValueError:
            # Only a malformed number is "invalid input". Ctrl-C and a
            # closed stdin must propagate instead of re-prompting forever.
            invalid()
            continue
        if count <= 0:
            Printer.indent_left()
            return
        inertia, _labels, _distances = cluster(count)
        Printer.print(f'Clustering Finished, Inertia: {inertia}')
        history.append((count, inertia))
        # Stable sort on the count only: repeated counts keep entry order.
        history.sort(key=lambda run: run[0])
        counts = [run[0] for run in history]
        inertias = [run[1] for run in history]
        plt.title(f'K-Means {space_name} Inertia Plot')
        plt.plot(counts, inertias, linewidth=2, color='blue', marker='o')
        plt.show()
Ejemplo n.º 4
0
def gaussian_control():
    """Interactive menu for Gaussian-mixture clustering.

    Repeatedly asks the user to pick a vector space (Word2Vec or Tf-Idf)
    and hands over to ``_gaussian_session`` for the chosen space. Entering
    ``0`` restores the indentation level raised on entry and returns; any
    other input is reported through ``invalid()``.

    Relies on the module-level names ``cm``, ``Printer``, ``print_line``
    and ``invalid`` being defined by the time the function is called.

    Returns:
        None
    """
    print_line()
    Printer.indent_right()
    while True:
        Printer.print("Select Vector Space Type:")
        Printer.indent_right()
        # The embedded spaces reproduce the original menu text exactly.
        Printer.print(
            "1. Word2Vec Space\n"
            "            2. Tf-Idf Space\n"
            "\n"
            "            0. Back"
        )
        Printer.indent_left()
        choice = input()
        if choice == '1':
            _gaussian_session("Word2Vec", cm.cluster_w2v_with_gaussian_mixture)
        elif choice == '2':
            _gaussian_session("Tf-Idf", cm.cluster_tf_idf_with_gaussian_mixture)
        elif choice == '0':
            Printer.indent_left()
            return
        else:
            invalid()


def _gaussian_session(space_name, cluster):
    """Prompt for cluster counts and plot the summed score against count.

    Args:
        space_name: Label of the selected vector space, used in the
            "<space_name> Space Selected" banner and in the plot title.
        cluster: Callable taking the cluster count and returning a
            ``(score, labels)`` pair; ``score`` is reduced with ``np.sum``
            and ``labels`` is not used here.

    Every run in the session is remembered, so each new plot shows all
    (count, total score) pairs collected so far, ordered by count. The
    loop ends, after one ``Printer.indent_left()`` call, when the user
    enters a count of zero or less. Non-integer input is reported through
    ``invalid()`` and the prompt is shown again.
    """
    print_line()
    Printer.indent_right()
    Printer.print(f"{space_name} Space Selected")
    history = []
    while True:
        print_line()
        Printer.print("Enter Cluster Count: (Enter 0 for ending)")
        try:
            count = int(input())
        except ValueError:
            # Only a malformed number is "invalid input". Ctrl-C and a
            # closed stdin must propagate instead of re-prompting forever.
            invalid()
            continue
        if count <= 0:
            Printer.indent_left()
            return
        score, _labels = cluster(count)
        total_score = np.sum(score)
        Printer.print(f'Clustering Finished, Score: {total_score}')
        history.append((count, total_score))
        # Stable sort on the count only: repeated counts keep entry order.
        history.sort(key=lambda run: run[0])
        counts = [run[0] for run in history]
        totals = [run[1] for run in history]
        plt.title(f'Gaussian Mixture {space_name} Score Plot')
        plt.plot(counts, totals, linewidth=2, color='red', marker='o')
        plt.show()
Ejemplo n.º 5
0
                plot_dendrogram(model, truncate_mode='lastp', p=count)
                plt.show()
        elif inp1 == '0':
            Printer.indent_left()
            return
        else:
            invalid()


if __name__ == '__main__':
    cm = ClusterManager()
    cm.load_corpus('DataSet/corpus/Phase3_Data.csv', remove_mentions=True)
    cm.load_w2v_model('DataSet/models/deps.words', binary=False)
    cm.corpus_generate_tf_idf_model()
    cm.corpus_generate_word2vec_model()
    print_line()
    while True:
        Printer.print("Select Clustering Method:")
        Printer.indent_right()
        Printer.print("""1. K-Means
        2. Gaussian Mixture
        3. Hierarchical Clustering
        
        0. Exit""")
        Printer.indent_left()
        selection = input()
        if selection == '1':
            k_means_control()
        elif selection == '2':
            gaussian_control()
        elif selection == '3':