def _run_hierarchical_session(banner, cluster):
    """Prompt for cluster counts and show a dendrogram for each one.

    Args:
        banner: Text printed once when the session starts.
        cluster: Callable taking the cluster count and returning a
            ``(labels, model)`` pair; only ``model`` is used here.

    The session ends when the user enters a count of 0 or less, at which
    point the indentation added on entry is removed again.
    """
    print_line()
    Printer.indent_right()
    Printer.print(banner)
    while True:
        print_line()
        Printer.print("Enter Cluster Count: (Enter 0 for ending)")
        try:
            count = int(input())
        except ValueError:
            # Only non-numeric text is "invalid input".  A bare except here
            # would also swallow EOFError (closed stdin -> endless loop) and
            # KeyboardInterrupt (Ctrl-C could not leave the prompt).
            invalid()
            continue
        if count <= 0:
            Printer.indent_left()
            break
        _, model = cluster(count)
        Printer.print('Clustering Finished')
        plt.title('Hierarchical Clustering Dendrogram')
        # The plot needs the model and count of this iteration, so it is
        # drawn inside the loop, once per entered count.
        plot_dendrogram(model, truncate_mode='lastp', p=count)
        plt.show()


def hierarchical_control():
    """Run the hierarchical-clustering menu until the user selects "Back".

    Reads the vector-space choice from standard input: '1' clusters in the
    Word2Vec space, '2' in the Tf-Idf space, '0' undoes this menu's
    indentation and returns. Any other entry is reported via ``invalid()``
    and the menu is shown again.
    """
    print_line()
    Printer.indent_right()
    while True:
        Printer.print("Select Vector Space Type:")
        Printer.indent_right()
        # NOTE(review): literal reproduced exactly as it reached review; if
        # the original spans several lines, keep its line breaks.
        Printer.print("""1. Word2Vec Space 2. Tf-Idf Space 0. Back""")
        Printer.indent_left()
        inp1 = input()
        if inp1 == '1':
            _run_hierarchical_session(
                "Word2Vec Space Selected",
                cm.cluster_w2v_with_agglomerative_cluster,
            )
        elif inp1 == '2':
            _run_hierarchical_session(
                "Tf-Idf Space Selected",
                cm.cluster_tf_idf_with_agglomerative_cluster,
            )
        elif inp1 == '0':
            Printer.indent_left()
            return
        else:
            invalid()
def invalid():
    """Print the "Invalid input!" notice and then call ``print_line()``."""
    Printer.print("Invalid input!")
    print_line()
def _run_k_means_session(banner, cluster, plot_title):
    """Collect K-Means inertia values for user-entered counts and plot them.

    Args:
        banner: Text printed once when the session starts.
        cluster: Callable taking the cluster count and returning an
            ``(inertia, labels, distances)`` triple; only ``inertia`` is
            used here.
        plot_title: Title of the inertia plot.

    The session ends when the user enters a count of 0 or less, at which
    point the indentation added on entry is removed again.
    """
    print_line()
    Printer.indent_right()
    Printer.print(banner)
    results = []  # (cluster count, inertia) pairs in entry order
    while True:
        print_line()
        Printer.print("Enter Cluster Count: (Enter 0 for ending)")
        try:
            count = int(input())
        except ValueError:
            # Only non-numeric text is "invalid input".  A bare except here
            # would also swallow EOFError (closed stdin -> endless loop) and
            # KeyboardInterrupt (Ctrl-C could not leave the prompt).
            invalid()
            continue
        if count <= 0:
            Printer.indent_left()
            break
        inertia, _labels, _distances = cluster(count)
        Printer.print(f'Clustering Finished, Inertia: {inertia}')
        results.append((count, inertia))

    # NOTE(review): the source reached review with its indentation stripped.
    # Plotting is placed after the entry loop (one plot per session) --
    # confirm against the original layout.
    results.sort(key=lambda pair: pair[0])
    counts = [pair[0] for pair in results]
    inertias = [pair[1] for pair in results]
    plt.title(plot_title)
    plt.plot(counts, inertias, linewidth=2, color='blue', marker='o')
    plt.show()


def k_means_control():
    """Run the K-Means menu until the user selects "Back".

    Reads the vector-space choice from standard input: '1' clusters in the
    Word2Vec space, '2' in the Tf-Idf space, '0' undoes this menu's
    indentation and returns. Any other entry is reported via ``invalid()``
    and the menu is shown again.
    """
    print_line()
    Printer.indent_right()
    while True:
        Printer.print("Select Vector Space Type:")
        Printer.indent_right()
        # NOTE(review): literal reproduced exactly as it reached review; if
        # the original spans several lines, keep its line breaks.
        Printer.print("""1. Word2Vec Space 2. Tf-Idf Space 0. Back""")
        Printer.indent_left()
        inp1 = input()
        if inp1 == '1':
            _run_k_means_session(
                "Word2Vec Space Selected",
                cm.cluster_w2v_with_k_means,
                'K-Means Word2Vec Inertia Plot',
            )
        elif inp1 == '2':
            _run_k_means_session(
                "Tf-Idf Space Selected",
                cm.cluster_tf_idf_with_k_means,
                'K-Means Tf-Idf Inertia Plot',
            )
        elif inp1 == '0':
            Printer.indent_left()
            return
        else:
            invalid()
def _run_gaussian_session(banner, cluster, plot_title):
    """Collect Gaussian-mixture scores for user-entered counts and plot them.

    Args:
        banner: Text printed once when the session starts.
        cluster: Callable taking the cluster count and returning a
            ``(score, labels)`` pair; ``score`` is summed with ``np.sum``
            and ``labels`` is not used here.
        plot_title: Title of the score plot.

    The session ends when the user enters a count of 0 or less, at which
    point the indentation added on entry is removed again.
    """
    print_line()
    Printer.indent_right()
    Printer.print(banner)
    results = []  # (cluster count, summed score) pairs in entry order
    while True:
        print_line()
        Printer.print("Enter Cluster Count: (Enter 0 for ending)")
        try:
            count = int(input())
        except ValueError:
            # Only non-numeric text is "invalid input".  A bare except here
            # would also swallow EOFError (closed stdin -> endless loop) and
            # KeyboardInterrupt (Ctrl-C could not leave the prompt).
            invalid()
            continue
        if count <= 0:
            Printer.indent_left()
            break
        score, _labels = cluster(count)
        total_score = np.sum(score)
        Printer.print(f'Clustering Finished, Score: {total_score}')
        results.append((count, total_score))

    # NOTE(review): the source reached review with its indentation stripped.
    # Plotting is placed after the entry loop (one plot per session) --
    # confirm against the original layout.
    results.sort(key=lambda pair: pair[0])
    counts = [pair[0] for pair in results]
    scores = [pair[1] for pair in results]
    plt.title(plot_title)
    plt.plot(counts, scores, linewidth=2, color='red', marker='o')
    plt.show()


def gaussian_control():
    """Run the Gaussian-mixture menu until the user selects "Back".

    Reads the vector-space choice from standard input: '1' clusters in the
    Word2Vec space, '2' in the Tf-Idf space, '0' undoes this menu's
    indentation and returns. Any other entry is reported via ``invalid()``
    and the menu is shown again.
    """
    print_line()
    Printer.indent_right()
    while True:
        Printer.print("Select Vector Space Type:")
        Printer.indent_right()
        # NOTE(review): literal reproduced exactly as it reached review; if
        # the original spans several lines, keep its line breaks.
        Printer.print("""1. Word2Vec Space 2. Tf-Idf Space 0. Back""")
        Printer.indent_left()
        inp1 = input()
        if inp1 == '1':
            _run_gaussian_session(
                "Word2Vec Space Selected",
                cm.cluster_w2v_with_gaussian_mixture,
                'Gaussian Mixture Word2Vec Score Plot',
            )
        elif inp1 == '2':
            _run_gaussian_session(
                "Tf-Idf Space Selected",
                cm.cluster_tf_idf_with_gaussian_mixture,
                'Gaussian Mixture Tf-Idf Score Plot',
            )
        elif inp1 == '0':
            Printer.indent_left()
            return
        else:
            invalid()
plot_dendrogram(model, truncate_mode='lastp', p=count) plt.show() elif inp1 == '0': Printer.indent_left() return else: invalid() if __name__ == '__main__': cm = ClusterManager() cm.load_corpus('DataSet/corpus/Phase3_Data.csv', remove_mentions=True) cm.load_w2v_model('DataSet/models/deps.words', binary=False) cm.corpus_generate_tf_idf_model() cm.corpus_generate_word2vec_model() print_line() while True: Printer.print("Select Clustering Method:") Printer.indent_right() Printer.print("""1. K-Means 2. Gaussian Mixture 3. Hierarchical Clustering 0. Exit""") Printer.indent_left() selection = input() if selection == '1': k_means_control() elif selection == '2': gaussian_control() elif selection == '3':