def main(articles_file, topics): #get the topics topics_model = ut.get_the_topics_lst(topics) #get the training set headers, articles, words_freqs, articles_freqs = ut.make_train_set( articles_file) #divide the words into cluster words_into_clusters = ut.divide_clusters(articles) #get the good wights for our model w_model = em.run_em_algorithm(articles_freqs, words_freqs, words_into_clusters, len(topics_model)) #create the confuision matrix conf_matrix, clusters_and_topics, articles_of_clusters = ut.make_conf_matrix( w_model, articles_freqs, topics_model, headers) # conf_matrix_descending_order = sorted(conf_matrix, key=lambda line: line[-1], reverse=True) print conf_matrix #add the topic to the articles articles_by_topic = ut.add_tag_to_articles(clusters_and_topics, articles_of_clusters) #print empty line print "\n" #compute the accuracy of the model accuracy = ut.compute_accuracy(headers, articles_by_topic) print "the accuracy of our model is- ", accuracy