def generate_cluster_distribution_graphs(self, res_file, occurence_dict, res_labels):
    """
    Plot the genre distribution of every cluster and write all plots into one pdf.

    Each cluster is drawn through ``plot_word_frequency`` onto the figure handed
    back by ``subplot_four_corner``. A figure is written to the pdf as soon as
    ``subplot_four_corner`` reports it as the last plot of that figure; a figure
    that is still unwritten when the clusters run out is written at the end.

    :param res_file: target handed to ``PdfPages``
    :param occurence_dict: mapping of cluster name -> genre frequency for that cluster
    :param res_labels: cluster labels of the training set; compared element-wise
        against each cluster name (presumably a numpy array - confirm with caller)
    :return: None
    """
    with PdfPages(res_file) as pdf:
        # Figure that has received plots but has not been written to the pdf yet.
        unsaved_figure = None

        for plt_num, (cluster_name, cluster_genre_freq) in enumerate(occurence_dict.items()):
            figure_is_full, figure = subplot_four_corner(plt_num)

            num_samples = np.sum(res_labels == cluster_name)
            print("Total number of samples in cluster {} is {}".format(cluster_name, num_samples))

            title = "cluster {}, num samples: {}".format(cluster_name, num_samples)
            plot_word_frequency(title, cluster_genre_freq)

            if figure_is_full:
                pdf.savefig(figure)
                plt.close()
                unsaved_figure = None
            else:
                unsaved_figure = figure

        # Flush the trailing, partially filled figure (if there is one).
        if unsaved_figure is not None:
            pdf.savefig(unsaved_figure)
            plt.close()
def single_class_mispredition_freq(res_path, out_path=None):
    """
    Plot how often single genre instances are predicted as some other genre.

    Both the wrong and the right results stored under ``res_path`` are scanned.
    Every instance whose ``actual`` holds exactly one genre and whose first
    predicted genre differs from it is counted under the pair
    ``(actual genre, predicted genre)``. The counts are plotted with
    ``plot_word_frequency`` and saved with ``save_fig``.

    :param res_path: location handed to ``WrongResultsIter.load_iter_from_file``
        and ``RightResultsIter.load_iter_from_file``
    :param out_path: file the plot is saved to. When None (the default) the
        original hard-coded location is used, so existing callers are unaffected.
    :return: None
    """
    if out_path is None:
        # Historical default, kept verbatim for backward compatibility.
        out_path = "C:\\\\Users\\\\Kevin\\\\Desktop\\\\GitHub\\\\Research\\\\Webscraper\\\\classification_res\\\\genre_analysis\\\\single_miss.pdf"

    print("Loading Iter")
    wrong_res_iter = WrongResultsIter.load_iter_from_file(res_path)
    right_res_iter = RightResultsIter.load_iter_from_file(res_path)

    genre_to_wrong_genre_count = coll.Counter()
    for c, res_obj in enumerate(it.chain(wrong_res_iter, right_res_iter)):
        # Progress output every 500 results.
        if c % 500 == 0:
            print(c)

        actual = res_obj.actual
        # Only single genre instances are of interest here.
        if len(actual) != 1:
            continue

        top_prediction = res_obj.predicted[0]
        if actual[0] != top_prediction:
            genre_to_wrong_genre_count[(actual[0], top_prediction)] += 1

    # Named freq_plot so that a module-level ``plt`` is not shadowed.
    freq_plot = plot_word_frequency("Single Genre Mispredition", genre_to_wrong_genre_count)
    freq_plot.tight_layout()
    save_fig(out_path, freq_plot)
def plot_miss_per_genre(path, outpath, classifiers=None):
    """
    Plot, per classifier and per actual genre, what the misses were predicted as.

    Wrong results are read through ``WrongResultsIter(path, classifiers)``;
    results for which ``is_swing_sample()`` is true are ignored. For every
    classifier a sub folder ``outpath/<classifier>`` is created (including
    missing parent folders) and one ``<actual genre>_miss_true.pdf`` is saved
    into it for every actual genre that was missed.

    :param path: the input folder where the classifiers' result(s) are
    :param outpath: folder under which one sub folder per classifier is written
    :param classifiers: forwarded to ``WrongResultsIter`` to select whose results
        are read; None is forwarded unchanged
    :return: None
    """
    # classifier -> actual genre -> Counter of what it was misclassified as.
    classifier_to_misses_genre = collections.defaultdict(
        lambda: collections.defaultdict(collections.Counter)
    )

    for true_miss in WrongResultsIter(path, classifiers):
        if true_miss.is_swing_sample():
            continue
        # Type hint for readers/IDEs only; not relied upon for validation.
        assert isinstance(true_miss, ClassificationResultInstance)
        # Read the public ``actual`` attribute: a double-underscore attribute set
        # inside a class is name-mangled and cannot be reached as ``__actual``
        # from here. NOTE(review): confirm ClassificationResultInstance exposes
        # ``actual`` (sibling code in this file reads it that way).
        classifier_to_misses_genre[true_miss.classifier][true_miss.actual].update(
            [true_miss.predicted]
        )

    # Now plot each one, output to outpath/classifier.
    for classifier, actual_to_miss in classifier_to_misses_genre.items():
        # Create the folder once per classifier; exist_ok avoids the
        # check-then-create race and missing parents are created as well.
        out_path = os.path.join(outpath, classifier)
        os.makedirs(out_path, exist_ok=True)

        for actual_genre, miss_freq in actual_to_miss.items():
            freq_plot = plot_word_frequency(
                "{}-{} Misclassifications".format(classifier, actual_genre),
                miss_freq,
                plot_top=len(miss_freq),
            )
            save_fig("{}/{}_miss_true.pdf".format(out_path, actual_genre), freq_plot)
            freq_plot.close()
def get_all_mi_and_plot(reversed=False):
    """
    Plot the bag-of-words mutual information of every genre and save it as a pdf.

    Every object yielded by ``MutualInformation().iterable()`` is expected to
    provide a ``"short_genre"`` and a ``"bow"`` entry. Purely numeric keys are
    dropped from the ``"bow"`` mapping before it is handed to
    ``graphics.plot_word_frequency``. The plot is saved under
    ``graphs/<genre>.pdf`` with ``/`` in the genre replaced by ``_``, and the
    file name is prefixed with ``reversed_`` when ``reversed`` is truthy.

    :param reversed: forwarded to ``graphics.plot_word_frequency`` and used to
        pick the file name prefix
    :return: None
    """
    file_prefix = "reversed_" if reversed else ""

    for record in MutualInformation().iterable():
        genre = record["short_genre"]
        # Keep only the entries whose key is not made up of digits alone.
        word_scores = {
            word: score
            for word, score in record["bow"].items()
            if not word.isdigit()
        }

        figure = graphics.plot_word_frequency(genre, word_scores, reversed=reversed)
        target = "graphs/{}.pdf".format(file_prefix + genre.replace("/", "_"))
        graphics.save_fig(target, figure)
        print(genre)