def single_class_mispredition_freq(res_path):
    """
    Tally and plot the mispredictions made on single-genre instances.

    Both the wrong and the right result iterators are loaded from ``res_path``
    and walked in that order. For every instance that has exactly one actual
    genre and whose first predicted genre differs from it, the pair
    ``(actual genre, predicted genre)`` is counted. The counts are then handed
    to ``plot_word_frequency`` and the figure is saved to a fixed PDF path.

    :param res_path: path passed to ``load_iter_from_file`` of both iterators
    :return: None
    """
    print("Loading Iter")
    wrong_results = WrongResultsIter.load_iter_from_file(res_path)
    right_results = RightResultsIter.load_iter_from_file(res_path)

    mispredicted_pairs = coll.Counter()
    for position, result in enumerate(it.chain(wrong_results, right_results)):
        # Progress indicator every 500 instances.
        if position % 500 == 0:
            print(position)

        true_genres = result.actual
        # Only instances carrying a single actual genre are of interest.
        if len(true_genres) != 1:
            continue

        true_genre = true_genres[0]
        predicted_genre = result.predicted[0]
        if true_genre != predicted_genre:
            mispredicted_pairs[(true_genre, predicted_genre)] += 1

    # Plot the pair frequencies and write them out.
    figure = plot_word_frequency("Single Genre Mispredition", mispredicted_pairs)
    figure.tight_layout()
    save_fig(
        "C:\\\\Users\\\\Kevin\\\\Desktop\\\\GitHub\\\\Research\\\\Webscraper\\\\classification_res\\\\genre_analysis\\\\single_miss.pdf",
        figure,
    )
def plot_miss_per_genre(path,outpath,classifiers=None):
    """
    Plot, per classifier and per actual genre, what the misses were predicted as.

    Every wrong result under ``path`` that is not a swing sample is counted in a
    three level structure: classifier -> actual genre -> Counter of predicted
    genres. One frequency plot is then produced for each (classifier, actual
    genre) pair and saved as ``<outpath>/<classifier>/<actual genre>_miss_true.pdf``.

    :param path: the input folder where the classifiers' result(s) are
    :param outpath: root folder for the plots; one sub folder per classifier is
        created on demand (including ``outpath`` itself when it is missing)
    :param classifiers: A set of classifiers whose results to graph; passed
        straight to ``WrongResultsIter``. According to the original author's
        note, ``None`` means the results of all classifiers are used.
    :return: None
    """
    # First layer keyed by classifier, second by the correct genre; the Counter
    # records how many times that genre got misclassified as something else.
    classifier_to_misses_genre=collections.defaultdict(
        lambda: collections.defaultdict(collections.Counter)
    )

    for true_miss in WrongResultsIter(path,classifiers):
        if true_miss.is_swing_sample():
            continue
        assert isinstance(true_miss,ClassificationResultInstance)
        # The original read ``true_miss.__actual``. Outside of the class body no
        # name mangling is applied, so that lookup cannot reach an attribute the
        # class stored as ``self.__actual``. Use the public ``actual`` attribute
        # instead, as the other result consumers in this file do.
        # NOTE(review): assumes ``actual`` is hashable here - confirm against
        # ClassificationResultInstance.
        classifier_to_misses_genre[true_miss.classifier][true_miss.actual].update([true_miss.predicted])

    # Now plot each one, output to OUTPUT/classifier.
    for classifier, actual_to_miss in classifier_to_misses_genre.items():
        out_path=os.path.join(outpath,classifier)
        # makedirs also creates a missing ``outpath`` and tolerates the folder
        # already existing, which a bare exists()/mkdir() pair does not.
        os.makedirs(out_path, exist_ok=True)

        for actual_genre,miss_freq in actual_to_miss.items():
            plt=plot_word_frequency(
                "{}-{} Misclassifications".format(classifier,actual_genre),
                miss_freq,
                plot_top=len(miss_freq),
            )
            # A "/" inside the genre would be read as a directory separator.
            file_stem=str(actual_genre).replace("/", "_")
            save_fig("{}/{}_miss_true.pdf".format(out_path,file_stem),plt)
            plt.close()
def plot_consensus_percentile(consensus_count,consensus_total):
    """
    Draw one consensus subplot per class and save the figure as a PDF.

    The classes are taken from ``consensus_count`` in sorted key order. For each
    class, every genre is drawn at its own x position through
    ``graphics.add_bar_plot`` (the original docstring describes this as a 90th
    percentile plot), with the genre names as x tick labels.

    :param consensus_count: mapping of class -> {genre: counts}
    :param consensus_total: indexable by the class position ``c``, giving
        {genre: iterable of totals} for that class
    :return: None
    """
    # Dictionary keys are unique, so sorting on the key alone orders the items
    # exactly like sorting on the whole (key, value) pair.
    consensus_count=sorted(consensus_count.items(),key=lambda entry:entry[0])
    num_classes=len(consensus_count)

    pyplot.figure(1)
    for c in range(num_classes):
        # Subplot indices are 1-based. Passing 0 selected the last subplot on
        # old matplotlib releases and raises ValueError on current ones.
        pyplot.subplot(num_classes,1,c+1)

        genre_dict=consensus_count[c][1]
        # NOTE(review): the totals are looked up by position ``c`` rather than
        # by the class key - confirm both inputs share that ordering.
        genre_total_dict=consensus_total[c]

        genre_to_counts=sorted(
            ((genre,count,genre_total_dict[genre]) for genre,count in genre_dict.items()),
            key=lambda t:t[0],
        )

        # NOTE(review): the divisor 6 is a magic constant kept from the original.
        total_instances=sum(it.chain.from_iterable(g[2] for g in genre_to_counts))/6
        pyplot.title("Consensus plot for Genre {}, total number of instances {}".format(c,total_instances))

        # Set up x axis labels, one tick per genre starting at position 1.
        pyplot.xticks(list(range(1,len(genre_to_counts)+1)),[g[0] for g in genre_to_counts])
        pyplot.tick_params(axis='both', which='major', labelsize=5)

        # Now plot the y axis values at the matching tick positions.
        for index,res in enumerate(genre_to_counts):
            graphics.add_bar_plot(index+1,res[1])

        pyplot.legend(loc="upper right")

    pyplot.tight_layout()
    path="C:\\Users\\Kevin\\Desktop\\GitHub\\Research\\Webscraper\\classification_res\\consensus_plots\\percentile_{}.pdf"
    graphics.save_fig(path.format(num_classes),pyplot)
    pyplot.close()
    print("Done")
def plot_total_consensus(consensus_count,consensus_total):
    """
    Draw, per class, the summed consensus counts on top of the summed totals.

    The classes are taken from ``consensus_count`` in sorted key order. Each
    class gets its own subplot with two overlaid bar series per genre: the
    summed totals first, then the summed consensus counts drawn over them. The
    finished figure is saved as a PDF named after the number of classes.

    :param consensus_count: mapping of class -> {genre: iterable of counts}
    :param consensus_total: indexable by the class position ``c``, giving
        {genre: iterable of totals} for that class
    :return: None
    """
    # Dictionary keys are unique, so sorting on the key alone orders the items
    # exactly like sorting on the whole (key, value) pair.
    consensus_count=sorted(consensus_count.items(),key=lambda entry:entry[0])
    num_classes=len(consensus_count)

    # ``pyplot.hold`` was deprecated in matplotlib 2.0 and removed in 3.0. Call
    # it only where it still exists so old installations behave as before.
    set_hold=getattr(pyplot,"hold",None)

    pyplot.close()
    pyplot.figure(1)
    for c in range(num_classes):
        # Subplot indices are 1-based. Passing 0 selected the last subplot on
        # old matplotlib releases and raises ValueError on current ones.
        pyplot.subplot(num_classes,1,c+1)

        genre_dict=consensus_count[c][1]
        # NOTE(review): the totals are looked up by position ``c`` rather than
        # by the class key - confirm both inputs share that ordering.
        genre_total_dict=consensus_total[c]

        genre_to_counts=sorted(
            ((genre,sum(count),sum(genre_total_dict[genre])) for genre,count in genre_dict.items()),
            key=lambda t:t[0],
        )
        positions=range(len(genre_to_counts))

        if set_hold is not None:
            set_hold(True)

        pyplot.title("Consensus plot for Genre {}".format(c))
        # Totals go first so that the consensus counts are painted over them.
        pyplot.bar(positions,[g[2] for g in genre_to_counts],color='#deb0b0',label="Consensus Total",align='center')
        pyplot.bar(positions,[g[1] for g in genre_to_counts],color='#b0c4de',label="Consensus Counts",align='center')
        pyplot.xticks(positions,[g[0] for g in genre_to_counts],size= 5)

        # The legend is created but hidden, as in the original.
        legend=pyplot.legend(loc="upper right")
        legend.set_visible(False)

        if set_hold is not None:
            set_hold(False)

    path="C:\\Users\\Kevin\\Desktop\\GitHub\\Research\\Webscraper\\classification_res\\consensus_plots\\total_{}.pdf"
    graphics.save_fig(path.format(num_classes),pyplot)
    pyplot.close()
    print("Done")
def get_all_mi_and_plot(reversed=False):
    """
    Plot the bag-of-words mutual information of every genre and save it as a PDF.

    Each object yielded by ``MutualInformation().iterable()`` supplies a
    ``short_genre`` name and a ``bow`` mapping. Entries whose key consists only
    of digits are dropped, the remainder is plotted with
    ``graphics.plot_word_frequency`` and saved under ``graphs/``. Any "/" in the
    genre name is replaced by "_" in the file name, and the file name is
    prefixed with ``reversed_`` when ``reversed`` is true.

    :param reversed: forwarded to ``graphics.plot_word_frequency`` and used to
        pick the file name prefix
    :return: None
    """
    source = MutualInformation()
    for record in source.iterable():
        genre = record["short_genre"]
        word_scores = record["bow"]

        # Purely numeric tokens are left out of the plot.
        non_numeric_scores = {
            word: score for word, score in word_scores.items() if not word.isdigit()
        }

        figure = graphics.plot_word_frequency(genre, non_numeric_scores, reversed=reversed)

        if reversed:
            prefix = "reversed_"
        else:
            prefix = ""
        file_stem = prefix + genre.replace("/", "_")
        graphics.save_fig("graphs/{}.pdf".format(file_stem), figure)

        print(genre)