def plot_consensus_percentile(consensus_count, consensus_total):
    """
    Draw one bar-plot panel per class and save the stacked figure as a PDF.

    The entries of ``consensus_count`` are sorted by key; the entry at sorted
    position ``c`` is drawn in the ``c``-th panel (top to bottom). Within a
    panel the genres are sorted by name and each genre's count is handed to
    ``graphics.add_bar_plot``.

    :param consensus_count: mapping whose values are themselves mappings of
        genre -> count (the values are iterated with ``.items()``).
    :param consensus_total: container indexed by the sorted position ``c``
        (0 .. number of classes - 1); each element maps genre -> an iterable
        of numbers, which are chained and summed for the panel title.
        NOTE(review): this is indexed by position, not by the key of
        ``consensus_count`` - confirm the two line up at the call site.
    :return: None. The figure is written through ``graphics.save_fig`` to a
        hard-coded path, the pyplot figure is closed and "Done" is printed.
    """
    # Dictionary keys are unique, so ordering on the key alone gives the same
    # order as comparing whole (key, value) pairs.
    consensus_count = sorted(consensus_count.items(), key=lambda entry: entry[0])
    num_classes = len(consensus_count)

    pyplot.figure(1)

    for c in range(num_classes):
        # Subplot indices are 1-based, whereas ``c`` is a 0-based position.
        pyplot.subplot(num_classes, 1, c + 1)

        genre_dict = consensus_count[c][1]
        genre_total_dict = consensus_total[c]

        # (genre, count, totals) triples, ordered by genre name.
        genre_to_counts = sorted(
            ((genre, count, genre_total_dict[genre])
             for genre, count in genre_dict.items()),
            key=lambda t: t[0],
        )

        # NOTE(review): the divisor 6 is a magic constant whose meaning is not
        # visible in this function - confirm against the caller.
        pyplot.title("Consensus plot for Genre {}, total number of instances {}".format(
            c, sum(it.chain(*(g[2] for g in genre_to_counts))) / 6))

        # Set up the x axis labels: one tick per genre, starting at 1.
        pyplot.xticks(list(range(1, len(genre_to_counts) + 1)),
                      [g[0] for g in genre_to_counts])
        pyplot.tick_params(axis='both', which='major', labelsize=5)

        # Now plot the y axis: one bar per genre at its tick position.
        for index, res in enumerate(genre_to_counts):
            graphics.add_bar_plot(index + 1, res[1])

        pyplot.legend(loc="upper right")

    pyplot.tight_layout()

    path = "C:\\Users\\Kevin\\Desktop\\GitHub\\Research\\Webscraper\\classification_res\\consensus_plots\\percentile_{}.pdf"
    graphics.save_fig(path.format(num_classes), pyplot)
    pyplot.close()

    print("Done")
def num_genre_per_webpage(matrix_path):
    """
    Plot, for each genre, how many other genres accompany it on its webpages.

    The label matrix is loaded with ``unpickle_obj``. Every label of a webpage
    is normalized with ``normalize_genre_string(label, 1)`` and de-duplicated.
    For each resulting genre that is not in ``bad_genre_set``, the set of the
    webpage's remaining genres is recorded, so the co-occurrence of genres is
    kept as well. The sizes of those sets are passed per genre to
    ``add_bar_plot``, the figure is saved to a hard-coded PDF path and the
    collected mapping is printed.

    :param matrix_path: path handed to ``unpickle_obj``; the loaded object is
        iterated as one collection of genre labels per webpage.
    :return: None
    """
    label_matrix = unpickle_obj(matrix_path)

    # genre -> list of "other genres" sets, one set per webpage with that genre.
    # The lambda factory is kept because it appears in the printed repr below.
    co_occurring = coll.defaultdict(lambda: [])
    for page_labels in label_matrix:
        page_genres = {normalize_genre_string(label, 1) for label in page_labels}
        for genre in page_genres:
            if genre not in bad_genre_set:
                co_occurring[genre].append(page_genres - {genre})

    # Plot it: one x position per genre, in the mapping's insertion order.
    plt.clf()
    plt.figure(1)

    genre_names = list(co_occurring)
    plt.xticks(list(range(len(genre_names))), genre_names)
    plt.yticks(range(0, 6))
    plt.tick_params(axis="both", which="major", labelsize=5)

    for position, other_genre_sets in enumerate(co_occurring.values()):
        add_bar_plot(position, [len(others) for others in other_genre_sets])

    plt.savefig("C:\\Users\\Kevin\\Desktop\\GitHub\\Research\\Webscraper\\classification_res\\genre_analysis\\genre_dist.pdf")

    # Dump the raw co-occurrence data.
    print(co_occurring)