コード例 #1
0
def plot_consensus_percentile(consensus_count,consensus_total):
    """
    Draw one bar-chart subplot per class and save the figure as a PDF.

    Classes are taken from ``consensus_count`` in sorted key order and stacked
    vertically, one subplot each. Inside a subplot one bar is added per genre
    (genres sorted by name) through ``graphics.add_bar_plot``.

    :param consensus_count: mapping of class key -> {genre: count}. ``count``
        is handed unchanged to ``graphics.add_bar_plot``.
    :param consensus_total: indexed by the 0-based position of the class in
        sorted order (NOT by the class key); each entry maps genre -> an
        iterable of numbers that is summed for the subplot title.
        NOTE(review): this only lines up with ``consensus_count`` if the keys
        are 0..n-1 or ``consensus_total`` is an ordered sequence - confirm
        against the caller.
    :return: None. The figure is written to a hard-coded path whose file name
        contains the number of classes, then closed.
    """

    # Sort on the class key. Dict keys are unique, so this yields the same
    # order as sorting the whole (key, value) items while stating the intent.
    consensus_count=sorted(consensus_count.items(),key=lambda entry:entry[0])
    num_classes=len(consensus_count)

    pyplot.figure(1)
    for c,(_,genre_dict) in enumerate(consensus_count):

        # Matplotlib subplot indices are 1-based; the loop counter is 0-based,
        # so shift it by one (index 0 is rejected by current Matplotlib).
        pyplot.subplot(num_classes,1,c+1)
        genre_total_dict=consensus_total[c]

        # (genre, count, totals) triples ordered by genre name so that the
        # bars and the x tick labels below share the same order.
        genre_to_counts=sorted(
            ((genre,count,genre_total_dict[genre]) for genre,count in genre_dict.items()),
            key=lambda t:t[0]
        )

        # NOTE(review): the division by 6 is carried over from the original;
        # its meaning is not visible here - confirm. `it` is presumably
        # itertools, imported elsewhere in the file.
        total_instances=sum(it.chain(*(g[2] for g in genre_to_counts)))/6
        pyplot.title("Consensus plot for Genre {}, total number of instances {}".format(c,total_instances))

        #set up xaxis labels: bars sit at x = 1..n, labelled with the genre names
        pyplot.xticks(list(range(1,len(genre_to_counts)+1)),[g[0] for g in genre_to_counts])
        pyplot.tick_params(axis='both', which='major', labelsize=5)

        #now plot y axis
        for index,res in enumerate(genre_to_counts):
            graphics.add_bar_plot(index+1,res[1])

        pyplot.legend(loc="upper right")


    pyplot.tight_layout()

    path="C:\\Users\\Kevin\\Desktop\\GitHub\\Research\\Webscraper\\classification_res\\consensus_plots\\percentile_{}.pdf"
    graphics.save_fig(path.format(num_classes),pyplot)
    pyplot.close()

    print("Done")
コード例 #2
0
ファイル: genre_count.py プロジェクト: wangk1/research
def num_genre_per_webpage(matrix_path):
    """
    Plot, for every genre, how many other genres share a webpage with it.

    The pickled label matrix at ``matrix_path`` is loaded and each row (the
    genre labels of one webpage) is passed label by label through
    ``normalize_genre_string(label, 1)`` and de-duplicated. For every resulting
    genre that is not in ``bad_genre_set``, the set of the *other* genres on
    that page is recorded, so the co-occurring genres are kept as well as
    their number.

    One ``add_bar_plot`` call is made per genre with the list of per-page
    co-genre counts, the figure is saved to a hard-coded PDF path, and the
    collected mapping is printed.

    :param matrix_path: path handed to ``unpickle_obj``.
    :return: None
    """

    label_matrix=unpickle_obj(matrix_path)

    # genre -> list of sets; one set of co-occurring genres per webpage
    co_genres_by_genre=coll.defaultdict(lambda:[])

    for page_labels in label_matrix:
        page_genres={normalize_genre_string(label,1) for label in page_labels}

        for genre in page_genres:
            if genre not in bad_genre_set:
                # May be an empty set when the page carries only this genre.
                co_genres_by_genre[genre].append(page_genres-{genre})

    genre_items=co_genres_by_genre.items()
    tick_labels=[genre for genre,_ in genre_items]

    plt.clf()
    plt.figure(1)

    # One x position per genre, labelled with the genre name.
    plt.xticks(list(range(len(genre_items))),tick_labels)
    plt.yticks(range(6))
    plt.tick_params(axis="both",which="major",labelsize=5)

    for position,(_,co_genre_sets) in enumerate(genre_items):
        add_bar_plot(position,[len(co_genres) for co_genres in co_genre_sets])

    plt.savefig("C:\\Users\\Kevin\\Desktop\\GitHub\\Research\\Webscraper\\classification_res\\genre_analysis\\genre_dist.pdf")

    # Dump the raw mapping for inspection.
    print(co_genres_by_genre)