예제 #1
0
def single_class_mispredition_freq(res_path):
    """
    Count how often single genre instances are predicted as a different genre and plot the counts.

    Both the wrong and the right result iterators are loaded from res_path and walked one after
    the other. Only results whose actual genre list has exactly one entry are considered; each
    (actual genre, first predicted genre) pair that differs is tallied.

    :param res_path: path handed to load_iter_from_file of WrongResultsIter and RightResultsIter
    :return: None, the plot is written to a fixed pdf path
    """

    print("Loading Iter")

    all_results=it.chain(WrongResultsIter.load_iter_from_file(res_path),
                         RightResultsIter.load_iter_from_file(res_path))

    #(actual genre, predicted genre) -> number of times that confusion occurred
    miss_pair_counts=coll.Counter()
    for position,result in enumerate(all_results):
        #progress output every 500 results
        if position%500==0:
            print(position)

        true_genres=result.actual

        #skip anything that is not a single genre instance
        if len(true_genres)!=1:
            continue

        top_prediction=result.predicted[0]
        if true_genres[0]!=top_prediction:
            miss_pair_counts[(true_genres[0],top_prediction)]+=1

    #plot
    figure=plot_word_frequency("Single Genre Mispredition",miss_pair_counts)
    figure.tight_layout()
    save_fig("C:\\\\Users\\\\Kevin\\\\Desktop\\\\GitHub\\\\Research\\\\Webscraper\\\\classification_res\\\\genre_analysis\\\\single_miss.pdf",
             figure)
예제 #2
0
def plot_miss_per_genre(path,outpath,classifiers=None):
    """
    Given the path to classification result folder of multiple classifier,
    produce plots of the classifiers' misses.

    Wrong results are read with WrongResultsIter; results for which is_swing_sample() is true are
    skipped. For every classifier and every actual genre one plot of the wrongly predicted genres
    is saved to OUTPATH/<classifier>/<actual genre>_miss_true.pdf.

    :param path: the input folder where the classifiers' result(s) are
    :param outpath: the output folder; one sub folder per classifier is created inside it
    :param classifiers: A set of classifier whose results to graph. Note that if none, all of classifier's
        results will be combined.

    :return: None
    """

    #grab the actual misses, counter in default dict in default dict. First layer for classifiers, second layer
    #is for correct genres, finally the counter is to count how many times it got miss classified as something else
    classifier_to_misses_genre=collections.defaultdict(lambda:collections.defaultdict(collections.Counter))
    for true_miss in WrongResultsIter(path,classifiers):
        if true_miss.is_swing_sample():
            continue

        assert isinstance(true_miss,ClassificationResultInstance)

        #read the public "actual" attribute: outside of a class body "__actual" is not name mangled, so it
        #would look up an attribute literally called "__actual"
        classifier_to_misses_genre[true_miss.classifier][true_miss.actual][true_miss.predicted]+=1


    #now plot each one, output to OUTPUT/classifier
    for classifier, actual_to_miss in classifier_to_misses_genre.items():
        #create the classifier's folder once, together with any missing parent folder; an already
        #existing folder is not an error
        out_path=os.path.join(outpath,classifier)
        os.makedirs(out_path,exist_ok=True)

        for actual_genre,miss_freq in actual_to_miss.items():
            #plot_top=len(miss_freq) asks for every wrongly predicted genre to be shown
            plt=plot_word_frequency("{}-{} Misclassifications".format(classifier,actual_genre),miss_freq,plot_top=len(miss_freq))

            save_fig("{}/{}_miss_true.pdf".format(out_path,actual_genre),plt)
            plt.close()
예제 #3
0
def plot_consensus_percentile(consensus_count,consensus_total):
    """
    Draw one subplot per class from the consensus counts and save the figure as a pdf.

    The entries of consensus_count are processed in sorted order, one subplot per entry, stacked
    vertically. Within a subplot the genres are sorted by name and each genre's count is handed to
    graphics.add_bar_plot (the original documentation describes this as a 90th percentile plot).

    :param consensus_count: mapping whose values are mappings of genre to count
    :param consensus_total: indexed by the position of the class in the sorted consensus_count; each
        element is indexed by genre and yields an iterable of totals
    :return: None
    """

    consensus_count=sorted(consensus_count.items())
    num_classes=len(consensus_count)

    pyplot.figure(1)
    for c,(_,genre_dict) in enumerate(consensus_count):

        #subplot indices are 1 based, the first subplot is index 1
        pyplot.subplot(num_classes,1,c+1)

        #NOTE(review): the totals are looked up by position c, not by the key of the sorted entry -
        #this only lines up if consensus_total is indexed 0..num_classes-1 in the same order
        genre_total_dict=consensus_total[c]

        #(genre, count, totals) sorted by genre name
        genre_to_counts=sorted(((genre,count,genre_total_dict[genre]) for genre,count in genre_dict.items()),
                               key=lambda t:t[0])

        #NOTE(review): the divisor 6 is kept from the original code, its meaning is not visible here
        pyplot.title("Consensus plot for Genre {}, total number of instances {}".format(c,sum(it.chain(*(g[2] for g in genre_to_counts)))/6))

        #set up xaxis labels
        pyplot.xticks(list(range(1,len(genre_to_counts)+1)),[g[0] for g in genre_to_counts])
        pyplot.tick_params(axis='both', which='major', labelsize=5)

        #now plot y axis, bar positions start at 1 to match the xticks above
        for index,res in enumerate(genre_to_counts):
            graphics.add_bar_plot(index+1,res[1])

        pyplot.legend(loc="upper right")


    pyplot.tight_layout()

    path="C:\\Users\\Kevin\\Desktop\\GitHub\\Research\\Webscraper\\classification_res\\consensus_plots\\percentile_{}.pdf"
    graphics.save_fig(path.format(num_classes),pyplot)
    pyplot.close()

    print("Done")
예제 #4
0
def plot_total_consensus(consensus_count,consensus_total):
    """
    Draw, per class, the summed consensus counts on top of the summed consensus totals and save
    the figure as a pdf.

    The entries of consensus_count are processed in sorted order, one subplot per entry, stacked
    vertically. Within a subplot the genres are sorted by name; the totals bar is drawn first and
    the counts bar is drawn over it at the same position.

    :param consensus_count: mapping whose values are mappings of genre to an iterable of counts
    :param consensus_total: indexed by the position of the class in the sorted consensus_count; each
        element is indexed by genre and yields an iterable of totals
    :return: None
    """
    consensus_count=sorted(consensus_count.items())
    num_classes=len(consensus_count)

    pyplot.close()
    pyplot.figure(1)

    for c,(_,genre_dict) in enumerate(consensus_count):

        #subplot indices are 1 based, the first subplot is index 1
        pyplot.subplot(num_classes,1,c+1)

        #NOTE(review): the totals are looked up by position c, not by the key of the sorted entry -
        #this only lines up if consensus_total is indexed 0..num_classes-1 in the same order
        genre_total_dict=consensus_total[c]

        #(genre, summed count, summed total) sorted by genre name
        genre_to_counts=sorted(((genre,sum(count),sum(genre_total_dict[genre])) for genre,count in genre_dict.items()),
                               key=lambda t:t[0])

        pyplot.title("Consensus plot for Genre {}".format(c))

        #no pyplot.hold toggling: successive bar calls add to the same axes by default and
        #pyplot.hold no longer exists in current matplotlib
        positions=range(len(genre_to_counts))
        pyplot.bar(positions,[g[2] for g in genre_to_counts],color='#deb0b0',label="Consensus Total",align='center')
        pyplot.bar(positions,[g[1] for g in genre_to_counts],color='#b0c4de',label="Consensus Counts",align='center')

        pyplot.xticks(positions,[g[0] for g in genre_to_counts],size= 5)

        #the legend is created but kept hidden
        legend=pyplot.legend(loc="upper right")
        legend.set_visible(False)


    path="C:\\Users\\Kevin\\Desktop\\GitHub\\Research\\Webscraper\\classification_res\\consensus_plots\\total_{}.pdf"
    graphics.save_fig(path.format(num_classes),pyplot)
    pyplot.close()

    print("Done")
예제 #5
0
def get_all_mi_and_plot(reversed=False):
    """
    Plot the bag of words mutual information of every entry yielded by MutualInformation().iterable().

    For each entry the words made up only of digits are dropped, the remaining word -> value mapping
    is plotted with graphics.plot_word_frequency and the figure is saved under graphs/, with any "/"
    in the genre name replaced by "_". The genre name is printed after each save.

    :param reversed: passed through to graphics.plot_word_frequency; when true the file name is
        prefixed with "reversed_"
    :return: None!
    """
    for record in MutualInformation().iterable():

        genre_name = record["short_genre"]

        # keep every word that is not purely numeric
        word_scores = {word: score for word, score in record["bow"].items() if not word.isdigit()}

        figure = graphics.plot_word_frequency(genre_name, word_scores, reversed=reversed)

        file_prefix = "reversed_" if reversed else ""
        file_stem = file_prefix + genre_name.replace("/", "_")
        graphics.save_fig("graphs/{}.pdf".format(file_stem), figure)

        print(genre_name)