예제 #1
0
    def generate_cluster_distribution_graphs(self,res_file,occurence_dict,res_labels):
        """
        Write one word-frequency plot per cluster into a multi-page PDF.

        For every cluster in occurence_dict a subplot is requested from subplot_four_corner, the number of
        entries in res_labels equal to the cluster name is printed and placed in the plot title, and the
        cluster's genre frequencies are drawn with plot_word_frequency. A page is written to the PDF each
        time subplot_four_corner reports the last subplot of a figure; a trailing, partially filled figure
        is written once the loop is done.

        :param res_file: target handed to PdfPages
        :param occurence_dict: mapping of cluster name to the genre frequencies of that cluster
        :param res_labels: cluster labels compared against each cluster name to count samples
            (presumably a numpy array of the training set's labels -- confirm with the caller)
        :return: None
        """
        with PdfPages(res_file) as pdf:
            #figure that has received subplots but has not been written to the pdf yet
            unsaved_figure=None

            for plot_index,(cluster_name,genre_freq) in enumerate(occurence_dict.items()):
                page_complete,current_figure=subplot_four_corner(plot_index)

                cluster_size=np.sum(res_labels==cluster_name)
                print("Total number of samples in cluster {} is {}".format(cluster_name,cluster_size))

                title="cluster {}, num samples: {}".format(cluster_name,cluster_size)
                plot_word_frequency(title,genre_freq)

                if page_complete:
                    pdf.savefig(current_figure)
                    plt.close()
                    unsaved_figure=None
                else:
                    unsaved_figure=current_figure

            #flush the last page when the number of clusters did not fill it completely
            if unsaved_figure is not None:
                pdf.savefig(unsaved_figure)
                plt.close()
예제 #2
0
def single_class_mispredition_freq(res_path,out_path=None):
    """
    Plot how often single-genre instances were predicted as a different genre.

    Results are loaded from res_path through both WrongResultsIter and RightResultsIter. For every result
    whose actual genre list has exactly one element and whose first predicted genre differs from it, the
    pair (actual genre, first predicted genre) is counted. The counts are plotted with plot_word_frequency
    and saved with save_fig.

    :param res_path: file the result iterators are loaded from
    :param out_path: where the PDF is saved. When None, the original hard-coded location is used so existing
        callers keep their behavior.
    :return: None
    """
    if out_path is None:
        #Historical default, kept byte-for-byte.
        #NOTE(review): this literal produces doubled backslash separators at runtime -- confirm that is intended.
        out_path="C:\\\\Users\\\\Kevin\\\\Desktop\\\\GitHub\\\\Research\\\\Webscraper\\\\classification_res\\\\genre_analysis\\\\single_miss.pdf"

    print("Loading Iter")

    wrong_res_iter=WrongResultsIter.load_iter_from_file(res_path)
    right_res_iter=RightResultsIter.load_iter_from_file(res_path)

    genre_to_wrong_genre_count=coll.Counter()
    for c,res_obj in enumerate(it.chain(wrong_res_iter,right_res_iter)):
        #progress output every 500 results
        if c%500==0:
            print(c)

        actual=res_obj.actual
        top_prediction=res_obj.predicted[0]

        #only single genre instances whose top prediction missed
        if len(actual)==1 and actual[0] != top_prediction:
            genre_to_wrong_genre_count[(actual[0],top_prediction)]+=1

    #plot
    plot=plot_word_frequency("Single Genre Mispredition",genre_to_wrong_genre_count)
    plot.tight_layout()
    save_fig(out_path,plot)
예제 #3
0
def plot_miss_per_genre(path,outpath,classifiers=None):
    """
    Given the path to the classification result folder of multiple classifiers, plot each classifier's misses.

    Every wrong result that is not a swing sample is counted per classifier and per actual genre, recording
    which genre it was predicted as instead. One PDF per (classifier, actual genre) pair is then written to
    OUTPATH/<classifier>/<actual genre>_miss_true.pdf.

    :param path: the input folder where the classifiers' result(s) are
    :param outpath: folder under which one sub-folder per classifier is created (if missing) to hold the plots
    :param classifiers: A set of classifiers whose results to graph. According to the original author's note,
        if None, the results of all classifiers are combined.

    :return: None
    """

    #Three layers: classifier -> actual genre -> Counter of the genres it was misclassified as
    classifier_to_misses_genre=collections.defaultdict(lambda:collections.defaultdict(collections.Counter))
    for true_miss in WrongResultsIter(path,classifiers):
        if true_miss.is_swing_sample():
            continue

        assert isinstance(true_miss,ClassificationResultInstance)

        #The public attribute is read here: `__actual` is not name-mangled outside of a class body, so the
        #previous `true_miss.__actual` looked up an attribute literally called "__actual".
        #NOTE(review): assumes ClassificationResultInstance exposes a hashable `actual` -- confirm.
        classifier_to_misses_genre[true_miss.classifier][true_miss.actual].update([true_miss.predicted])

    #now plot each one, output to OUTPUT/classifier
    for classifier,actual_to_miss in classifier_to_misses_genre.items():
        out_path=os.path.join(outpath,classifier)
        #creates missing parents too and does not fail when the folder already exists
        os.makedirs(out_path,exist_ok=True)

        for actual_genre,miss_freq in actual_to_miss.items():
            plt=plot_word_frequency("{}-{} Misclassifications".format(classifier,actual_genre),miss_freq,plot_top=len(miss_freq))

            save_fig("{}/{}_miss_true.pdf".format(out_path,actual_genre),plt)
            plt.close()
예제 #4
0
def get_all_mi_and_plot(reversed=False):
    """
    Plot and save the bag-of-words mutual information of every genre.

    Iterates the records yielded by MutualInformation().iterable(). For each record the "bow" mapping is
    stripped of keys made up only of digits, plotted with graphics.plot_word_frequency under the record's
    "short_genre" title, and saved through graphics.save_fig to graphs/<name>.pdf, where "/" in the genre
    is replaced by "_". The genre is printed after its plot has been saved.

    :param reversed: forwarded to graphics.plot_word_frequency; when truthy the output file name is
        prefixed with "reversed_"
    :return: None!
    """
    for record in MutualInformation().iterable():
        genre = record["short_genre"]
        bow_scores = record["bow"]

        # drop purely numeric tokens before plotting
        word_scores = {word: score for word, score in bow_scores.items() if not word.isdigit()}

        figure = graphics.plot_word_frequency(genre, word_scores, reversed=reversed)

        prefix = "reversed_" if reversed else ""
        file_name = "graphs/{}.pdf".format(prefix + genre.replace("/", "_"))
        graphics.save_fig(file_name, figure)

        print(genre)