def multi_class_misprediction_freq(res_folder):
    """
    Look at multi class instances that are frequently mispredicted.

    Only instances with more than one distinct actual genre are considered.
    Such an instance counts as mispredicted when its actual genres are not all
    contained in the first ``k`` predictions, where ``k`` is the number of
    distinct actual genres.

    Prints the ``((actual genres, predictions), count)`` pairs ordered from the
    most to the least frequent, followed by the total number of mispredicted
    multi genre instances. The running index is printed every 500 results as a
    progress indicator.

    :param res_folder: location handed to ``load_iter_from_file`` of both the
        wrong and the right result iterators
    :return: None, the results are only printed
    """

    print("Loading Iter")

    wrong_res_iter = WrongResultsIter.load_iter_from_file(res_folder)
    right_res_iter = RightResultsIter.load_iter_from_file(res_folder)

    genre_to_wrong_genre_count = coll.Counter()
    mispredicted_count = 0
    for c, res_obj in enumerate(it.chain(wrong_res_iter, right_res_iter)):
        if c % 500 == 0:
            print(c)

        actual = set(res_obj.actual)

        # single genre instances are out of scope here
        if len(actual) <= 1:
            continue

        top_predictions = set(res_obj.predicted[:len(actual)])
        if not actual <= top_predictions:
            # Sort the actual genres: a set has no guaranteed iteration order,
            # so an unsorted tuple could split one genre combination over
            # several differently ordered keys.
            key = (tuple(sorted(actual)), tuple(res_obj.predicted))
            genre_to_wrong_genre_count[key] += 1

            mispredicted_count += 1

    # most frequent misprediction first
    sorted_genre_to_wrong = genre_to_wrong_genre_count.most_common()

    print(sorted_genre_to_wrong)
    print(mispredicted_count)
def frequently_predicted_class(res_path,top_x=2):
    """
    Top x frequently predicted together class. The tuple of genre and genre is sorted so there is no repeats.

    For every instance with more than one distinct actual genre, two
    combinations are counted: the sorted first ``top_x`` predictions and the
    sorted distinct actual genres. Both counters are printed at the end.

    :param res_path: location handed to ``load_iter_from_file`` of both the
        wrong and the right result iterators
    :param top_x: how many of the leading predictions form the predicted
        combination
    :return: None, the counters are only printed
    """

    print("Loading Iter")

    wrong_res_iter = WrongResultsIter.load_iter_from_file(res_path)
    right_res_iter = RightResultsIter.load_iter_from_file(res_path)

    predicted_counter = coll.Counter()
    actual_counter = coll.Counter()

    # One pass fills both counters, so the results are read only once and the
    # second counter cannot end up empty if the loaded iterators can only be
    # traversed a single time.
    for res_obj in it.chain(wrong_res_iter, right_res_iter):
        distinct_actual = set(res_obj.actual)

        # Same multi genre test for both counters: duplicates in ``actual``
        # do not turn a single genre instance into a multi genre one.
        if len(distinct_actual) <= 1:
            continue

        predicted_counter[tuple(sorted(res_obj.predicted[:top_x]))] += 1
        actual_counter[tuple(sorted(distinct_actual))] += 1

    print("Predicted")
    print(predicted_counter)
    print("Actual")
    print(actual_counter)
def single_class_mispredition_freq(res_path,save_path=None):
    """
    Get the frequency of misprediction between single genre instances and the predicted genre

    An instance is counted when ``actual`` holds exactly one genre and that
    genre differs from the first prediction. The ``(actual, predicted)`` counts
    are handed to ``plot_word_frequency`` and the resulting plot is written
    with ``save_fig``. The running index is printed every 500 results as a
    progress indicator.

    :param res_path: location handed to ``load_iter_from_file`` of both the
        wrong and the right result iterators
    :param save_path: where the plot is saved; when omitted the original hard
        coded location is used so existing callers behave as before
    :return: None
    """

    if save_path is None:
        save_path = "C:\\\\Users\\\\Kevin\\\\Desktop\\\\GitHub\\\\Research\\\\Webscraper\\\\classification_res\\\\genre_analysis\\\\single_miss.pdf"

    print("Loading Iter")

    wrong_res_iter = WrongResultsIter.load_iter_from_file(res_path)
    right_res_iter = RightResultsIter.load_iter_from_file(res_path)

    genre_to_wrong_genre_count = coll.Counter()
    for c, res_obj in enumerate(it.chain(wrong_res_iter, right_res_iter)):
        if c % 500 == 0:
            print(c)

        actual = res_obj.actual

        # only single genre instances are of interest
        if len(actual) != 1:
            continue

        top_prediction = res_obj.predicted[0]
        if actual[0] != top_prediction:
            genre_to_wrong_genre_count[(actual[0], top_prediction)] += 1

    # plot
    plot_obj = plot_word_frequency("Single Genre Mispredition", genre_to_wrong_genre_count)
    plot_obj.tight_layout()
    save_fig(save_path, plot_obj)
def top_level_cdf(res_folder):
    """
    Print how many instances have each number of distinct actual genres.

    The printed dictionary maps a number of distinct actual genres to the
    number of results, wrong and right combined, having that many.

    :param res_folder: location handed to ``load_iter_from_file`` of both the
        wrong and the right result iterators
    :return: None, the distribution is only printed
    """
    print("Loading Iter")

    all_results = it.chain(
        WrongResultsIter.load_iter_from_file(res_folder),
        RightResultsIter.load_iter_from_file(res_folder),
    )

    genre_count_tally = coll.Counter(len(set(res.actual)) for res in all_results)

    # printed as a plain dict, keys in first seen order
    print(dict(genre_count_tally))
def consensus_class(res_path,top_prediction=2,filter_func=lambda x:len(x)==4,results_per_instance=6):
    """
    Calculates the portions of the predictions that agrees with the classes's multiple classes

    The right results are grouped by ``ref_id``. For every group whose distinct
    actual genres pass ``filter_func``, each actual genre gets a hit count: the
    number of result objects of the group that have the genre among their
    first ``top_prediction`` predictions. The hit counts are ordered from
    highest to lowest and added up per rank over all groups.

    For example: if a group has three result objects and the actual genres
    (a, b), with a among the top predictions of all three and b of only one,
    then rank 0 gains 3 hits and rank 1 gains 1 hit.

    Prints the summed hits per rank, then the summed totals per rank.

    :param res_path: location handed to ``RightResultsIter.load_iter_from_file``
    :param top_prediction: how many leading predictions of a result object are
        checked for an actual genre
    :param filter_func: receives the tuple of distinct actual genres of a
        group; the group is skipped when it returns a falsy value
    :param results_per_instance: amount added to the total of every rank for
        each accepted group; presumably the number of result objects stored
        per ``ref_id`` - TODO confirm
    :return: None, the results are only printed
    """
    # rank (0 = actual genre with the most agreement) -> summed hits / totals
    consensus_count = coll.defaultdict(int)
    consensus_total = coll.defaultdict(int)

    print("Loading Iter")

    # only the right results are read here
    right_res_iter = RightResultsIter.load_iter_from_file(res_path)

    # gather together all of an instance's data and id
    right_res_instances = coll.defaultdict(list)
    for res_obj in right_res_iter:
        right_res_instances[res_obj.ref_id].append(res_obj)

    for list_of_res_obj in right_res_instances.values():
        # the first result object supplies the actual genres of the group
        actual = tuple(set(list_of_res_obj[0].actual))

        if not filter_func(actual):
            continue

        # built once per group instead of once per actual genre
        top_predicted = [set(res_obj.predicted[:top_prediction]) for res_obj in list_of_res_obj]

        hits_by_rank = sorted(
            (sum(g in top for top in top_predicted) for g in actual),
            reverse=True,
        )

        for rank, hits in enumerate(hits_by_rank):
            consensus_count[rank] += hits
            consensus_total[rank] += results_per_instance

    print(sorted(consensus_count.items()))
    print(sorted(consensus_total.items()))
def consensus_class_per_genre(res_path,top_prediction=2,filter_func=lambda x:len(x)==4,results_per_instance=6):
    """
    Get the consensus for each genre, return the list

    The right results are grouped by ``ref_id``. For every group whose distinct
    actual genres pass ``filter_func``, each actual genre gets a hit count: the
    number of result objects of the group that have the genre among their
    first ``top_prediction`` predictions. The genres of a group are ranked by
    hit count, highest first, and the count is recorded under the genre's rank.

    :param res_path: location handed to ``RightResultsIter.load_iter_from_file``
    :param top_prediction: how many leading predictions of a result object are
        checked for an actual genre
    :param filter_func: receives the tuple of distinct actual genres of a
        group; the group is skipped when it returns a falsy value
    :param results_per_instance: value recorded in the totals for every hit
        count; presumably the number of result objects stored per ``ref_id``
        - TODO confirm
    :return: tuple ``(consensus_count, consensus_total)``; both map
        rank -> genre -> list with one entry per accepted group, holding the
        hit count and ``results_per_instance`` respectively
    """
    # rank (0 = actual genre with the most agreement) -> genre -> list of values
    consensus_count = coll.defaultdict(lambda: coll.defaultdict(list))
    consensus_total = coll.defaultdict(lambda: coll.defaultdict(list))

    print("Loading Iter")

    # only the right results are read here
    right_res_iter = RightResultsIter.load_iter_from_file(res_path)

    # gather together all of an instance's data and id
    right_res_instances = coll.defaultdict(list)
    for res_obj in right_res_iter:
        right_res_instances[res_obj.ref_id].append(res_obj)

    for list_of_res_obj in right_res_instances.values():
        # the first result object supplies the actual genres of the group
        actual = tuple(set(list_of_res_obj[0].actual))

        if not filter_func(actual):
            continue

        # built once per group instead of once per actual genre
        top_predicted = [set(res_obj.predicted[:top_prediction]) for res_obj in list_of_res_obj]

        genre_consensus = {g: sum(g in top for top in top_predicted) for g in actual}

        # stable sort: genres with equal hit counts keep their order in ``actual``
        ranked = sorted(genre_consensus.items(), key=op.itemgetter(1), reverse=True)

        for rank, (g, count) in enumerate(ranked):
            consensus_count[rank][g].append(count)
            consensus_total[rank][g].append(results_per_instance)

    return consensus_count, consensus_total