import pymongo
from function import count_occurences_field

# Open a client on the local MongoDB server and select the "phoenix" database.
client = pymongo.MongoClient(host="localhost", port=27017)
db = client["phoenix"]

## collections
genre = db["genre"]
event = db["event"]

# Number of documents in the `event` collection.
total_number_of_events = event.count()

## Make the histograms: one horizontal bar chart per field in `field_list`,
## saved as figures/histogram_<fieldname>.pdf.
# NOTE(review): `np` and `plt` are used below, but the only imports visible in
# this file are `pymongo` and `count_occurences_field` -- confirm that numpy
# and matplotlib.pyplot are imported.
field_list = ["metadata.live", "metadata.new_episode", "metadata.new_serie", "metadata.premiere"]

for fieldname in field_list:
    # Presumably a mapping {field value: number of events} -- verify against
    # count_occurences_field in the `function` module.
    data = count_occurences_field(event, fieldname)
    # Materialise as lists so they can be sized and passed to the plotting
    # calls regardless of whether keys()/values() return lists or views.
    x = list(data.keys())    # tick labels
    y = list(data.values())  # bar lengths

    # One slot per distinct value; bars and tick labels share these positions.
    pos = np.arange(len(x)) + 0.5

    plt.figure()
    # Draw the bars at `pos` (not at the raw keys) and centre them there, so
    # the labels set by yticks() below line up with the bars.
    plt.barh(pos, y, align="center")
    # Dashed reference line at the total number of events.
    plt.axvline(total_number_of_events, linestyle="dashed", color="black")
    plt.xlabel("Counts")
    plt.ylabel(fieldname)
    plt.yticks(pos, x)
    plt.savefig("figures/histogram_" + fieldname + ".pdf")
    # Release the figure once it is on disk so figures do not accumulate
    # across iterations.
    plt.close()

        # NOTE(review): the statement that owns this indented line, and the
        # definitions of `os`, `root_output` and `c`, are not visible in this
        # part of the file -- confirm the surrounding structure.
        # Create the directory `root_output + c`.
        os.mkdir(root_output+c)

    ## Get the keys of the collection named `c`.
    current_collection = db[c]
    # A single sample document is used to discover the keys; it might not
    # contain all the keys across all documents of the current collection.
    document_example = current_collection.find_one()
    list_keys = get_allkeys(document_example)
    # Total number of documents in the collection; drawn as the dashed
    # reference line in the plots produced below.
    total_number_of_documents = current_collection.find().count()
    
    # For every discovered key, plot a horizontal bar chart of its value counts
    # when the key has at most `threshold` distinct values.
    for k in list_keys:
        try:
            counts_distinct = len(current_collection.distinct(k))
        except pymongo.errors.OperationFailure:
            # The server rejected the distinct query (per the original note:
            # when it returns too many documents). Skip this key; otherwise
            # `counts_distinct` would be undefined on the first key or stale
            # from the previous one when tested below.
            print("operationfailure")
            continue

        if counts_distinct <= threshold:
            # Presumably a mapping {value: number of documents} -- verify
            # against count_occurences_field in the `function` module.
            data = count_occurences_field(current_collection, k)
            # Lists (not views) so that x[0] and len() work on any Python.
            x = list(data.keys())
            y = list(data.values())
            pos = np.arange(len(x)) + 0.5

            if len(x) > 1:  # do not plot if only 1 element
                if not isinstance(x[0], float):  # keep float for plot
                    # Non-float keys are converted to text labels.
                    # NOTE(review): `.decode` assumes type_to_string returns
                    # byte strings (Python 2 `str`) -- confirm.
                    x = [type_to_string(i) for i in x]
                    x = [i.decode('utf-8') for i in x]

                plt.figure()
                plt.barh(pos, y)
                # Dashed reference line at the collection's document count.
                plt.axvline(total_number_of_documents, linestyle="dashed", color="black")
                plt.xlabel("Counts")
                plt.ylabel(k)
                # NOTE(review): the +0.5 tick offset presumably targets the
                # middle of edge-aligned bars -- confirm against the
                # matplotlib version in use.
                plt.yticks(pos+0.5, x)