"""Plot horizontal bar charts of value counts for selected ``event`` fields.

For every field in ``field_list`` one PDF is written to
``figures/histogram_<field>.pdf``.  A dashed vertical line marks the total
number of documents in the ``event`` collection for reference.
"""
import os

import matplotlib.pyplot as plt
import numpy as np
import pymongo

from function import count_occurences_field

client = pymongo.MongoClient("localhost", 27017)
db = client.phoenix

# Collections
genre = db.genre
event = db.event

# NOTE(review): Collection.count() is deprecated since PyMongo 3.7 and removed
# in 4.0; switch to event.count_documents({}) once the minimum PyMongo version
# allows it.
total_number_of_events = event.count()

# savefig() does not create missing directories.
if not os.path.isdir("figures"):
    os.makedirs("figures")

# Make the histograms
field_list = [
    "metadata.live",
    "metadata.new_episode",
    "metadata.new_serie",
    "metadata.premiere",
]

for fieldname in field_list:
    # Used below as a mapping (``.keys()`` / ``.values()``); presumably
    # value -> number of occurrences -- verify against ``function``.
    data = count_occurences_field(event, fieldname)
    x = list(data.keys())
    y = list(data.values())
    pos = np.arange(len(x)) + 0.5

    plt.figure()
    # Bars are drawn at the numeric positions (not at the raw category values)
    # and centred on them, so the tick labels set below line up with the bars.
    plt.barh(pos, y, align="center")
    plt.axvline(total_number_of_events, linestyle="dashed", color="black")
    plt.xlabel("Counts")
    plt.ylabel(fieldname)
    plt.yticks(pos, x)
    plt.savefig("figures/histogram_" + fieldname + ".pdf")
    # Release the figure so open figures do not accumulate across iterations.
    plt.close()
# NOTE(review): this fragment relies on names defined outside the visible code
# (os, np, plt, pymongo, db, c, root_output, threshold, get_allkeys,
# count_occurences_field, type_to_string).  It looks like the body of a loop
# over collection names ``c`` -- confirm and re-indent to match the enclosing
# loop.
os.mkdir(root_output + c)

# Get keys
current_collection = db[c]
# A single sample document: it might not contain all the keys used across all
# documents of the current collection.
document_example = current_collection.find_one()
list_keys = get_allkeys(document_example)

# NOTE(review): Cursor.count() is deprecated since PyMongo 3.7 and removed in
# 4.0; count_documents({}) is the replacement on newer versions.
total_number_of_documents = current_collection.find().count()

for k in list_keys:
    try:
        counts_distinct = len(current_collection.distinct(k))
    except pymongo.errors.OperationFailure:
        # distinct() returned too many values.  Skip this key: otherwise
        # ``counts_distinct`` would be undefined (first key) or still hold the
        # value computed for the previous key.
        print("operationfailure")
        continue

    if counts_distinct <= threshold:
        data = count_occurences_field(current_collection, k)
        x = list(data.keys())
        y = list(data.values())
        pos = np.arange(len(x)) + 0.5

        if len(x) > 1:  # do not plot if only 1 element
            if not isinstance(x[0], float):  # keep floats as-is for the plot
                x = [type_to_string(i) for i in x]
                # Decode byte strings only; labels that are already text would
                # fail (or be implicitly re-encoded) if decoded again.
                x = [i.decode('utf-8') if isinstance(i, bytes) else i for i in x]

            plt.figure()
            # Explicit centring keeps bars and tick labels aligned regardless
            # of the Matplotlib version's default ``align``.
            plt.barh(pos, y, align="center")
            plt.axvline(total_number_of_documents, linestyle="dashed", color="black")
            plt.xlabel("Counts")
            plt.ylabel(k)
            plt.yticks(pos, x)