def save_user_counts():
    """Save each user's stored feature counts to the GENERAL_STATS CSV file.

    Queries ``user_coll`` for documents that have a ``username`` field,
    requesting only the count fields named by the values of ``CONNS``.
    Writes a header row, then one row per user holding the document's
    ``_id`` (under the "username" column) followed by the value of every
    count field in header order.

    Raises:
        KeyError: if a returned document lacks one of the count fields.
    """
    # Snapshot the count-field names once so the projection, the header
    # and every data row share exactly the same column order.
    count_fields = list(CONNS.values())

    # The context manager guarantees the CSV is flushed and closed even
    # when the query or a row lookup fails part-way through.
    with open(GENERAL_STATS, "w") as handle:
        statfile = csv.writer(handle)

        cursor = user_coll.find(
            spec={"username": {"$exists": True}},
            fields=count_fields,
            timeout=False)
        try:
            # Write CSV header
            statfile.writerow(["username"] + count_fields)
            for user in cursor:
                statfile.writerow(
                    [user["_id"]] + [user[field] for field in count_fields])
        finally:
            # A cursor opened with timeout=False is not reaped by the
            # server, so release it explicitly if iteration is cut short.
            cursor.close()
def save_count_mismatch():
    """Save every (user, feature) whose stored count disagrees with reality.

    Queries ``user_coll`` for documents that have a ``username`` field,
    requesting each feature field (keys of ``CONNS``) together with its
    count field (values of ``CONNS``). For every feature whose actual
    length differs from the stored count, one row is written to the
    MISMATCH_STATS CSV file: the document's ``_id`` (under the "username"
    column), the feature name, the real length and the stored count.

    Raises:
        KeyError: if a returned document lacks a feature or count field.
    """
    field_names = list(CONNS.keys()) + list(CONNS.values())

    # Find users with complete documents
    cursor = user_coll.find(
        spec={"username": {"$exists": True}},
        fields=field_names,
        timeout=False)
    try:
        # The context manager guarantees the CSV is flushed and closed
        # even when a document lookup fails part-way through.
        with open(MISMATCH_STATS, "w") as handle:
            statfile = csv.writer(handle)
            # CSV header
            statfile.writerow(
                ["username", "connection", "real_count", "stored_count"])

            for user in cursor:
                for conn, count_field in CONNS.items():
                    real_count = len(user[conn])
                    stored_count = user[count_field]
                    if real_count != stored_count:
                        statfile.writerow(
                            [user["_id"], conn, real_count, stored_count])
    finally:
        # A cursor opened with timeout=False is not reaped by the server,
        # so release it explicitly if iteration is cut short.
        cursor.close()
# Collection holding the user documents that every function in this file
# reads from or writes to.
user_coll = MONGO_COLLECTION


def correct_counts(conn_list):
    """Rewrite stored counts that disagree with the real feature length.

    For each feature name in ``conn_list``, scans the documents in
    ``user_coll`` that have a ``username`` field, compares the length of
    the feature field with the value stored in its count field
    (``CONNS[conn]``), and ``$set``s the count field to the real length
    wherever the two differ.

    The whole list is validated before anything is written: if any name
    is not a key of ``CONNS`` an error line is printed and the function
    returns without touching the database.

    Args:
        conn_list: iterable of feature names; each must be a key of CONNS.

    Raises:
        KeyError: if a returned document lacks the feature or count field.
    """
    # Materialise so that one-shot iterables and dict views can be walked
    # twice (once to validate, once to process).
    conn_list = list(conn_list)

    # Validate everything up front so an invalid name late in the list
    # cannot leave the collection half-corrected.
    for conn in conn_list:
        if conn not in CONNS:
            print("ERROR: One of the metaconnections given is not valid: %s"
                  % (conn,))
            return

    for conn in conn_list:
        count_field = CONNS[conn]
        cursor = user_coll.find(
            spec={"username": {"$exists": True}},
            fields=[conn, count_field],
            timeout=False)

        # Gather the fixes first and apply them only after the scan has
        # finished, so no document is updated while the cursor is open.
        corrections = {}
        try:
            for user in cursor:
                real_count = len(user[conn])
                if real_count != user[count_field]:
                    corrections[user["_id"]] = real_count
        finally:
            # A cursor opened with timeout=False is not reaped by the
            # server, so release it explicitly if iteration is cut short.
            cursor.close()

        for each_id, real_count in corrections.items():
            user_coll.update(
                spec={"_id": each_id},
                document={"$set": {count_field: real_count}})


if __name__ == "__main__":
    import sys
    correct_counts(CONNS.keys())