def describe_venue(venues, city, depth=2, limit=None):
    """Summarise the venues of `city` per (rolled-up) category.

    The `venues` collection is queried through an aggregation pipeline that
    counts venues and sums their likes for each raw category. Every raw
    category is then replaced by the element found at index `depth` of its
    category path (or by the last element when the path is not that long),
    and the figures of categories sharing the same ancestor are added up.

    Return an OrderedDict mapping each resulting category to a tuple
    `(percentage of venues, number of venues, number of likes)`, ordered by
    decreasing percentage.
    """
    pipeline = cm.build_query(city, False, ['cat', 'likes'], limit)
    pipeline.extend([
        {'$group': {'_id': '$cat',
                    'count': {'$sum': 1},
                    'like': {'$sum': '$likes'}}},
        {'$sort': {'count': -1}},
    ])
    rows = venues.aggregate(pipeline)['result']

    def parenting_cat(place, depth):
        """Return the category of `place`, without going beyond `depth`"""
        _, path = fsc.search_categories(place['_id'])
        # Fall back on the deepest known category when the path is too short.
        level = depth if len(path) > depth else -1
        # NOTE(review): the slice syntax is presumably a reverse lookup on a
        # bidirectional mapping -- confirm against the definition of
        # fsc.CAT_TO_ID.
        return fsc.CAT_TO_ID[:path[level]]

    totals = defaultdict(lambda: (0, 0))
    nb_venues = 0
    for row in rows:
        # Venues without any category are left out of the statistics.
        if row['_id'] is None:
            continue
        cat = parenting_cat(row, depth)
        count, like = row['count'], row['like']
        nb_venues += count
        seen_count, seen_like = totals[cat]
        totals[cat] = (seen_count + count, seen_like + like)

    # Prepend each category's share (in percent) of all the counted venues.
    with_share = ((cat, (100.0*count/nb_venues, count, like))
                  for cat, (count, like) in totals.items())
    ranked = sorted(with_share, key=lambda entry: entry[1][0], reverse=True)
    return OrderedDict(ranked)
def all_places_from_venue(checkins, city, converse=False):
    """Map every venue of `city` to the list of Twitter place ids seen with it.

    Check-ins are grouped on their `lid` field and the `place` field of each
    of them is pushed into the group's list. With `converse` set, the two
    roles are exchanged: the result maps each `place` to a list of `lid`.

    Groups whose key is falsy (e.g. None) are dropped from the result.
    """
    pipeline = cm.build_query(city, fields=['lid', 'place'])
    if converse:
        key, collected = '$place', '$lid'
    else:
        key, collected = '$lid', '$place'
    pipeline.append({"$group": {'_id': key, 'others': {'$push': collected}}})

    mapping = {}
    for group in checkins.aggregate(pipeline)['result']:
        if group['_id']:
            mapping[group['_id']] = group['others']
    return mapping
def venues_activity(checkins, city, limit=None):
    """Compute the visiting time patterns of the venues of `city`.

    Check-ins are grouped by venue (`lid`) and the times of each group are
    handed over to `aggregate_visits`, which yields a pair of patterns.

    Return two lists, `(hourly, weekly)`, holding respectively the first and
    the second element of that pair for every venue. When `limit` is a
    positive integer, venues are sorted by decreasing number of check-ins
    inside the pipeline so that only the most visited ones are kept.
    """
    pipeline = cm.build_query(city, True, ['lid', 'time'], limit)
    per_venue = {'_id': '$lid',
                 'count': {'$sum': 1},
                 'visits': {'$push': '$time'}}
    # NOTE(review): index 2 assumes build_query returns at least two leading
    # stages that must run before the grouping -- confirm against its code.
    pipeline.insert(2, {'$group': per_venue})

    keep_most_visited = isinstance(limit, int) and limit > 0
    if keep_most_visited:
        # The sort goes right before the last stage (presumably the `$limit`
        # added by build_query), so that the truncation applies to the
        # ranked venues.
        pipeline.insert(-1, {'$sort': {'count': -1}})

    venues = checkins.aggregate(pipeline)['result']
    patterns = [aggregate_visits(venue['visits']) for venue in venues]
    hourly = [hour for hour, _ in patterns]
    weekly = [day for _, day in patterns]
    return hourly, weekly
def output_checkins(city, host=cm.HOST, port=cm.PORT):
    """Dump the check-ins of `city` as a JavaScript array literal.

    Every check-in becomes a `[longitude, latitude, hour]` triple, where both
    coordinates are perturbed by `noise()`. The result is written in the
    current directory to the file `<city>_fs.js`.
    """
    database = cm.connect_to_db('foursquare', host, port)[0]
    pipeline = cm.build_query(city, venue=False, fields=['loc', 'time'])
    documents = database['checkin'].aggregate(pipeline)['result']

    def format_checkin(checkin):
        """Extract location (plus jitter) and hour from checkin"""
        lng, lat = checkin['loc']['coordinates']
        hour = checkin['time'].hour
        return [lng + noise(), lat + noise(), hour]

    rows = [str(format_checkin(document)) for document in documents]
    # NOTE(review): the JS variable is called `helsinki_fs` whatever the
    # value of `city` -- confirm whether consumers rely on that name.
    content = 'var helsinki_fs = [\n' + ',\n'.join(rows) + '];'
    with open(city + '_fs.js', 'w') as output:
        output.write(content)
def output_checkins(city, host=cm.HOST, port=cm.PORT):
    """Export all the check-ins made in `city` to a JavaScript file.

    The file `<city>_fs.js` receives a single array declaration in which each
    element is the string form of `[longitude, latitude, hour]`; `noise()` is
    added to both coordinates of every check-in.
    """
    collection = cm.connect_to_db("foursquare", host, port)[0]["checkin"]
    stages = cm.build_query(city, venue=False, fields=["loc", "time"])
    found = collection.aggregate(stages)["result"]

    def format_checkin(checkin):
        """Extract location (plus jitter) and hour from checkin"""
        lng, lat = checkin["loc"]["coordinates"]
        hour = checkin["time"].hour
        # Longitude is jittered before latitude.
        return [lng + noise(), lat + noise(), hour]

    lines = [str(format_checkin(checkin)) for checkin in found]
    # NOTE(review): the variable name below does not depend on `city` --
    # confirm this is intended before reusing the output for another city.
    pieces = ["var helsinki_fs = [\n", ",\n".join(lines), "];"]
    with open(city + "_fs.js", "w") as output:
        output.writelines(pieces)
def output_checkins(city, host=cm.HOST, port=cm.PORT):
    """Write the check-ins of `city`, with their hour, as a JS array.

    A trace line is printed on entry and once more for every formatted
    check-in. The array itself goes to the file `<city>_fs.js`, one
    `[longitude, latitude, hour]` triple per check-in, with `noise()` added
    to each coordinate.
    """
    # A single parenthesised string prints the same text with the Python 2
    # print statement as with the print function.
    print('utils.py/output_checkins')
    foursquare = cm.connect_to_db('foursquare', host, port)[0]
    checkins = foursquare['checkin']
    pipeline = cm.build_query(city, venue=False, fields=['loc', 'time'])
    matches = checkins.aggregate(pipeline)['result']

    def format_checkin(checkin):
        """Extract location (plus jitter) and hour from checkin"""
        print('utils.py/format_checkin')
        lng, lat = checkin['loc']['coordinates']
        hour = checkin['time'].hour
        return [lng + noise(), lat + noise(), hour]

    triples = []
    for match in matches:
        triples.append(str(format_checkin(match)))

    # NOTE(review): `helsinki_fs` is used as the variable name for every
    # city -- confirm whether the JS side expects exactly that name.
    with open(city + '_fs.js', 'w') as output:
        output.write('var helsinki_fs = [\n' + ',\n'.join(triples))
        output.write('];')
def get_users(args):
    """Return the ids (`tuid`) of the users who checked in `args.city`.

    The result of `p.load_var('<city>_users.my')` is returned when that call
    succeeds. If it raises IOError, the ids are computed from the `checkin`
    collection of the `foursquare` database reached through `args.host` and
    `args.port`; in that case they are ordered by decreasing number of
    check-ins.
    """
    import CommonMongo as cm
    city = args.city
    try:
        return p.load_var(city + '_users.my')
    except IOError:
        # Nothing saved for this city: query the database instead.
        pass

    db = cm.connect_to_db('foursquare', args.host, args.port)[0]
    # Count the check-ins of every user seen so far, most active first.
    pipeline = cm.build_query(city, venue=True, fields=['tuid'])
    pipeline.append({'$group': {'_id': '$tuid', 'checkins': {'$sum': 1}}})
    pipeline.append({'$sort': {'checkins': -1}})
    ranked = db.checkin.aggregate(pipeline)['result']

    # Disabled exploration of the distribution of check-ins count:
    # import utils as u
    # import pandas as pd
    # print(len(users))
    # infos = u.xzip(users, '_id checkins'.split())
    # df_users = pd.DataFrame(index=map(int, infos[0]),
    #                         data=dict(count=infos[1]))
    # ppl.hist(df_users.values, bins=25)

    counts = OrderedDict((row['_id'], row['checkins']) for row in ranked)
    return counts.keys()
def get_users(args):
    """List the users (by `tuid`) having at least one check-in in `args.city`.

    A previously saved list is tried first through
    `p.load_var('<city>_users.my')`. Only when this raises IOError is the
    `foursquare` database at `args.host`:`args.port` queried; the ids then
    come out sorted from the most to the least active user.
    """
    import CommonMongo as cm
    city = args.city
    try:
        return p.load_var(city+'_users.my')
    except IOError:
        # No saved list could be read; fall through to the database query.
        pass

    db = cm.connect_to_db('foursquare', args.host, args.port)[0]
    # One group per user, holding how many check-ins that user made.
    by_user = {'$group': {'_id': '$tuid', 'checkins': {'$sum': 1}}}
    most_active_first = {'$sort': {'checkins': -1}}
    user_query = cm.build_query(city, venue=True, fields=['tuid'])
    user_query.extend([by_user, most_active_first])
    found = db.checkin.aggregate(user_query)['result']

    # Disabled exploration of the distribution of check-ins count:
    # import utils as u
    # import pandas as pd
    # print(len(users))
    # infos = u.xzip(users, '_id checkins'.split())
    # df_users = pd.DataFrame(index=map(int, infos[0]),
    #                         data=dict(count=infos[1]))
    # ppl.hist(df_users.values, bins=25)

    users = OrderedDict()
    for user in found:
        users[user['_id']] = user['checkins']
    return users.keys()