Example #1
0
def describe_venue(venues, city, depth=2, limit=None):
    """Gather some statistics about venues, aggregating categories at `depth`
    level.

    `venues` is the collection that gets aggregated; `city` and `limit` are
    forwarded to `cm.build_query`. Results are grouped by the `cat` field,
    each category is replaced by its ancestor at position `depth` in the path
    returned by `fsc.search_categories` (or by the last element of that path
    when it is shorter), and counts and likes are summed per ancestor.

    Return an OrderedDict mapping each category to a tuple
    `(percentage of venues, number of venues, total likes)`, ordered by
    decreasing percentage. Venues whose category is None are ignored, both in
    the result and in the total used for percentages.
    """
    query = cm.build_query(city, False, ['cat', 'likes'], limit)
    group = {'_id': '$cat', 'count': {'$sum': 1}, 'like': {'$sum': '$likes'}}
    query.extend([{'$group': group}, {'$sort': {'count': -1}}])
    res = venues.aggregate(query)['result']

    def parenting_cat(place, depth):
        """Return the category of `place`, without going beyond `depth`"""
        _, path = fsc.search_categories(place['_id'])
        # NOTE(review): `CAT_TO_ID[:x]` looks like bidict's inverse-lookup
        # slice syntax rather than a regular slice -- confirm against fsc.
        if len(path) > depth:
            return fsc.CAT_TO_ID[:path[depth]]
        return fsc.CAT_TO_ID[:path[-1]]

    totals = defaultdict(lambda: (0, 0))
    nb_venues = 0
    for venue in res:
        if venue['_id'] is None:
            continue
        cat = parenting_cat(venue, depth)
        count, like = venue['count'], venue['like']
        nb_venues += count
        prev_count, prev_like = totals[cat]
        totals[cat] = (prev_count + count, prev_like + like)

    # Build the final rows in a separate list instead of overwriting the
    # mapping while iterating over it; `.items()` (unlike `.iteritems()`)
    # exists on both Python 2 and Python 3.
    summary = [(cat, (100.0*count/nb_venues, count, like))
               for cat, (count, like) in totals.items()]
    # The sort is stable, so categories with equal share keep their order.
    summary.sort(key=lambda u: u[1][0], reverse=True)
    return OrderedDict(summary)
Example #2
0
def all_places_from_venue(checkins, city, converse=False):
    """Group the `place` values of check-ins in `city` by their `lid`.

    Return a dict whose keys are the `lid` values and whose values are the
    lists of `place` values pushed for that key. With `converse` set, the two
    roles are swapped: keys are `place` values and lists hold `lid` values.
    Groups whose key is falsy (e.g. None) are left out.
    """
    pipeline = cm.build_query(city, fields=['lid', 'place'])
    if converse:
        group_key, pushed = '$place', '$lid'
    else:
        group_key, pushed = '$lid', '$place'
    pipeline.append({"$group": {'_id': group_key,
                                'others': {'$push': pushed}}})
    mapping = {}
    for row in checkins.aggregate(pipeline)['result']:
        # skip groups without a usable key
        if row['_id']:
            mapping[row['_id']] = row['others']
    return mapping
Example #3
0
def all_places_from_venue(checkins, city, converse=False):
    """Group the `place` values of check-ins in `city` by their `lid`.

    Return a dict whose keys are the `lid` values and whose values are the
    lists of `place` values pushed for that key. With `converse` set, the two
    roles are swapped: keys are `place` values and lists hold `lid` values.
    Groups whose key is falsy (e.g. None) are left out.
    """
    pipeline = cm.build_query(city, fields=['lid', 'place'])
    if converse:
        group_key, pushed = '$place', '$lid'
    else:
        group_key, pushed = '$lid', '$place'
    pipeline.append({"$group": {'_id': group_key,
                                'others': {'$push': pushed}}})
    mapping = {}
    for row in checkins.aggregate(pipeline)['result']:
        # skip groups without a usable key
        if row['_id']:
            mapping[row['_id']] = row['others']
    return mapping
Example #4
0
def venues_activity(checkins, city, limit=None):
    """Compute the visiting time pattern of the venues in `city`.

    Check-ins are grouped by `lid`, and the list of `time` values of each
    group is handed to `aggregate_visits`, which yields a pair for that
    venue. Return two parallel lists `(hourly, weekly)` holding respectively
    the first and second element of each pair.

    When `limit` is a positive integer, groups are additionally sorted by
    decreasing number of check-ins.
    """
    pipeline = cm.build_query(city, True, ['lid', 'time'], limit)
    per_venue = {'$group': {'_id': '$lid',
                            'count': {'$sum': 1},
                            'visits': {'$push': '$time'}}}
    pipeline.insert(2, per_venue)
    wants_top = isinstance(limit, int) and limit > 0
    if wants_top:
        # The sort goes right before the last stage of the pipeline
        # (presumably the limit added by cm.build_query -- confirm).
        pipeline.insert(-1, {'$sort': {'count': -1}})
    rows = checkins.aggregate(pipeline)['result']
    patterns = [aggregate_visits(row['visits']) for row in rows]
    hourly = [hour for hour, _ in patterns]
    weekly = [day for _, day in patterns]
    return hourly, weekly
Example #5
0
def output_checkins(city, host=cm.HOST, port=cm.PORT):
    """Dump the check-ins of `city` as a JS array in `<city>_fs.js`.

    Each entry of the array is `[longitude, latitude, hour]`, where both
    coordinates have `noise()` added to them. Note that the JS variable is
    always named `helsinki_fs`, whatever the value of `city`.
    """
    database = cm.connect_to_db('foursquare', host, port)[0]
    pipeline = cm.build_query(city, venue=False, fields=['loc', 'time'])
    rows = database['checkin'].aggregate(pipeline)['result']

    def as_js_row(entry):
        """Build the [lng, lat, hour] triple of one check-in."""
        coords = entry['loc']['coordinates']
        lng, lat = coords
        hour = entry['time'].hour
        return [lng + noise(), lat + noise(), hour]

    # Format everything before touching the file, so that a failure here
    # does not leave a truncated output behind.
    body = ',\n'.join([str(as_js_row(row)) for row in rows])
    with open(city + '_fs.js', 'w') as js_file:
        js_file.write('var helsinki_fs = [\n' + body + '];')
Example #6
0
def output_checkins(city, host=cm.HOST, port=cm.PORT):
    """Dump the check-ins of `city` as a JS array in `<city>_fs.js`.

    Each entry of the array is `[longitude, latitude, hour]`, where both
    coordinates have `noise()` added to them. Note that the JS variable is
    always named `helsinki_fs`, whatever the value of `city`.
    """
    database = cm.connect_to_db("foursquare", host, port)[0]
    pipeline = cm.build_query(city, venue=False, fields=["loc", "time"])
    rows = database["checkin"].aggregate(pipeline)["result"]

    def as_js_row(entry):
        """Build the [lng, lat, hour] triple of one check-in."""
        coords = entry["loc"]["coordinates"]
        lng, lat = coords
        hour = entry["time"].hour
        return [lng + noise(), lat + noise(), hour]

    # Format everything before touching the file, so that a failure here
    # does not leave a truncated output behind.
    body = ",\n".join([str(as_js_row(row)) for row in rows])
    with open(city + "_fs.js", "w") as js_file:
        js_file.write("var helsinki_fs = [\n" + body + "];")
def output_checkins(city, host=cm.HOST, port=cm.PORT):
    """Write a JS array of all checkins in `city` with their hour.

    The array is written to `<city>_fs.js`; each entry is
    `[longitude, latitude, hour]` with `noise()` added to both coordinates.
    Note that the JS variable is always named `helsinki_fs`, whatever the
    value of `city`. A trace line is printed on entry and once per check-in.
    """
    # A parenthesised single string prints the same text on Python 2 and is
    # also valid syntax on Python 3, unlike the bare print statement.
    print('utils.py/output_checkins')
    checkins = cm.connect_to_db('foursquare', host, port)[0]['checkin']
    query = cm.build_query(city, venue=False, fields=['loc', 'time'])
    res = checkins.aggregate(query)['result']

    def format_checkin(checkin):
        """Extract location (plus jitter) and hour from checkin"""
        print('utils.py/format_checkin')
        lng, lat = checkin['loc']['coordinates']
        hour = checkin['time'].hour
        return [lng + noise(), lat + noise(), hour]

    # Format every check-in before opening the file, so that an error here
    # does not leave a partially written output.
    formated = [str(format_checkin(c)) for c in res]
    with open(city + '_fs.js', 'w') as output:
        output.write('var helsinki_fs = [\n')
        output.write(',\n'.join(formated))
        output.write('];')
Example #8
0
def get_users(args):
    """Return the list of user ids with check-ins in `args.city`.

    The result of `p.load_var(<city>_users.my)` is returned directly when
    that call succeeds. Otherwise the `checkin` collection of the
    `foursquare` database (reached through `args.host` and `args.port`) is
    grouped by `tuid`, and the ids are returned as a list ordered by
    decreasing number of check-ins.
    """
    import CommonMongo as cm
    city = args.city
    try:
        # presumably a result saved by an earlier run -- verify against `p`
        return p.load_var(city + '_users.my')
    except IOError:
        # nothing could be loaded: fall through and query the database
        pass
    db = cm.connect_to_db('foursquare', args.host, args.port)[0]
    # First get a list of all users so far
    user_query = cm.build_query(city, venue=True, fields=['tuid'])
    group = {'$group': {'_id': '$tuid', 'checkins': {'$sum': 1}}}
    user_query.extend([group, {'$sort': {'checkins': -1}}])
    users = db.checkin.aggregate(user_query)['result']
    # Each row holds `_id` and `checkins`, should the distribution of
    # check-in counts ever need to be inspected.
    users = OrderedDict((user['_id'], user['checkins']) for user in users)
    # Always return a real list: `.keys()` is only a view on Python 3.
    return list(users)
Example #9
0
def get_users(args):
    """Return the list of user ids with check-ins in `args.city`.

    The result of `p.load_var(<city>_users.my)` is returned directly when
    that call succeeds. Otherwise the `checkin` collection of the
    `foursquare` database (reached through `args.host` and `args.port`) is
    grouped by `tuid`, and the ids are returned as a list ordered by
    decreasing number of check-ins.
    """
    import CommonMongo as cm
    city = args.city
    try:
        # presumably a result saved by an earlier run -- verify against `p`
        return p.load_var(city+'_users.my')
    except IOError:
        # nothing could be loaded: fall through and query the database
        pass
    db = cm.connect_to_db('foursquare', args.host, args.port)[0]
    # First get a list of all users so far
    user_query = cm.build_query(city, venue=True, fields=['tuid'])
    group = {'$group': {'_id': '$tuid', 'checkins': {'$sum': 1}}}
    user_query.extend([group, {'$sort': {'checkins': -1}}])
    users = db.checkin.aggregate(user_query)['result']
    # Each row holds `_id` and `checkins`, should the distribution of
    # check-in counts ever need to be inspected.
    users = OrderedDict((user['_id'], user['checkins']) for user in users)
    # Always return a real list: `.keys()` is only a view on Python 3.
    return list(users)