Beispiel #1
0
def lookup_contacts(contact_uids, mdists, env):
    """
    Look up Twitter profiles for contact ids that are not already known.

    The first id in contact_uids is peeked at to compute the mod group for
    this batch.  The set of already-known ids for that group is loaded from
    env under the name 'saved_users.<group>' when that name exists, and is
    taken from User.mod_id_set(int(group)) otherwise.  Ids missing from that
    set are sent to twit.user_lookup in groups produced by
    utils.grouper(100, ...).  Every truthy profile returned gets its
    geonames_place set from gis.twitter_loc(profile.location) and is merged.

    This is a generator: after each lookup it yields len() of the lookup
    result, which also counts the falsy entries that were skipped.
    """
    twitter_api = twitter.TwitterResource()
    geocoder = gisgraphy.GisgraphyResource()
    geocoder.set_mdists(mdists)

    # FIXME: we need a better way to know which file we are on.
    # FIXME: use the new input_paths thing
    head, contact_uids = utils.peek(contact_uids)
    mod_group = User.mod_id(head)
    logging.info('lookup old uids for %s', mod_group)

    cache_name = 'saved_users.%s' % mod_group
    known = (
        set(env.load(cache_name))
        if env.name_exists(cache_name)
        else User.mod_id_set(int(mod_group))
    )
    logging.info('loaded mod_group %s of %d users', mod_group, len(known))

    # Lazily drop every id that is already in the known set.
    unseen = (uid for uid in contact_uids if uid not in known)

    for batch in utils.grouper(100, unseen, dontfill=True):
        profiles = twitter_api.user_lookup(user_ids=list(batch))
        for profile in filter(None, profiles):
            # Sanity check: every profile must belong to this batch's group.
            assert User.mod_id(profile._id) == mod_group
            profile.geonames_place = geocoder.twitter_loc(profile.location)
            profile.merge()
        yield len(profiles)
Beispiel #2
0
def at_tuples(geo_at):
    """
    Turn one (tweet creator, mentioned ids) pair into one record per mention.

    geo_at is unpacked as (uid, ats).  For every entry in ats this generator
    yields (User.mod_id(mentioned), (mentioned, uid)), so the records are
    keyed by the mod id of the mentioned user.
    """
    creator, mentions = geo_at
    for mentioned in mentions:
        key = User.mod_id(mentioned)
        yield key, (mentioned, creator)
Beispiel #3
0
def geo_ats():
    """
    Yield the at mentions stored in the Tweets collection.

    Queries Tweets.find({}, fields=['ats']) and, for every record whose ats
    attribute is truthy, yields (User.mod_id(_id), (_id, ats)).  Records
    with empty or missing ats are skipped.
    """
    for record in Tweets.find({}, fields=['ats']):
        if not record.ats:
            continue
        owner = record._id
        yield User.mod_id(owner), (owner, record.ats)
Beispiel #4
0
def saved_users():
    """
    Return (mod id, user id) pairs for the users returned by the query.

    The query User.database.User.find({}, fields=[], timeout=False) is
    issued immediately; the returned generator then lazily produces
    (User.mod_id(_id), _id) for each document.

    Per the original note, the purpose is to let lookup_contacts skip users
    that are already in the database, because checking the database from
    inside lookup_contacts is too slow.
    """
    # NOTE(review): fields=[] presumably limits each document to _id - confirm.
    cursor = User.database.User.find({}, fields=[], timeout=False)
    stored_ids = (doc['_id'] for doc in cursor)
    return ((User.mod_id(uid), uid) for uid in stored_ids)
Beispiel #5
0
def pick_nebrs(mloc_uid):
    """
    Choose and store the neighbors of one target user.

    Loads the user with User.get_id(mloc_uid), assigns the result of
    _pick_neighbors(user) to user.neighbors and saves the user.  Returns a
    generator of (User.mod_id(n), n) for each entry in user.neighbors.

    NOTE(review): the original note says 25 located contacts are picked; the
    selection happens inside _pick_neighbors, which is not shown here.
    """
    # Original note: reads predict.prep.mloc_uids and requires
    # lookup_contacts to have run, but does not read its output.
    target = User.get_id(mloc_uid)
    chosen = _pick_neighbors(target)
    target.neighbors = chosen
    target.save()
    return ((User.mod_id(nebr), nebr) for nebr in target.neighbors)
Beispiel #6
0
def parse_geotweets(tweets):
    """
    Emit users and coordinates from a stream of tweet dicts.
    USAGE: gunzip -c ~/may/*/*.gz | ./gb.py -s parse_geotweets

    Items without an 'id' key, and tweets whose 'coordinates' value is
    missing or falsy, are skipped.  For every remaining tweet this generator
    yields (User.mod_id(uid), (uid, coordinates)); the first time a uid is
    seen it yields (User.mod_id(uid), user dict) just before that.
    """
    # Users and locations are emitted intermingled because the data is too
    # big to fit in memory and a second pass is not wanted.
    seen_uids = set()
    for count, tweet in enumerate(tweets):
        if count % 10000 == 0:
            logging.info("read %d tweets"%count)
        if 'id' not in tweet:
            # this is not a tweet
            continue
        author = tweet['user']
        uid = author['id']
        if not tweet.get('coordinates'):
            continue
        first_sighting = uid not in seen_uids
        if first_sighting:
            yield User.mod_id(uid), author
            seen_uids.add(uid)
        point = tweet['coordinates']['coordinates']
        yield User.mod_id(uid), (uid, point)
    logging.info("sending up to %d users"%len(seen_uids))
Beispiel #7
0
def _my_contacts(user):
    return ((User.mod_id(c),c) for c in user.contacts)