Example #1
0
def lookup_contacts(contact_uids, mdists, env):
    """
    Look up and store Twitter profiles for contact (or leaf) uids.

    The first uid selects the mod group for this run. Uids found in that
    group's stored set (loaded from env when a saved copy exists, otherwise
    from User.mod_id_set) are skipped. The remaining uids are sent to
    twit.user_lookup in batches produced by utils.grouper(100, ...); each
    truthy profile returned gets its geonames_place set from
    gis.twitter_loc(location) and is then merged.

    This is a generator: for every batch it yields the length of the lookup
    result (falsy entries are counted but not merged).
    """
    twitter_api = twitter.TwitterResource()
    geocoder = gisgraphy.GisgraphyResource()
    geocoder.set_mdists(mdists)

    # FIXME: we need a better way to know which file we are on.
    # FIXME: use the new input_paths thing
    # NOTE(review): peek presumably hands back the first uid together with an
    # iterable that still contains it -- confirm against utils.peek.
    head_uid, uid_stream = utils.peek(contact_uids)
    group = User.mod_id(head_uid)
    logging.info('lookup old uids for %s', group)

    cache_key = 'saved_users.%s' % group
    known = (
        set(env.load(cache_key))
        if env.name_exists(cache_key)
        else User.mod_id_set(int(group))
    )
    logging.info('loaded mod_group %s of %d users', group, len(known))

    # Lazily drop every uid that is already stored.
    unseen = (uid for uid in uid_stream if uid not in known)

    for batch in utils.grouper(100, unseen, dontfill=True):
        profiles = twitter_api.user_lookup(user_ids=list(batch))
        for profile in profiles:
            if not profile:
                # Falsy entries in the lookup result are skipped.
                continue
            assert User.mod_id(profile._id) == group
            profile.geonames_place = geocoder.twitter_loc(profile.location)
            profile.merge()
        yield len(profiles)
Example #2
0
def trash_extra_mloc(mloc_uids):
    """
    Remove the mloc_users that mloc_uids skipped over.

    All uids in mloc_uids must share one group (uid % 100). Every id in
    User.mod_id_set(group) that is NOT in mloc_uids is removed from the
    Edges, Tweets and User collections of User.database.

    Raises AssertionError if mloc_uids is empty or spans more than one
    group. The check is done before any database access.
    """
    # This scares me a bit, but it's too late to go back and fix find_contacts.
    # I really wish I had limited find_contacts to stop after 2500 good users.
    mloc_uids = set(mloc_uids)
    groups = set(uid % 100 for uid in mloc_uids)
    if len(groups) != 1:
        # Raised explicitly instead of via `assert` so that this guard on a
        # destructive delete is not stripped when Python runs with -O.
        # AssertionError is kept so existing callers see the same exception.
        raise AssertionError(
            "mloc_uids must belong to exactly one mod-100 group, got %r"
            % (sorted(groups),)
        )
    group = next(iter(groups))

    db = User.database
    stored = User.mod_id_set(group)
    trash = list(stored - mloc_uids)
    logging.info("trashing %d users", len(trash))
    logging.debug("full list: %r", trash)

    doomed = {'_id': {'$in': trash}}
    db.Edges.remove(doomed)
    db.Tweets.remove(doomed)
    db.User.remove(doomed)