def _paged_users(uids, **find_kwargs):
    """
    Query users for ``uids`` in batches instead of one big request.

    Any keyword arguments are forwarded unchanged to every ``User.find``
    call.  The return value is one lazy iterator that chains together the
    results of the per-batch queries; a batch is only queried once the
    iterator reaches it.
    """
    # save some round trips by asking for 100 at a time
    batches = utils.grouper(100, uids, dontfill=True)

    def batch_results():
        # one User.find result per batch, produced on demand
        for batch in batches:
            wanted = User._id.is_in(list(batch))
            yield User.find(wanted, **find_kwargs)

    return chain.from_iterable(batch_results())
def lookup_contacts(contact_uids, mdists, env):
    """
    lookup user profiles for contacts or leafs

    The mod group is derived from the first uid.  Uids already known for
    that group are skipped: the known set comes from the env entry
    'saved_users.<group>' when it exists, otherwise from
    ``User.mod_id_set``.  The remaining uids are looked up on twitter in
    batches of 100; every truthy profile returned gets a
    ``geonames_place`` from gisgraphy and is merged.

    This is a generator: it yields, once per batch, the length of the
    lookup result (falsy entries are counted too).
    """
    twit = twitter.TwitterResource()
    gis = gisgraphy.GisgraphyResource()
    gis.set_mdists(mdists)

    # FIXME: we need a better way to know which file we are on.
    # FIXME: use the new input_paths thing
    first, contact_uids = utils.peek(contact_uids)
    group = User.mod_id(first)
    logging.info('lookup old uids for %s', group)

    save_name = 'saved_users.%s' % group
    known = (
        set(env.load(save_name))
        if env.name_exists(save_name)
        else User.mod_id_set(int(group))
    )
    logging.info('loaded mod_group %s of %d users', group, len(known))

    # lazily drop every uid we already have for this group
    unseen = (uid for uid in contact_uids if uid not in known)

    for batch in utils.grouper(100, unseen, dontfill=True):
        found = twit.user_lookup(user_ids=list(batch))
        for amigo in found:
            if not amigo:
                # the lookup result may contain falsy placeholders; skip them
                continue
            assert User.mod_id(amigo._id) == group
            amigo.geonames_place = gis.twitter_loc(amigo.location)
            amigo.merge()
        yield len(found)
def pred_users(uids):
    """
    fetch target users from database

    Queries ``User.find`` for the given uids in batches of 100 and yields
    ``to_d()`` of every user found.
    """
    batches = utils.grouper(100, uids, dontfill=True)
    for batch in batches:
        wanted = User._id.is_in(tuple(batch))
        for user in User.find(wanted):
            yield user.to_d()
def _fetch_profiles(uids, twit, gis):
    """
    Return user objects for ``uids``, fetching unknown ones from twitter.

    Users already found by ``User.find`` are returned as-is.  Ids with no
    stored user are looked up through ``twit.user_lookup`` in batches of
    100; each truthy profile returned gets a ``geonames_place`` from
    ``gis.twitter_loc``, is merged, and is appended to the result.

    ``uids`` may be any iterable, including a one-shot iterator.
    ``twit`` must provide ``user_lookup(user_ids=...)`` and ``gis`` must
    provide ``twitter_loc(location)``.

    Returns a list: stored users first, then newly fetched ones.
    """
    # uids is walked twice below (the query, then the missing-id scan).
    # Materialize it once so a generator is not exhausted by the first pass,
    # which would silently leave missing_ids empty.
    uids = list(uids)

    users = list(User.find(User._id.is_in(uids)))
    existing_ids = {u._id for u in users}
    missing_ids = [uid for uid in uids if uid not in existing_ids]

    for chunk in utils.grouper(100, missing_ids, dontfill=True):
        found = twit.user_lookup(user_ids=list(chunk))
        # the lookup result may contain falsy entries; skip those
        for amigo in filter(None, found):
            amigo.geonames_place = gis.twitter_loc(amigo.location)
            amigo.merge()
            users.append(amigo)
    return users