Example #1
0
def cheap_locals(nebr_ids,mloc_uids,cutoff=20):
    """
    Estimate, for each neighbor, the share of its sampled contacts that are
    close to it.

    For every distinct id in nebr_ids the neighbor is loaded, its contact ids
    (gathered over User.NEBR_KEYS, excluding anything in mloc_uids) are
    shuffled, and at most `cutoff` of them are fetched as leafs. The
    generator yields (neighbor id, ratio), where ratio is the fraction of
    located leafs whose coord_in_miles distance to the neighbor is below 25.
    Neighbors with no candidate contacts or no located leafs yield nothing.
    """
    visited = set()
    for nebr_id in nebr_ids:
        # nebr_ids is created by clumping nebr_split, so an id can show up
        # more than once; only the first occurrence is processed.
        if nebr_id in visited:
            continue
        visited.add(nebr_id)

        nebr = User.get_id(nebr_id)
        nebr_loc = nebr.geonames_place.to_d()

        candidates = []
        for key in User.NEBR_KEYS:
            for cid in (getattr(nebr,key) or []):
                if cid not in mloc_uids:
                    candidates.append(cid)
        if not candidates:
            continue

        # shuffle, then keep the head: a random sample of at most `cutoff`
        random.shuffle(candidates)
        sample = candidates[:cutoff]
        leafs = User.find(User._id.is_in(sample), fields=['gnp'])

        dists = []
        for leaf in leafs:
            if leaf.has_place():
                dists.append(
                    coord_in_miles(nebr_loc,leaf.geonames_place.to_d()))
        if not dists:
            continue

        close = [d for d in dists if d<25]
        yield nebr._id,len(close)/float(len(dists))
Example #2
0
def _paged_users(uids, **find_kwargs):
    """
    Return a lazy iterator over the users whose ids are in uids.

    The ids are split into groups of up to 100 so that each User.find call
    covers a whole group, which saves round trips. Extra keyword arguments
    are forwarded to every User.find call.
    """
    batches = utils.grouper(100, uids, dontfill=True)
    # one query per batch; nothing is run until the result is iterated
    queries = (
        User.find(User._id.is_in(list(batch)), **find_kwargs)
        for batch in batches
    )
    return chain.from_iterable(queries)
Example #3
0
def pred_users(uids):
    """
    Yield the dict form (to_d) of every user found for the given ids.

    The ids are looked up in groups of up to 100 per query.
    """
    batches = utils.grouper(100,uids,dontfill=True)
    for batch in batches:
        query = User._id.is_in(tuple(batch))
        for user in User.find(query):
            yield user.to_d()
Example #4
0
def nebr_dists(mloc_tile):
    """
    Yield (distance, 1) for each contact listed in mloc_tile['nebrs'].

    The distance is coord_in_miles between mloc_tile['mloc'] and the
    contact's geonames place.
    """
    query = User._id.is_in(mloc_tile['nebrs'])
    for nebr in User.find(query,fields=['gnp']):
        target_loc = mloc_tile['mloc']
        nebr_loc = nebr.geonames_place.to_d()
        # the trailing 1 keeps the output format identical to
        # stranger_dists.
        yield coord_in_miles(target_loc, nebr_loc),1
Example #5
0
def mloc_tile(mloc_uids):
    """
    Yield (tile, user dict) for every target user that has neighbors.

    The tile key is _tile applied to the latitude of the user's median
    location; users without neighbors are skipped.
    """
    query = User._id.is_in(tuple(mloc_uids))
    for user in User.find(query,fields=['mloc','nebrs']):
        if user.neighbors:
            # median_loc unpacks as (lng, lat); only the latitude is used
            _lng,lat = user.median_loc
            yield _tile(lat),user.to_d()
Example #6
0
def mloc_uids(user_ds):
    """
    Return the ids of up to 2500 users that have at least one contact list.

    The first 2600 entries of user_ds are considered. An id is kept when the
    stored user has a truthy value for any attribute named in NEBR_KEYS. The
    result keeps the order in which the ids were read.
    """
    candidates = []
    for user_d in itertools.islice(user_ds,2600):
        candidates.append(user_d['id'])

    usable = set()
    for user in User.find(User._id.is_in(candidates)):
        if any(getattr(user,key) for key in NEBR_KEYS):
            usable.add(user._id)

    kept = [uid for uid in candidates if uid in usable]
    logging.info("found %d of %d",len(kept),len(candidates))
    # the 100 extra candidates are slack for accounts that didn't work;
    # trim back down to 2500 good users
    return kept[:2500]
Example #7
0
def _fetch_profiles(uids,twit,gis):
    """
    Return user objects for uids, looking up the ones not stored yet.

    Users already returned by User.find come first. Ids that are missing
    are requested from twit.user_lookup in groups of up to 100; every truthy
    result gets its geonames_place set from gis.twitter_loc(location), is
    merged, and is appended to the returned list.
    """
    profiles = list(User.find(User._id.is_in(uids)))
    known = set(profile._id for profile in profiles)
    unknown = [uid for uid in uids if uid not in known]

    for batch in utils.grouper(100, unknown, dontfill=True):
        looked_up = twit.user_lookup(user_ids=list(batch))
        # drop falsy entries before touching any of the results
        amigos = [amigo for amigo in looked_up if amigo]
        for amigo in amigos:
            amigo.geonames_place = gis.twitter_loc(amigo.location)
            amigo.merge()
            profiles.append(amigo)
    return profiles
Example #8
0
def nebrs_d(user_d,mloc_blur):
    """
    Build the make_nebrs_d dict for a target user and return it in a
    one-element list.

    The dict from make_nebrs_d (given the user, the users listed in
    user_d['nebrs'] and the 'ats' of the user's Tweets record) is extended
    with 'mloc' (copied from user_d) and 'gnp' (from _blur_gnp).
    """
    blur = MlocBlur(*mloc_blur)
    target = User(user_d)
    contacts = User.find(User._id.is_in(user_d['nebrs']))
    ats = Tweets.get_id(user_d['_id'],fields=['ats']).ats

    info = make_nebrs_d(target,contacts,ats)
    info['mloc'] = user_d['mloc']
    info['gnp'] = _blur_gnp(blur, user_d)
    return [info]
Example #9
0
def _pick_neighbors(user):
    """
    Pick up to 25 located contact ids, alternating between contact lists.

    For each attribute in NEBR_KEYS that is non-empty on user, the ids of
    the contacts that have a place are collected. The per-key sets are then
    interleaved one id at a time and the first 25 truthy ids are returned.
    """
    located = {}
    for key in NEBR_KEYS:
        cids = getattr(user,key)
        if cids:
            # this is slowish
            found = User.find(User._id.is_in(cids), fields=['gnp'])
            located[key] = set(c._id for c in found if c.has_place())

    # izip_longest takes one id from every set per round and pads exhausted
    # sets with None; the truthiness test below drops that padding.
    rounds = itertools.izip_longest(*located.values())
    interleaved = itertools.chain.from_iterable(rounds)
    picked = [cid for cid in interleaved if cid][:25]
    logging.info('picked %d of %d contacts',len(picked),len(user.contacts))
    return picked
Example #10
0
def fix_mloc_mdists(mloc_uids,mdists):
    """
    Recompute and save geonames_place for the target users, now that mdists
    is available.

    Each user in mloc_uids gets geonames_place reassigned from
    gis.twitter_loc(user.location), using a GisgraphyResource configured
    with mdists, and is saved. Returns a one-element list holding the number
    of users that ended up with a truthy geonames_place.
    """
    # mdists did not exist yet when the mloc users were first saved. Running
    # the mdist calculation before find_contacts would make this function
    # unnecessary.
    gis = gisgraphy.GisgraphyResource()
    gis.set_mdists(mdists)

    fixed = 0
    for user in User.find(User._id.is_in(tuple(mloc_uids))):
        place = gis.twitter_loc(user.location)
        user.geonames_place = place
        user.save()
        if user.geonames_place:
            fixed += 1
    logging.info("fixed %d mdists",fixed)
    return [fixed]