Example #1
0
 def test_fix_mloc_mdists(self):
     """
     Run the fix_mloc_mdists job for users 3 and 103 and check the mdist
     stored on each user's geonames_place afterwards.
     """
     self.FS["mdists"] = [{"other": 2}]
     self.FS["mloc_uids.03"] = [3, 103]
     for uid, place in ((3, "Texas"), (103, "Bryan, TX")):
         User(_id=uid, location=place).save()
     with _patch_gisgraphy():
         self.gob.run_job("fix_mloc_mdists")
     # Reload both users before asserting anything about either of them.
     reloaded = [User.get_id(uid) for uid in (3, 103)]
     for user, mdist in zip(reloaded, (2000, 2)):
         self.assertEqual(user.geonames_place.mdist, mdist)
Example #2
0
def rfr_triads(user_d):
    """
    Find a social triangle around a target user plus one reciprocal friend
    who is outside that triangle, and return info about all four users.

    user_d is the raw dict for the target user; it is wrapped in a User.

    Returns a one-element list holding a dict with the keys 'me', 'you',
    'my' and 'our'. Each value is a dict with '_id' and 'loc': for 'me' the
    loc is the user's median_loc, for the others it is the geonames_place
    dict with the 'zipcode' entry removed. Returns an empty list when no
    such structure is found.
    """
    # We are looking for this structure in the social graph:
    # my  you---our
    #   \  |  /
    #      me
    # me is a target user, the other users are contacts, and the edges are all
    # reciprocal.
    me = User(user_d)
    me_rfr = set(me.rfriends or []).intersection(me.neighbors or [])
    if len(me_rfr) < 3:
        # Three distinct contacts are needed to fill you, our and my.
        return []
    for you_id in me_rfr:
        you_ed = Edges.get_id(you_id)
        if not you_ed:
            continue  # There are no edges for this neighbor.
        you_friends = you_ed.friends or []
        you_followers = you_ed.followers or []
        # ours: my reciprocal friends that you both follow and are followed by.
        ours = me_rfr.intersection(you_friends, you_followers)
        # mine: my reciprocal friends with no edge to you in either direction.
        mine = me_rfr.difference(you_friends, you_followers)
        # you_id comes from me_rfr and is not expected in its own edge lists,
        # so without this it would always land in `mine` and "my" could be
        # picked as the same user as "you".
        mine.discard(you_id)
        ours.discard(you_id)
        if ours and mine:
            d = dict(
                me=dict(_id=me._id, loc=me.median_loc),
                you=dict(_id=you_id),
                my=dict(_id=random.choice(list(mine))),
                our=dict(_id=random.choice(list(ours))),
            )
            for role, info in d.items():
                if role == 'me':
                    continue
                contact = User.get_id(info['_id'], fields=['gnp'])
                gnp = contact.geonames_place.to_d()
                gnp.pop('zipcode', None)
                info['loc'] = gnp
            # Only the first matching structure is reported.
            return [d]
    return []
Example #3
0
def cheap_locals(nebr_ids,mloc_uids,cutoff=20):
    """
    For each distinct neighbor id, sample up to `cutoff` of its contacts that
    are not in mloc_uids and yield (neighbor _id, ratio), where ratio is the
    share of sampled contacts with a place whose coord_in_miles distance to
    the neighbor is below 25.

    Neighbors with no eligible contacts, or whose sampled contacts have no
    place, yield nothing.
    """
    visited = set()
    for nebr_id in nebr_ids:
        # nebr_ids is created by clumping nebr_split, so ids can repeat.
        if nebr_id in visited:
            continue
        visited.add(nebr_id)

        nebr = User.get_id(nebr_id)
        home = nebr.geonames_place.to_d()

        # Gather contact ids over every neighbor-key list, skipping targets.
        candidates = []
        for key in User.NEBR_KEYS:
            for cid in (getattr(nebr, key) or []):
                if cid not in mloc_uids:
                    candidates.append(cid)
        if not candidates:
            continue

        # Shuffle, then keep the first `cutoff` ids as the sample.
        random.shuffle(candidates)
        sample = candidates[:cutoff]
        leafs = User.find(User._id.is_in(sample), fields=['gnp'])

        miles = []
        for leaf in leafs:
            if leaf.has_place():
                miles.append(coord_in_miles(home, leaf.geonames_place.to_d()))
        if not miles:
            continue

        nearby = sum(1.0 for m in miles if m < 25)
        yield nebr._id, nearby/len(miles)
Example #4
0
 def test_find_contacts(self):
     """
     After _find_contacts_6, check the first item of each result's second
     field in find_contacts.06 and the contact lists saved on user 6.
     """
     self._find_contacts_6()
     first_ids = [list(row[1])[0] for row in self.FS["find_contacts.06"]]
     first_ids.sort()
     self.assertEqual(first_ids, [0, 1, 2, 3, 7, 12, 18, 24, 30])
     user6 = User.get_id(6)
     self.assertEqual(user6.just_mentioned, [7])
     # Compare sorted, as the original test does for just_friends.
     just_friends = sorted(user6.just_friends)
     self.assertEqual(just_friends, [12, 18, 24, 30])
Example #5
0
def find_leafs(uid):
    """
    Save contacts for the user with id `uid`, using _pick_random_contacts
    with a limit of 100, then return the result of _my_contacts for that
    user.
    """
    resource = twitter.TwitterResource()
    contact = User.get_id(uid)
    _save_user_contacts(
        resource,
        contact,
        _pick_random_contacts,
        limit=100,
    )
    leafs = _my_contacts(contact)
    return leafs
Example #6
0
def pick_nebrs(mloc_uid):
    """
    Choose the neighbors of one target user, store them on the user, and
    return a generator of (User.mod_id(n), n) pairs, one per neighbor.

    The selection itself (the original note says 25 located contacts) is
    delegated to _pick_neighbors.
    """
    # Input comes from predict.prep.mloc_uids. lookup_contacts must have run
    # first, although its output is not read here.
    target = User.get_id(mloc_uid)
    target.neighbors = _pick_neighbors(target)
    target.save()
    nebr_ids = target.neighbors
    return ((User.mod_id(nebr_id), nebr_id) for nebr_id in nebr_ids)
Example #7
0
    def test_lookup_contacts(self):
        """
        Run lookup_contacts for ids 4 and 404 and check that user 4 is saved
        with a screen name and place while user 404 is not saved.
        """
        self.FS["mdists"] = [{"other": 2.5}]
        self.FS["contact_split.04"] = [4, 404]
        User.database.User = mock.MagicMock()
        # MockTwitterResource will throw a 404 if you lookup user 404, so
        # having find() report 404 lets us know the user was skipped.
        User.database.User.find.return_value = [{"_id": 404}]

        with _patch_twitter(), _patch_gisgraphy():
            self.gob.run_job("lookup_contacts")

        user4 = User.get_id(4)
        self.assertEqual(user4.screen_name, "user_4")
        place = user4.geonames_place
        self.assertEqual(place.feature_code, "PPLA2")
        self.assertEqual(user4.geonames_place.mdist, 3)
        self.assertEqual(User.get_id(404), None)
Example #8
0
 def test_find_contacts_errors(self):
     """
     Run find_contacts for ids 404 and 503 and check that each user records
     its id as error_status and that no contacts, edges or tweets are stored.
     """
     self.FS["mloc_users.04"] = [{"id": 404}]
     self.FS["mloc_users.03"] = [{"id": 503}]
     with _patch_twitter():
         self.gob.run_job("find_contacts")
     for failed_id in (404, 503):
         failed = User.get_id(failed_id)
         self.assertEqual(failed.error_status, failed_id)
         for attr in ("neighbors", "rfriends"):
             self.assertEqual(getattr(failed, attr), None)
         for model in (Edges, Tweets):
             self.assertEqual(model.get_id(failed_id), None)
Example #9
0
def find_contacts(user_ds):
    """
    For each target user, save the user's contacts and yield the user's
    contact entries.

    user_ds is an iterable of user dicts carrying an 'id' key; only the
    first 2600 are processed. A user that is already stored is not fetched
    again (a warning is logged). Otherwise a User is built from the dict,
    its geonames_place is set from gis.twitter_loc(user.location), and
    _save_user_contacts is called with _pick_random_contacts and a limit of
    100. In both cases everything returned by _my_contacts(user) is yielded.
    """
    gis = gisgraphy.GisgraphyResource()
    twit = twitter.TwitterResource()
    for user_d in itertools.islice(user_ds, 2600):
        user = User.get_id(user_d['id'])
        if user:
            # logging.warn is a deprecated alias; logging.warning is the
            # supported spelling on both Python 2 and Python 3.
            logging.warning("not revisiting %d", user._id)
        else:
            user = User(user_d)
            user.geonames_place = gis.twitter_loc(user.location)
            _save_user_contacts(twit, user, _pick_random_contacts, limit=100)
        for mod_nebr in _my_contacts(user):
            yield mod_nebr
Example #10
0
def total_contacts(user_ds):
    """
    Yield one summary per target user (first 2600 only), used to count the
    total number of contacts for the paper.

    The summary is "no user" when the user is not stored, the stringified
    error_status when one is set, "no contacts" when edges or tweets are
    missing, and otherwise a list with the size of each contact set in
    User.NEBR_KEYS order.
    """
    for user_d in itertools.islice(user_ds, 2600):
        target = User.get_id(user_d['id'])

        if not target:
            yield "no user"
            continue
        if target.error_status:
            yield str(target.error_status)
            continue

        edges = Edges.get_id(target._id)
        tweets = Tweets.get_id(target._id)
        if edges and tweets:
            contact_sets = _contact_sets(tweets, edges)
            yield [len(contact_sets[key]) for key in User.NEBR_KEYS]
        else:
            yield "no contacts"
Example #11
0
def edges_d(user_d, geo_ats):
    """
    Create one dict per target user with information about one selected
    contact for each of the four types of contact.

    user_d is the raw dict for the target user; it is wrapped in a User.
    geo_ats is passed through to _ated together with a pair of user ids.

    Returns an empty list when the user has no neighbors. Otherwise returns
    a one-element list holding a dict with '_id' and 'mloc' for the target
    user plus, for each contact type that has a usable contact, an entry
    under 'jfol', 'jfrd', 'rfrd' or 'jat' describing that contact.
    """
    me = User(user_d)
    if not me.neighbors:
        return []
    nebrs = set(me.neighbors)
    me_usa = _in_usa(me.median_loc[0], me.median_loc[1])

    # Maps the User attribute holding each contact list to its short key.
    keys = {'just_followers': 'jfol',
            'just_friends': 'jfrd',
            'rfriends': 'rfrd',
            'just_mentioned': 'jat'}
    rels = dict(_id=me._id, mloc=me.median_loc)
    for attr, short in keys.items():
        # A contact list can be unset (None); treat that as empty, as the
        # other functions in this file do.
        contacts = getattr(me, attr) or []
        # The selected contact is the first one that is also a neighbor.
        amigo_id = next((a for a in contacts if a in nebrs), None)
        if amigo_id is None:
            continue
        amigo = User.get_id(amigo_id)
        gnp = amigo.geonames_place.to_d()
        if gnp['mdist'] > 1000:
            # Contacts whose place has an mdist above 1000 are left out.
            continue
        rels[short] = dict(
            folc=amigo.followers_count,
            frdc=amigo.friends_count,
            lofrd=amigo.local_friends,
            lofol=amigo.local_followers,
            prot=amigo.protected,
            lat=gnp['lat'],
            lng=gnp['lng'],
            mdist=gnp['mdist'],
            _id=amigo._id,
            i_at=_ated(geo_ats, me._id, amigo._id),
            u_at=_ated(geo_ats, amigo._id, me._id),
            # True only when both the target user and the contact are in
            # the USA according to _in_usa.
            usa=me_usa and _in_usa(gnp['lng'], gnp['lat']),
        )

    return [rels]