def test_fix_mloc_mdists(self):
    """
    Run the fix_mloc_mdists job over two saved users and check the mdist
    stored on each user's geonames_place afterwards.
    """
    self.FS["mdists"] = [{"other": 2}]
    self.FS["mloc_uids.03"] = [3, 103]

    # one state-level location and one city-level location
    for uid, place in ((3, "Texas"), (103, "Bryan, TX")):
        User(_id=uid, location=place).save()

    with _patch_gisgraphy():
        self.gob.run_job("fix_mloc_mdists")

    state_user, city_user = [User.get_id(uid) for uid in (3, 103)]
    self.assertEqual(state_user.geonames_place.mdist, 2000)
    self.assertEqual(city_user.geonames_place.mdist, 2)
def rfr_triads(user_d):
    """
    Find a target user with a social triangle and a recip friend not in that
    triangle. Return info about all four users.

    user_d is the dict used to build the target User. The result is a list
    holding a single dict with the keys 'me', 'you', 'my' and 'our', or an
    empty list when no such structure is found. Each value is a dict with an
    '_id' and a 'loc': for 'me' the loc is me.median_loc, for the contacts it
    is their geonames_place as a dict without the 'zipcode' key.
    """
    # We are looking for this structure in the social graph:
    # my  you---our
    #   \  |  /
    #     me
    # me is a target user, the other users are contacts, and the edges are all
    # reciprocal.
    me = User(user_d)
    me_rfr = set(me.rfriends or []).intersection(me.neighbors or [])
    # three distinct contacts (you, my, our) are needed
    if len(me_rfr)<3:
        return []
    for you_id in me_rfr:
        you_ed = Edges.get_id(you_id)
        if not you_ed:
            continue #There are no edges for this neighbor.
        # contacts of me that are also reciprocal with you
        ours = me_rfr.intersection(you_ed.friends,you_ed.followers)
        # contacts of me that have no edge to or from you
        mine = me_rfr.difference(you_ed.friends,you_ed.followers)
        # you_id comes from me_rfr and is normally absent from its own edge
        # lists, so without this it would always land in `mine` and could be
        # picked as "my", collapsing the four users into three.
        mine.discard(you_id)
        ours.discard(you_id)
        if ours and mine:
            d = dict(
                me = dict(_id=me._id,loc=me.median_loc),
                you = dict(_id=you_id),
                my = dict(_id=random.choice(list(mine))),
                our = dict(_id=random.choice(list(ours))),
            )
            # 'me' already has its location; look up the three contacts
            for role in ('you','my','our'):
                v = d[role]
                gnp = User.get_id(v['_id'],fields=['gnp']).geonames_place.to_d()
                gnp.pop('zipcode',None)
                v['loc'] = gnp
            # only the first structure found is reported
            return [d]
    return []
def cheap_locals(nebr_ids,mloc_uids,cutoff=20):
    """
    Local contact ratio based on a random sample of up to `cutoff` leafs.

    For every distinct id in nebr_ids, yields (user id, ratio) where ratio is
    the share of sampled, located leafs whose coord_in_miles distance from the
    user is below 25. Contacts listed in mloc_uids are never sampled, and
    users without any usable leaf yield nothing.
    """
    visited = set()
    for nebr_id in nebr_ids:
        # There can be duplicates because nebr_ids is created by clumping
        # nebr_split
        if nebr_id in visited:
            continue
        visited.add(nebr_id)

        nebr = User.get_id(nebr_id)
        nebr_loc = nebr.geonames_place.to_d()

        # gather the contacts from every neighbor list, skipping target users
        contact_ids = []
        for key in User.NEBR_KEYS:
            for cid in (getattr(nebr,key) or []):
                if cid not in mloc_uids:
                    contact_ids.append(cid)
        if not contact_ids:
            continue

        random.shuffle(contact_ids)
        sample = contact_ids[:cutoff]
        leafs = User.find(User._id.is_in(sample), fields=['gnp'])
        miles = [
            coord_in_miles(nebr_loc,leaf.geonames_place.to_d())
            for leaf in leafs
            if leaf.has_place()
        ]
        if not miles:
            continue
        nearby = sum(1.0 for dist in miles if dist<25)
        yield nebr._id,nearby/len(miles)
def test_find_contacts(self):
    """
    After the find_contacts fixture for user 6 has run, check the ids written
    to find_contacts.06 and the contact lists saved on user 6.
    """
    self._find_contacts_6()

    # first element of the second field of every result row, in sorted order
    first_ids = [list(row[1])[0] for row in self.FS["find_contacts.06"]]
    first_ids.sort()
    self.assertEqual(first_ids, [0, 1, 2, 3, 7, 12, 18, 24, 30])

    target = User.get_id(6)
    self.assertEqual(target.just_mentioned, [7])
    self.assertEqual(sorted(target.just_friends), [12, 18, 24, 30])
def find_leafs(uid):
    """
    Fetch and save the contacts of user `uid` through _save_user_contacts
    (random contacts, limit of 100), then return _my_contacts for that user.
    """
    api = twitter.TwitterResource()
    contact = User.get_id(uid)
    _save_user_contacts(api, contact, _pick_random_contacts, limit=100)
    leafs = _my_contacts(contact)
    return leafs
def pick_nebrs(mloc_uid):
    """
    Choose the neighbors of one target user and store them on the user.

    The selection itself is done by _pick_neighbors (25 located contacts
    according to the original note). Returns a generator of
    (User.mod_id(neighbor_id), neighbor_id) pairs.
    """
    # reads predict.prep.mloc_uids, requires lookup_contacts, but don't read it.
    target = User.get_id(mloc_uid)
    target.neighbors = _pick_neighbors(target)
    target.save()
    return ((User.mod_id(nebr_id),nebr_id) for nebr_id in target.neighbors)
def test_lookup_contacts(self):
    """
    Run lookup_contacts for contacts 4 and 404: user 4 must be saved with
    its profile and place, user 404 must not be saved at all.
    """
    self.FS["mdists"] = [{"other": 2.5}]
    self.FS["contact_split.04"] = [4, 404]

    User.database.User = mock.MagicMock()
    # MockTwitterResource will throw a 404 if you lookup user 404.
    # This lets us know the user was skipped.
    User.database.User.find.return_value = [dict(_id=404)]

    with _patch_twitter(), _patch_gisgraphy():
        self.gob.run_job("lookup_contacts")

    found = User.get_id(4)
    self.assertEqual(found.screen_name, "user_4")
    place = found.geonames_place
    self.assertEqual(place.feature_code, "PPLA2")
    self.assertEqual(found.geonames_place.mdist, 3)

    skipped = User.get_id(404)
    self.assertEqual(skipped, None)
def test_find_contacts_errors(self):
    """
    Run find_contacts for users 404 and 503 and check that each is saved
    with its id as error_status and without contacts, edges or tweets.
    """
    failing = (("mloc_users.04", 404), ("mloc_users.03", 503))
    for fs_key, uid in failing:
        self.FS[fs_key] = [dict(id=uid)]

    with _patch_twitter():
        self.gob.run_job("find_contacts")

    for _, uid in failing:
        broken = User.get_id(uid)
        self.assertEqual(broken.error_status, uid)
        self.assertEqual(broken.neighbors, None)
        self.assertEqual(broken.rfriends, None)
        self.assertEqual(Edges.get_id(uid), None)
        self.assertEqual(Tweets.get_id(uid), None)
def find_contacts(user_ds):
    """
    for each target user, fetch edges and tweets, pick 100 located contact ids

    user_ds is an iterable of user dicts carrying an 'id' key; only the first
    2600 are processed. A user that is already stored is not fetched again
    (a warning is logged), but its contacts are still yielded. Yields every
    item produced by _my_contacts for each processed user.
    """
    gis = gisgraphy.GisgraphyResource()
    twit = twitter.TwitterResource()
    for user_d in itertools.islice(user_ds,2600):
        user = User.get_id(user_d['id'])
        if user:
            # logging.warn is a deprecated alias; warning() is the
            # documented name and behaves the same.
            logging.warning("not revisiting %d",user._id)
        else:
            user = User(user_d)
            user.geonames_place = gis.twitter_loc(user.location)
            _save_user_contacts(twit, user, _pick_random_contacts, limit=100)
        for mod_nebr in _my_contacts(user):
            yield mod_nebr
def total_contacts(user_ds):
    """
    Count the total number of contacts (to include in the paper).

    Looks at the first 2600 user dicts and yields one item per user: the
    string "no user", the user's error status as a string, the string
    "no contacts", or a list with the size of each contact set in
    User.NEBR_KEYS order.
    """
    for user_d in itertools.islice(user_ds,2600):
        target = User.get_id(user_d['id'])
        if not target:
            yield "no user"
            continue
        if target.error_status:
            yield str(target.error_status)
            continue

        edges = Edges.get_id(target._id)
        tweets = Tweets.get_id(target._id)
        if edges and tweets:
            contact_sets = _contact_sets(tweets,edges)
            yield [len(contact_sets[key]) for key in User.NEBR_KEYS]
        else:
            yield "no contacts"
def edges_d(user_d, geo_ats):
    """
    create one dict per target user with information about one selected
    contact for each of the four types of contact

    user_d is the dict used to build the target User; geo_ats is passed
    through to _ated. Returns an empty list when the user has no neighbors,
    otherwise a one-element list holding a dict with '_id', 'mloc' and, for
    every contact type that has a usable neighbor, a sub-dict under the short
    key 'jfol', 'jfrd', 'rfrd' or 'jat'.
    """
    me = User(user_d)
    if not me.neighbors:
        return []
    nebrs = set(me.neighbors)
    # NOTE(review): median_loc is presumably (lng, lat), matching the
    # _in_usa(gnp['lng'],gnp['lat']) call below -- confirm.
    me_usa = _in_usa(me.median_loc[0],me.median_loc[1])

    # User attribute holding the contact ids -> short key used in the output
    keys = {'just_followers':'jfol',
            'just_friends':'jfrd',
            'rfriends':'rfrd',
            'just_mentioned':'jat'}
    rels = dict(_id = me._id, mloc = me.median_loc)

    for attr,short in keys.items():
        # A contact list can be unset (None); treat that as empty.
        amigos = [a for a in (getattr(me,attr) or []) if a in nebrs]
        if not amigos:
            continue
        # the first neighbor of this type represents the type
        amigo = User.get_id(amigos[0])
        gnp = amigo.geonames_place.to_d()
        # contacts whose mdist is above 1000 are left out
        if gnp['mdist']>1000:
            continue
        rels[short] = dict(
            folc=amigo.followers_count,
            frdc=amigo.friends_count,
            lofrd=amigo.local_friends,
            lofol=amigo.local_followers,
            prot=amigo.protected,
            lat=gnp['lat'],
            lng=gnp['lng'],
            mdist=gnp['mdist'],
            _id=amigo._id,
            i_at=_ated(geo_ats,me._id,amigo._id),
            u_at=_ated(geo_ats,amigo._id,me._id),
            usa = me_usa and _in_usa(gnp['lng'],gnp['lat']),
        )

    return [rels]