def mloc_users(users_and_coords):
    """
    Pick the users whose geotagged tweets give a usable home location.

    The input is split into users and their coordinates by
    _untangle_users_and_coords.  A user is dropped when any of these hold:
      * they have two or fewer recorded spots,
      * both their followers_count and friends_count are zero,
      * the median distance (per utils.coord_in_miles) between their spots
        and the 2d median of those spots is greater than 50.

    Every surviving user dict gets its 2d median stored under 'mloc'.
    Returns the surviving users as a list in shuffled order.
    """
    users, locs = _untangle_users_and_coords(users_and_coords)
    keepers = []
    for uid, user in users.iteritems():
        spots = locs[uid]
        # too few points to estimate a home location
        if len(spots) < 3:
            continue
        # skip accounts that have neither followers nor friends
        has_connections = (
            user['followers_count'] != 0 or user['friends_count'] != 0
        )
        if not has_connections:
            continue
        center = utils.median_2d(spots)
        spread = numpy.median(
            [utils.coord_in_miles(center, spot) for spot in spots]
        )
        if spread > 50:
            # user moves too much
            continue
        user['mloc'] = center
        keepers.append(user)
    random.shuffle(keepers)
    return keepers
def mloc_reject_count(users_and_coords):
    """
    Count how many users fall into each accept/reject bucket used by
    mloc_users.  (This job was done to calculate a number for the paper,
    and is almost trash.)

    Each user lands in exactly one bucket, tested in this order:
      'spots'  - two or fewer recorded spots
      'moves'  - median distance from the 2d median of the spots is over 50
      'counts' - followers_count and friends_count are both zero
      'good'   - none of the above

    Returns an iterator over (bucket, count) pairs; buckets that were never
    hit do not appear.
    """
    users, locs = _untangle_users_and_coords(users_and_coords)
    tally = collections.defaultdict(int)
    for uid, user in users.iteritems():
        spots = locs[uid]
        if len(spots) < 3:
            bucket = 'spots'
        else:
            center = utils.median_2d(spots)
            spread = numpy.median(
                [utils.coord_in_miles(center, spot) for spot in spots]
            )
            if spread > 50:
                bucket = 'moves'
            elif user['followers_count'] == 0 and user['friends_count'] == 0:
                bucket = 'counts'
            else:
                bucket = 'good'
        # one increment per user, whatever the classification was
        tally[bucket] += 1
    return tally.iteritems()