Beispiel #1
0
def mloc_users(users_and_coords):
    """
    Pick the users whose geotweets give a usable home location.

    The input is split into a uid->user mapping and a uid->coordinates
    mapping by _untangle_users_and_coords. A user is dropped when:

      * they have two or fewer recorded spots,
      * both 'followers_count' and 'friends_count' are 0, or
      * the median distance from their median spot to each of their spots
        is above 50 (presumably miles, judging by utils.coord_in_miles).

    Every surviving user dict gets an 'mloc' key holding the median spot
    (the dict is modified in place). The survivors are returned as a list
    in random order.
    """
    users, locs = _untangle_users_and_coords(users_and_coords)
    keepers = []
    for uid, profile in users.iteritems():
        points = locs[uid]

        too_few_points = len(points) <= 2
        if too_few_points:
            continue

        no_connections = (
            profile['followers_count'] == 0
            and profile['friends_count'] == 0
        )
        if no_connections:
            continue

        center = utils.median_2d(points)
        miles_from_center = []
        for point in points:
            miles_from_center.append(utils.coord_in_miles(center, point))

        typical_distance = numpy.median(miles_from_center)
        if typical_distance > 50:
            # user moves too much
            continue

        profile['mloc'] = center
        keepers.append(profile)

    random.shuffle(keepers)
    return keepers
Beispiel #2
0
def mloc_reject_count(users_and_coords):
    """
    Tally how many users fall into each category, using the same criteria
    as mloc_users. (This job was done to calculate a number for the paper,
    and is almost trash.)

    Each user is counted under exactly one key, checked in this order:

      'spots'  -- two or fewer recorded spots
      'moves'  -- median distance from the median spot is above 50
      'counts' -- 'followers_count' and 'friends_count' are both 0
      'good'   -- none of the above

    So a user who both moves too much and has zero followers/friends is
    counted under 'moves'. Returns an iterator of (key, count) pairs.
    """
    tally = collections.defaultdict(int)
    users, locs = _untangle_users_and_coords(users_and_coords)
    for uid, profile in users.iteritems():
        points = locs[uid]
        if len(points) <= 2:
            bucket = 'spots'
        else:
            center = utils.median_2d(points)
            miles_from_center = [
                utils.coord_in_miles(center, point) for point in points
            ]
            if numpy.median(miles_from_center) > 50:
                bucket = 'moves'
            elif (profile['followers_count'] == 0
                    and profile['friends_count'] == 0):
                bucket = 'counts'
            else:
                bucket = 'good'
        tally[bucket] += 1
    return tally.iteritems()