def _find_race(user):
    """Find the first ``loc_data`` entry with a polygon containing the user.

    ``user`` must unpack into exactly two items: a user id and an indexable
    whose elements 0 and 1 are passed to ``pip.point_in_poly`` as latitude
    and longitude.  ``loc_data`` is a mapping defined outside this function;
    each of its values is iterated as a collection of polygons.

    Returns ``[int(user_id), race[0]]`` for the first matching key ``race``,
    or ``None`` when no polygon contains the point.
    """
    user_id, coords = user
    lat, lng = coords[0], coords[1]
    for race, polygons in loc_data.items():
        # any() stops at the first containing polygon, like the nested loop did.
        if any(pip.point_in_poly(lat, lng, polygon) for polygon in polygons):
            return [int(user_id), race[0]]
    return None
def loadAllHomeLoc(hbk_poly):
    """Read the user home-location CSV and keep homes inside ``hbk_poly``.

    The file ``my.DATA_FOLDER/my.HBK_USER_HOME_LOC_FILE`` is read as
    comma-separated rows; column 0 is converted with ``int`` and columns 1
    and 2 with ``float``.  Empty rows are skipped.

    Returns a list of ``[user_id, coord_1, coord_2]`` lists for the rows whose
    point satisfies ``pip.point_in_poly(coord_1, coord_2, hbk_poly)``.
    Progress messages are printed to stdout.
    """
    print('Loading all user homes...')
    homes_inside = []
    home_loc_path = my.DATA_FOLDER + '/' + my.HBK_USER_HOME_LOC_FILE
    # Binary mode is kept as in the original (Python 2 csv convention).
    with open(home_loc_path, 'rb') as home_file:
        for record in csv.reader(home_file, delimiter=','):
            if not record:
                continue  # blank line in the CSV
            lat = float(record[1])
            lng = float(record[2])
            if pip.point_in_poly(lat, lng, hbk_poly):
                homes_inside.append([int(record[0]), lat, lng])
    print(str(len(homes_inside)) + ' users with homes inside bounds.')
    return homes_inside
def loadUsersInGangTty(tty_polys, hbk_user_home_loc):
    """Group user ids by the gang territories that contain their home.

    ``tty_polys`` maps a gang id to its polygon.  Each entry of
    ``hbk_user_home_loc`` is indexable as ``[user_id, coord_1, coord_2]``.
    Every home is tested against every polygon, so a user is listed under
    each territory that contains the home.

    Returns a dict mapping gang id to a list of ``int`` user ids; gang ids
    with no homes inside are absent.  A per-gang summary and the total are
    printed to stdout.
    """
    print('Loading user homes in each gang tty...')
    users_by_gang = {}
    for home in hbk_user_home_loc:
        for gang_id in tty_polys:
            is_inside = pip.point_in_poly(
                float(home[1]), float(home[2]), tty_polys[gang_id])
            if is_inside:
                users_by_gang.setdefault(gang_id, []).append(int(home[0]))

    home_counts = dict(
        (gang_id, len(members)) for gang_id, members in users_by_gang.items())
    print('Gang IDs with homes inside them: %s' % home_counts)
    print('Total home: %s' % (sum(home_counts.values())))
    return users_by_gang
def calcVisitationMat__NEW(hbk_all_tweets, tty_polys, hbk_users_in_gang_t, dist_norm=None, hbk_user_home_loc=None):
    """Count tweets per (home gang, visited territory) pair.

    ``visit_mat[i][j]`` is the number of tweets whose author is mapped to
    gang ``i`` and whose point (``tweet[1]``, ``tweet[2]``) lies inside
    ``tty_polys[j]``.  The matrix is zero-initialised for every pair of ids
    in ``my.HBK_GANG_ID_LIST``.

    Parameters:
        hbk_all_tweets: iterable of tweets indexable as
            ``[user_id, coord_1, coord_2, ...]``.
        tty_polys: mapping of gang id to territory polygon.
        hbk_users_in_gang_t: mapping of gang id to the user ids homed there.
            If a user id appears under several gangs, the gang iterated last
            is the one used.
        dist_norm: when truthy, a log line is printed and a home lookup is
            built from ``hbk_user_home_loc``.  The counts themselves are not
            normalised by the current code.
        hbk_user_home_loc: iterable of ``[user_id, coord_1, coord_2]``; only
            read when ``dist_norm`` is truthy.

    Returns:
        Nested dict ``visit_mat[from_id][to_id] -> int``.

    Raises:
        KeyError: if a tweet's user id is not in ``hbk_users_in_gang_t``, or
            a matching pair of ids is not in ``my.HBK_GANG_ID_LIST``.
    """
    print('Calculating visitation matrix...')

    home_by_user = {}
    if dist_norm:
        print('...for distance norm.')
        # NOTE(review): this lookup is not read anywhere below; it was only
        # used by a disabled distance-normalised variant of this function.
        for home in hbk_user_home_loc:
            home_by_user[home[0]] = [home[1], home[2]]

    # Invert gang -> users into user -> gang.
    user_to_gang = {}
    for gang_id in hbk_users_in_gang_t:
        for user_id in hbk_users_in_gang_t[gang_id]:
            user_to_gang[user_id] = gang_id

    visit_mat = dict(
        (from_id, dict((to_id, 0) for to_id in my.HBK_GANG_ID_LIST))
        for from_id in my.HBK_GANG_ID_LIST)

    for tweet in hbk_all_tweets:
        from_id = user_to_gang[tweet[0]]
        for to_id in tty_polys:
            if pip.point_in_poly(tweet[1], tweet[2], tty_polys[to_id]):
                visit_mat[from_id][to_id] += 1

    # A previous implementation (per-gang filtering with optional weighting
    # of each tweet by 1/dist_norm[distance bucket]) used to follow here as a
    # disabled string block; it had no runtime effect and has been dropped.
    return visit_mat
def calc_tweet_freq_in_rival_home():
    """Count tweets in home and rival territories, for gang users and for all users.

    For every gang id in ``my.HBK_GANG_AND_RIVAL_IDS`` the result holds two
    records, ``'gang'`` and ``'la'``, each with the keys ``'total'``,
    ``'home'`` and ``'rival'`` (a dict keyed by rival id):

    * ``'gang'`` is computed from the gang's JSON file: all points of its
      users, the points inside the home territory, and the outside points
      that fall into each rival's polygon.
    * ``'la'`` is computed with Django queries on ``Tweet``: tweets inside
      the gang's bounding box, inside its home polygon, and inside each
      rival's polygon, always restricted to ``my.HBK_BIG_BOUNDS_string``.

    Side effects: appends a hard-coded directory to ``sys.path``, configures
    Django through ``setup_environ``, writes the serialised result to
    ``my.DATA_FOLDER/my.OUTPUT_COUNT_FILE`` and prints progress and the final
    counts.  Returns ``None``.
    """
    import sys
    sys.path.append("/home/gambit/collector/gambit2/")
    from django.core.management import setup_environ
    from gambit import settings
    setup_environ(settings)
    from scraper.models import Location, Tweet

    counts = {}
    tty_counts = {}  # territory id -> tweet count, shared across gangs
    tty_polys = {}

    # Read location polygons for gang territories (location ids 23..54).
    with open(my.DATA_FOLDER + '/' + my.LOCATION_DATA_FILE, 'rb') as fp1:
        location_data = anyjson.deserialize(fp1.read())
    for loc in location_data:
        if 23 <= loc['id'] <= 54:
            tty_polys[loc['id']] = loc['polygon']

    def _tty_tweet_count(tty_id, log_line):
        """Return the tweet count inside territory ``tty_id``, querying once."""
        if tty_id not in tty_counts:
            print(log_line)
            tweets = Tweet.objects.all()
            big_bounds = Location.parse_bbox(my.HBK_BIG_BOUNDS_string)
            tweets = tweets.filter(geo__within=big_bounds)
            polygon = Location.parse_polygon(arr_to_str(tty_polys[tty_id]))
            tweets = tweets.filter(geo__within=polygon)
            tty_counts[tty_id] = tweets.count()
        return tty_counts[tty_id]

    for gang_id in my.HBK_GANG_AND_RIVAL_IDS:
        gang_file = (my.DATA_FOLDER + '/' + my.GANG_DATA_FOLDER + '/'
                     + str(gang_id) + '.json')
        with open(gang_file, 'rb') as fp1:
            gang_data = anyjson.deserialize(fp1.read())
        gang_center = calc_center(gang_data['location_polygon'])
        bounds = get_bounding_box(gang_center, my.BOUND_RADIUS_MILES)
        bbox_str = arr_to_str(bounds)

        gang_stats = {'total': 0, 'home': 0, 'rival': {}}
        la_stats = {'total': 0, 'home': 0, 'rival': {}}
        counts[gang_id] = {'gang': gang_stats, 'la': la_stats}
        rival_ids = my.HBK_GANG_AND_RIVAL_IDS[gang_id]

        # Count tweets for all gang members.
        for rival_id in rival_ids:
            gang_stats['rival'][rival_id] = 0
        users = gang_data['users']
        for user_id in users:
            inside = users[user_id]['points_inside']
            outside = users[user_id]['points_outside']
            gang_stats['total'] += len(inside) + len(outside)  # Universe
            gang_stats['home'] += len(inside)                  # Home tty
            for latlng in outside:
                for rival_id in rival_ids:
                    if pip.point_in_poly(latlng[0], latlng[1], tty_polys[rival_id]):
                        gang_stats['rival'][rival_id] += 1     # Rival tty

        # Count tweets for all LA users.
        print('Django query... gang-id ' + str(gang_id) + '... universe.')
        tweets = Tweet.objects.all()
        big_bounds = Location.parse_bbox(my.HBK_BIG_BOUNDS_string)
        tweets = tweets.filter(geo__within=big_bounds)
        gang_bbox = Location.parse_bbox(bbox_str)
        tweets = tweets.filter(geo__within=gang_bbox)
        la_stats['total'] = tweets.count()  # Universe

        la_stats['home'] = _tty_tweet_count(
            gang_id,
            'Django query... gang-id ' + str(gang_id) + '... home tty.')
        for rival_id in rival_ids:
            la_stats['rival'][rival_id] = _tty_tweet_count(
                rival_id,
                'Django query... rival-id ' + str(rival_id) + '... rival tty.')

    with open(my.DATA_FOLDER + '/' + my.OUTPUT_COUNT_FILE, 'wb') as fp2:
        fp2.write(anyjson.serialize(counts))
    print(counts)
def keepPolygon(tweets, polygon):
    """Return a new list of the tweets located inside ``polygon``.

    Each tweet is indexable; ``tweet[1]`` and ``tweet[2]`` are handed to
    ``pip.point_in_poly`` as the point to test.  Input order is preserved
    and ``tweets`` itself is not modified.
    """
    return [
        tweet for tweet in tweets
        if pip.point_in_poly(tweet[1], tweet[2], polygon)
    ]
def removePolygon(tweets, polygon):
    """Return a new list of the tweets located outside ``polygon``.

    Each tweet is indexable; ``tweet[1]`` and ``tweet[2]`` are handed to
    ``pip.point_in_poly`` as the point to test.  Input order is preserved
    and ``tweets`` itself is not modified.
    """
    return [
        tweet for tweet in tweets
        if not pip.point_in_poly(tweet[1], tweet[2], polygon)
    ]