Example #1
0
def _find_race(user):
	"""Return ``[user_id, race_code]`` for the first region containing the user.

	``user`` is a ``(user_id, ll)`` pair; ``ll[0]`` and ``ll[1]`` are handed to
	``pip.point_in_poly`` as the point to test. The module-level ``loc_data``
	maps a race key to an iterable of polygons. Keys are scanned in the
	mapping's iteration order and the first key owning a polygon that
	contains the point wins.

	Returns a two-element list: the user id converted to ``int`` and the
	first element of the matching race key. Returns ``None`` when no polygon
	contains the point.
	"""
	user_id, coords = user
	lat, lng = coords[0], coords[1]

	for race_key, polygons in loc_data.items():
		# any() stops at the first containing polygon, like the nested loop did.
		if any(pip.point_in_poly(lat, lng, polygon) for polygon in polygons):
			return [int(user_id), race_key[0]]
	return None
Example #2
0
def loadAllHomeLoc(hbk_poly):
	"""Load the user home locations that fall inside ``hbk_poly``.

	Reads the CSV file ``my.DATA_FOLDER/my.HBK_USER_HOME_LOC_FILE``. For every
	non-empty row, columns 1 and 2 are parsed as floats and tested against
	``hbk_poly`` with ``pip.point_in_poly``; column 0 is parsed as the integer
	user id for rows that pass.

	Returns a list of ``[user_id, coord_1, coord_2]`` lists, in file order.
	Raises IndexError / ValueError for non-empty rows that are too short or
	hold non-numeric values.
	"""
	homes_in_bounds = []
	# Parenthesised single-argument print emits the same text on Python 2.
	print('Loading all user homes...')
	home_file = my.DATA_FOLDER + '/' + my.HBK_USER_HOME_LOC_FILE
	with open(home_file, 'rb') as fp1:
		for row in csv.reader(fp1, delimiter=','):
			if not row:
				# Blank line in the CSV.
				continue
			lat, lng = float(row[1]), float(row[2])
			if pip.point_in_poly(lat, lng, hbk_poly):
				homes_in_bounds.append([int(row[0]), lat, lng])
	print(str(len(homes_in_bounds)) + ' users with homes inside bounds.')
	return homes_in_bounds
Example #3
0
def loadUsersInGangTty(tty_polys, hbk_user_home_loc):
	"""Group users by the gang territories that contain their home.

	Args:
		tty_polys: mapping of gang id -> polygon, in whatever form
			``pip.point_in_poly`` accepts as its third argument.
		hbk_user_home_loc: iterable of rows whose element 0 is the user id
			and whose elements 1 and 2 are the home coordinates (each is
			passed through ``float``).

	Returns:
		dict mapping gang id -> list of integer user ids whose home lies in
		that gang's polygon. Gangs with no homes inside are absent. A home
		that falls inside several polygons is listed under each of them.

	Side effects:
		Prints a progress line, the per-gang home counts and their total.
	"""
	# Single-argument parenthesised print behaves the same on Python 2 and 3.
	print('Loading user homes in each gang tty...')
	hbk_users_in_gang_t = {}
	for user_home in hbk_user_home_loc:
		# Convert the coordinates once per home instead of once per territory.
		lat, lng = float(user_home[1]), float(user_home[2])
		for gang_id, polygon in tty_polys.items():
			if pip.point_in_poly(lat, lng, polygon):
				hbk_users_in_gang_t.setdefault(gang_id, []).append(int(user_home[0]))

	# Per-gang home counts, used only for the summary output below.
	g_list = dict((gang_id, len(user_ids)) for gang_id, user_ids in hbk_users_in_gang_t.items())
	print('Gang IDs with homes inside them: %s' % g_list)
	print('Total home: %s' % (sum(g_list.values())))
	return hbk_users_in_gang_t
Example #4
0
def calcVisitationMat__NEW(hbk_all_tweets, tty_polys, hbk_users_in_gang_t, dist_norm=None, hbk_user_home_loc=None):
	"""Count, per home gang, the tweets that fall inside each gang territory.

	``visit_mat[i][j]`` is the number of tweets by users whose home is in
	gang ``i``'s territory that are located inside gang ``j``'s polygon.

	Args:
		hbk_all_tweets: iterable of tweets; element 0 is the user id and
			elements 1 and 2 are the point passed to ``pip.point_in_poly``.
		tty_polys: mapping of gang id -> territory polygon.
		hbk_users_in_gang_t: mapping of gang id -> iterable of user ids with
			a home in that territory. If a user appears under several gangs,
			the gang iterated last wins.
		dist_norm: only its truthiness is used, to print a status line.
			NOTE: this implementation always returns raw counts. A previous,
			disabled variant weighted each tweet by
			``1 / dist_norm[int(round(dist / 100 + 1))]`` (``dist`` being the
			``geo.distance`` between the tweet and the user's home) and
			rounded each cell to 5 decimals.
		hbk_user_home_loc: unused; kept for interface compatibility. It was
			only needed by the disabled distance-weighted variant, so it may
			safely be ``None`` even when ``dist_norm`` is given.

	Returns:
		dict of dicts with one row and one column per id in
		``my.HBK_GANG_ID_LIST``, holding integer counts.

	Raises:
		KeyError: if a tweet's user id is not present in
			``hbk_users_in_gang_t``, or a matched gang id is missing from
			``my.HBK_GANG_ID_LIST``.
	"""
	# Single-argument parenthesised print behaves the same on Python 2 and 3.
	print('Calculating visitation matrix...')
	if dist_norm:
		print('...for distance norm.')

	# Reverse lookup: user id -> gang id of the territory holding their home.
	gang_of_user = {}
	for gang_id in hbk_users_in_gang_t:
		for user_id in hbk_users_in_gang_t[gang_id]:
			gang_of_user[user_id] = gang_id

	# Zero-filled square matrix over all known gang ids.
	gang_ids = my.HBK_GANG_ID_LIST
	visit_mat = dict((from_id, dict((to_id, 0) for to_id in gang_ids)) for from_id in gang_ids)

	for tweet in hbk_all_tweets:
		from_id = gang_of_user[tweet[0]]
		for to_id, polygon in tty_polys.items():
			if pip.point_in_poly(tweet[1], tweet[2], polygon):
				visit_mat[from_id][to_id] += 1

	return visit_mat
Example #5
0
def calc_tweet_freq_in_rival_home():
	"""Count tweets in home and rival territories, for gang members and for all users.

	For every gang id in ``my.HBK_GANG_AND_RIVAL_IDS`` (a mapping of gang id
	-> rival gang ids) this builds::

		counts[gang_id] = {
			'gang': {'total': ..., 'home': ..., 'rival': {rival_id: ...}},
			'la':   {'total': ..., 'home': ..., 'rival': {rival_id: ...}},
		}

	* ``'gang'`` figures come from the gang's JSON file under
	  ``my.GANG_DATA_FOLDER``: ``total`` is every point of every user,
	  ``home`` the ``points_inside`` entries, and ``rival[r]`` the
	  ``points_outside`` entries that ``pip.point_in_poly`` places inside
	  rival ``r``'s polygon.
	* ``'la'`` figures are Django ``Tweet`` counts restricted to
	  ``my.HBK_BIG_BOUNDS_string``: ``total`` within the bounding box around
	  the gang's centre, ``home`` / ``rival[r]`` within the respective
	  territory polygon.

	The result is serialised to ``my.DATA_FOLDER/my.OUTPUT_COUNT_FILE`` and
	printed. Nothing is returned.

	Raises KeyError if a gang or rival id has no polygon loaded from the
	location file (only location ids 23..54 are loaded).
	"""
	# Django has to be configured before the models can be imported.
	import sys
	sys.path.append("/home/gambit/collector/gambit2/")
	from django.core.management import setup_environ
	from gambit import settings
	setup_environ(settings)

	from scraper.models import Location, Tweet

	counts = {}
	tty_counts = {}		# territory id -> tweet count, cached across gangs
	tty_polys = {}		# territory id -> polygon

	# Read location polygons for gang territories
	with open(my.DATA_FOLDER + '/' + my.LOCATION_DATA_FILE, 'rb') as fp1:
		location_data = anyjson.deserialize(fp1.read())
	for loc in location_data:
		if 23 <= loc['id'] <= 54:
			tty_polys[loc['id']] = loc['polygon']

	def tweets_in_big_bounds():
		# Base queryset shared by every count: tweets inside the big bounds.
		tweets = Tweet.objects.all()
		return tweets.filter(geo__within=Location.parse_bbox(my.HBK_BIG_BOUNDS_string))

	def count_tweets_in_tty(tty_id, message):
		# Count tweets inside one territory polygon, querying at most once per id.
		if tty_id not in tty_counts:
			print(message)
			tweets = tweets_in_big_bounds()
			polygon = Location.parse_polygon(arr_to_str(tty_polys[tty_id]))
			tty_counts[tty_id] = tweets.filter(geo__within=polygon).count()
		return tty_counts[tty_id]

	# Count tweets
	for gang_id in my.HBK_GANG_AND_RIVAL_IDS:
		with open(my.DATA_FOLDER + '/' + my.GANG_DATA_FOLDER + '/' + str(gang_id) + '.json', 'rb') as fp1:
			gang_data = anyjson.deserialize(fp1.read())
		gang_center = calc_center(gang_data['location_polygon'])
		bounds = get_bounding_box(gang_center, my.BOUND_RADIUS_MILES)
		bbox = arr_to_str(bounds)

		rival_ids = my.HBK_GANG_AND_RIVAL_IDS[gang_id]
		gang_counts = {
			'total' : 0,
			'home' : 0,
			'rival' : dict((rival_id, 0) for rival_id in rival_ids),
		}
		la_counts = {
			'total' : 0,
			'home' : 0,
			'rival' : {},
		}
		counts[gang_id] = {
			'gang' : gang_counts,
			'la' : la_counts,
		}

		# Count tweets for all gang members
		for user_data in gang_data['users'].values():
			points_inside = user_data['points_inside']
			points_outside = user_data['points_outside']
			gang_counts['total'] += len(points_inside) + len(points_outside)	# Universe
			gang_counts['home'] += len(points_inside)							# Home tty
			for latlng in points_outside:
				for rival_id in rival_ids:
					if pip.point_in_poly(latlng[0], latlng[1], tty_polys[rival_id]):
						gang_counts['rival'][rival_id] += 1						# Rival tty

		# Count tweets for all LA users
		print('Django query... gang-id ' + str(gang_id) + '... universe.')
		tweets = tweets_in_big_bounds()
		tweets = tweets.filter(geo__within=Location.parse_bbox(bbox))
		la_counts['total'] = tweets.count()										# Universe

		la_counts['home'] = count_tweets_in_tty(
			gang_id, 'Django query... gang-id ' + str(gang_id) + '... home tty.')		# Home tty

		for rival_id in rival_ids:
			la_counts['rival'][rival_id] = count_tweets_in_tty(
				rival_id, 'Django query... rival-id ' + str(rival_id) + '... rival tty.')	# Rival tty

	with open(my.DATA_FOLDER + '/' + my.OUTPUT_COUNT_FILE, 'wb') as fp2:
		fp2.write(anyjson.serialize(counts))
	print(counts)
Example #6
0
def keepPolygon(tweets, polygon):
	"""Return a new list with the tweets located inside ``polygon``.

	Elements 1 and 2 of each tweet are the point tested with
	``pip.point_in_poly``. Input order is preserved and the input is not
	modified.
	"""
	return [
		tweet for tweet in tweets
		if pip.point_in_poly(tweet[1], tweet[2], polygon)
	]
Example #7
0
def removePolygon(tweets, polygon):
	"""Return a new list with the tweets located outside ``polygon``.

	Elements 1 and 2 of each tweet are the point tested with
	``pip.point_in_poly``; tweets for which the test is true are dropped.
	Input order is preserved and the input is not modified.
	"""
	return [
		tweet for tweet in tweets
		if not pip.point_in_poly(tweet[1], tweet[2], polygon)
	]