Exemple #1
0
def _calc_visits(user_id):
	'''Count a user's away-from-home tweets per race category.

	Fetches every (ST_X, ST_Y) tweet point of user_id from my.REL_TWEET, keeps
	the points whose rounded geo.distance() from the user's home exceeds
	my.MIN_DIST, and classifies each kept point with _find_race().

	The home point is read from the module-level `homes` mapping, keyed by
	str(user_id); a missing key raises KeyError.

	Returns [user_id, visits]. visits has six counters: slots 0-4 follow
	`legend` ('w', 'b', 'a', 'h', 'o') and slot 5 counts points for which
	_find_race() returned a falsy value. The result is also printed.
	'''
	SQL = '''SELECT ST_X(geo), ST_Y(geo) \
			FROM  {rel_tweet} \
				WHERE user_id = %s \
			'''.format(rel_tweet=my.REL_TWEET)
	con = psycopg2.connect(my.DB_CONN_STRING)
	try:
		cur = con.cursor()
		cur.execute(SQL, (user_id, ))
		recs = cur.fetchall()
	finally:
		# Always release the connection, even when the query raises.
		con.close()

	home = homes[str(user_id)]
	# The home point does not change inside the loop, so convert it once.
	home_xyz = geo.xyz(home[0], home[1])

	visits = [0]*6
	legend = {'w': 0, 'b': 1, 'a': 2, 'h': 3, 'o': 4}

	for lat, lng in recs:
		dist = int(round(geo.distance(home_xyz, geo.xyz(lat, lng))))
		if dist <= my.MIN_DIST:
			continue
		race = _find_race([user_id, [lat, lng]])
		if race:
			# race[1] is the legend key of the matched category.
			visits[legend[race[1]]] += 1
		else:
			# Slot 5 collects points _find_race() could not classify.
			visits[5] += 1

	print [user_id, visits]
	return [user_id, visits]
Exemple #2
0
def _find_dir_list():
	'''Find the list of direction for all users in region. [user_directions.csv]'''
	with open('data/' + my.DATA_FOLDER + 'user_list.json', 'rb') as fpr:
		user_ids = anyjson.loads(fpr.read())
	user_ids = [int(user_id) for user_id in user_ids]
	print 'Read {0} user_ids'.format(len(user_ids))

	user_directions = []
	user_dir_trimmed = []
	con = psycopg2.connect(my.DB_CONN_STRING)
	cur = con.cursor()
	for user_id in user_ids:
		SQL = 'SELECT ST_X(geo), ST_Y(geo) \
			FROM {rel_home} \
			WHERE user_id = %s'.format(rel_home=my.REL_HOME)
		cur.execute(SQL, (user_id,))
		records = cur.fetchall()

		if len(records) > 0:
			home = records[0]
			hx, hy = home[1], home[0]

			SQL = 'SELECT ST_X(geo), ST_Y(geo) \
				FROM {rel_tweet} \
				WHERE user_id = %s'.format(rel_tweet=my.REL_TWEET, rel_home=my.REL_HOME) \
				+ my.QUERY_CONSTRAINT
			cur.execute(SQL, (user_id,))
			records = cur.fetchall()

			for rec in records:
				lat, lng = rec
				x, y = lng-hx, lat-hy
				if x != 0 and y != 0:
					deg = int(round(_calc_angle(x, y)))
					user_directions.append([user_id, deg])
					try:
						dist = int(round(geo.distance(geo.xyz(hy, hx), geo.xyz(lat, lng))))
					except:
						dist = 0
					if dist > my.MIN_DIR_DIST:
						user_dir_trimmed.append([user_id, deg])
		else:
			print 'Missed 1 user_id!'
	
	con.close()
	with open('data/' + my.DATA_FOLDER + 'displacement/' + 'user_directions.csv', 'wb') as fpw:
		cw = csv.writer(fpw, delimiter=',')
		for row in user_directions:
			cw.writerow(row)
	with open('data/' + my.DATA_FOLDER + 'displacement/' + 'user_dir_trimmed.csv', 'wb') as fpw:
		cw = csv.writer(fpw, delimiter=',')
		for row in user_dir_trimmed:
			cw.writerow(row)
	# Statistics
	x = [d[1] for d in user_directions]
	print len(x), min(x), max(x), sum(x)/len(x)
	x = [d[1] for d in user_dir_trimmed]
	print len(x), min(x), max(x), sum(x)/len(x)
Exemple #3
0
def _calc_distToHoods(from_id, centroids):
	'''Return {to_id: distance} from the centroid of from_id to all others.

	centroids maps an id to a pair that is passed to geo.xyz(). Each distance
	comes from geo.distance() and is truncated with int(). The from_id entry
	itself is left out of the result.
	'''
	dists = {}
	for to_id in centroids:
		if to_id == from_id:
			continue
		src = centroids[from_id]
		dst = centroids[to_id]
		src_xyz = geo.xyz(src[0], src[1])
		dst_xyz = geo.xyz(dst[0], dst[1])
		dists[to_id] = int(geo.distance(src_xyz, dst_xyz))
	return dists
Exemple #4
0
def calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t, dist_norm=None, hbk_user_home_loc=None):
# visit_mat[i][j] = #tw(i) in j
	print 'Calculating visitation matrix...'

	# Load visit matrix .pickle if exists
	if not dist_norm and os.path.exists('data/' + my.DATA_FOLDER + 'json/visit_mat.pickle'):
		with open('data/' + my.DATA_FOLDER  + 'json/' + 'visit_mat.pickle', 'rb') as fp1:
			visit_mat = pickle.load(fp1)
	elif dist_norm and os.path.exists('data/' + my.DATA_FOLDER + 'json/visit_mat__dist_norm.pickle'):
		with open('data/' + my.DATA_FOLDER  + 'json/' + 'visit_mat__dist_norm.pickle', 'rb') as fp1:
			visit_mat = pickle.load(fp1)
	# Calculate visit matrix is .pickle doesn't exist
	else:
		hbk_home_list = {}
		if dist_norm:
			print '...for distance norm.'
			for user_home in hbk_user_home_loc:
				hbk_home_list[user_home[0]] = [user_home[1], user_home[2]]

		visit_mat = {}
		for gang_id in my.HBK_GANG_ID_LIST:
			visit_mat[gang_id] = {}

		for gang_id in my.HBK_GANG_ID_LIST:
			if gang_id not in hbk_users_in_gang_t:
				for to_id in my.HBK_GANG_ID_LIST:
					visit_mat[gang_id][to_id] = 0
					#visit_mat[to_id][gang_id] = 0
			else:
				this_gang_tweets = prep.keepUserIds(hbk_all_tweets, hbk_users_in_gang_t[gang_id])
				for to_id in my.HBK_GANG_ID_LIST:
					this_tty_tweets = prep.keepPolygon(this_gang_tweets, tty_polys[to_id])
					if dist_norm == None:
						visit_mat[gang_id][to_id] = len(this_tty_tweets)
					else:
						visit_val = 0
						for tweet in this_tty_tweets:
							dist = geo.distance(geo.xyz(tweet[1], tweet[2]), geo.xyz(hbk_home_list[tweet[0]][0], hbk_home_list[tweet[0]][1]))
							dist_i = int(round(dist/100 + 1))
							visit_val += 1/dist_norm[dist_i]
							#print str(dist_i) + '\t=>\t' + str(1/dist_norm[dist_i])
						visit_mat[gang_id][to_id] = round(visit_val, 5)
		print 'Done calculating visitation matrix...'

		# Store visit matrix .pickle
		if not os.path.exists('data/' + my.DATA_FOLDER + 'json/'):
			os.makedirs('data/' + my.DATA_FOLDER + 'json/')
		if not dist_norm:
			with open('data/' + my.DATA_FOLDER  + 'json/' + 'visit_mat.pickle', 'wb') as fp1:
				pickle.dump(visit_mat, fp1)
		else:
			with open('data/' + my.DATA_FOLDER  + 'json/' + 'visit_mat__dist_norm.pickle', 'wb') as fp1:
				pickle.dump(visit_mat, fp1)

	return visit_mat
def _closest_dist(pol_a, pol_b):
	'''Find the closest distance between pol_a and pol_b.

	Only the vertices are compared: the result is the smallest int-truncated
	geo.distance() over all (a, b) vertex pairs. The search starts from 15000,
	so that value is returned when no pair is closer (or when either polygon
	is empty). A pair whose distance cannot be computed is reported and
	skipped.
	'''
	min_dist = 15000
	for a in pol_a:
		for b in pol_b:
			try:
				dist = int(geo.distance(geo.xyz(a[0], a[1]), geo.xyz(b[0], b[1])))
			except Exception:
				# Best effort: skip the pair, but let KeyboardInterrupt and
				# SystemExit through (a bare except would swallow them).
				print 'Error calculating distance!'
				continue
			min_dist = min(min_dist, dist)
	return min_dist
def _territory_span(pol):
	'''Find the spanning distance of the territory.

	i.e. the maximum int-truncated geo.distance() between any two vertices of
	pol. Returns 0 for an empty polygon. A pair whose distance cannot be
	computed is reported and skipped.
	'''
	max_dist = 0
	for a in pol:
		for b in pol:
			try:
				dist = int(geo.distance(geo.xyz(a[0], a[1]), geo.xyz(b[0], b[1])))
			except Exception:
				# Best effort: skip the pair, but let KeyboardInterrupt and
				# SystemExit through (a bare except would swallow them).
				print 'Error calculating distance!'
				continue
			max_dist = max(max_dist, dist)
	return max_dist
Exemple #7
0
def _is_interaction(tw1, tw2):
	'''Tell whether two tweets are close enough in space and time.

	Each tweet is a 5-item sequence whose items 0, 1 and 3 are read as the
	two coordinates and the timestamp. Returns True when the geo.distance()
	between the points is at most my.MAX_INTERACTION_DIST and the absolute
	time gap in seconds is at most my.MAX_INTERACTION_TIME, else False.
	'''
	lat1, lng1, _, ts1, _ = tw1
	lat2, lng2, _, ts2, _ = tw2

	dist = geo.distance(geo.xyz(lat1, lng1), geo.xyz(lat2, lng2))
	gap_seconds = abs((ts1 - ts2).total_seconds())

	near_in_space = dist <= my.MAX_INTERACTION_DIST
	return bool(near_in_space and gap_seconds <= my.MAX_INTERACTION_TIME)
Exemple #8
0
def get_bounding_box(center, miles):
	'''Return a box around center that reaches `miles` in each direction.

	center is a (lat, lng) pair. Starting at the center, each of the four
	directions is walked in 0.0001 steps until geo.distance() from the center
	exceeds miles * my.CONST_MILE_TO_METER; the coordinate where the walk
	stops becomes that side of the box.

	Returns [[lat_lo, lng_lo], [lat_hi, lng_hi]].
	'''
	# Neither the center point nor the radius changes while walking.
	origin = geo.xyz(center[0], center[1])
	limit = miles*my.CONST_MILE_TO_METER

	def _walk(axis, step):
		# Step along one axis (0 = lat, 1 = lng) until the point leaves the radius.
		this_point = [center[0], center[1]]
		while geo.distance(origin, geo.xyz(this_point[0], this_point[1])) <= limit:
			this_point[axis] += step
		return this_point[axis]

	lat_hi = _walk(0, 0.0001)
	lat_lo = _walk(0, -0.0001)
	if lat_lo > lat_hi:
		lat_hi, lat_lo = lat_lo, lat_hi

	lng_hi = _walk(1, 0.0001)
	lng_lo = _walk(1, -0.0001)
	if lng_lo > lng_hi:
		lng_hi, lng_lo = lng_lo, lng_hi

	return [[lat_lo, lng_lo], [lat_hi, lng_hi]]		#bbox
Exemple #9
0
def removeNearPoints(tweets, points, radius):
	print points
	new_tweets = []
	for tweet in tweets:
		if (tweet[0] not in points) or geo.distance(geo.xyz(tweet[1], tweet[2]), geo.xyz(points[tweet[0]][0], points[tweet[0]][1])) > radius:
			new_tweets.append(tweet)
		#inside = False
		#for point in points:
			#if geo.distance(geo.xyz(tweet[1], tweet[2]), geo.xyz(point[0], point[1])) < radius:
				#inside = True
				#break
		#if not inside:
			#new_tweets.append(tweet)
	return new_tweets
Exemple #10
0
def isNear(point, line):
	[[x1,y1], [x2,y2]] = line
	[px, py] = point

	if ((py<=y1 and py>=y2) or (py>=y1 and py<=y2))	and \
		(px<=x1 and px>=x2) or (px>=x1 and px<=x2):
		# If inside the box - calc perpendicular distance
		if x1 == x2:	# vertical line
			#return False
			x = x1
			if (py<=y1 and py>=y2) or (py>=y1 and py<=y2):
				y = py
			else:
				y = y1 if abs(py-y1) < abs(py-y2) else y2

		elif y1 == y2:	# horizontal line
			#return False
			y = y1
			if (px<=x1 and px>=x2) or (px>=x1 and px<=x2):
				x = px
			else:
				x = x1 if abs(px-x1) < abs(px-x2) else x2

		else:		# usual line
			m = (y2-y1)/(x2-x1)
			c = (y1 - m*x1)
			x = (m*py + px -m*c) / (m*m + 1)
			y = (m*m*py + m*px + c) / (m*m + 1)
			# Initial
			#m = (y2-y1)/(x2-x1)
			#x = (px + m * (m*x1 - y1 + py)) / (1 + m*m)
			#y = py - (x - px)/m

		try:
			if geo.distance(geo.xyz(px, py), geo.xyz(x, y)) < my.BORDER_LINE_SPAN:
				return True
			else:
				return False
		except Exception:
			print 'Couldn\'t calculate geo.distance'
		return False

	else:
		# If outside box - calc dist from end points of line
		if geo.distance(geo.xyz(px, py), geo.xyz(x1, y1)) < my.BORDER_LINE_SPAN or geo.distance(geo.xyz(px, py), geo.xyz(x2, y2)) < my.BORDER_LINE_SPAN :
			return True
		else:
			return False
Exemple #11
0
def find_most_visited_loc():
	points = []
	results = []
	clusters = []

	with open(my.DATA_FOLDER + '/' + my.HBK_HOME_LOC_FILE, 'rb') as fp1:
		csv_reader = csv.reader(fp1, delimiter=',')
		for row in csv_reader:
			current_user = row[0]
			current_user_home = geo.xyz(float(row[1].strip()), float(row[2].strip()))
			try:
				with open(my.DATA_FOLDER + '/' + my.USER_TWEET_LOC_FOLDER + '/' + str(my.HBK_LOCATION_ID) + '/' + str(current_user) + '.csv', 'rb') as fp2:
					csv_reader2 = csv.reader(fp2, delimiter = ',')

					for row2 in csv_reader2:
						if row2[0].strip().__len__() != 0 and row2[1].strip().__len__() != 0:
							# if not near user's home
							this_point = geo.xyz(float(row2[0].strip()), float(row2[1].strip()))
							if int(round(geo.distance(current_user_home, this_point))) > 100:
								points.append([float(row2[0].strip()), float(row2[1].strip())])
			except IOError as e:
					print 'No file found for user... ' + str(current_user)

	print 'Total latlng pairs read: ' + str(len(points))
	if len(points) != 0:
		print 'Running DBScan... '
		results = dbscan(points, my.DBSCAN_EPSILON, my.DBSCAN_MIN_POINTS)
		print 'Run complete... Number of clusters = ' + str(len(results))

		for key in results:
			if key != -1:
				center = calc_center(results[key])
				clusters.append([center[0], center[1], len(results[key])])

		fp3 = open(my.DATA_FOLDER + '/' + my.MOST_VISITED_LOC_FILE, 'wb')
		csv_writer = csv.writer(fp3, delimiter=',')
		for row in clusters:
			csv_writer.writerow(row)
		fp3.close

		with open(my.DATA_FOLDER + '/' + my.MOST_VISITED_LOC_FILE_json, 'wb') as fp3:
			fp3.write(anyjson.serialize(clusters))
Exemple #12
0
def _get_points(user_id):
	'''Collect a user's away-from-home tweet points together with their race.

	Fetches every (ST_X, ST_Y) tweet point of user_id from my.REL_TWEET and
	keeps those whose rounded geo.distance() from the user's home exceeds
	my.MIN_DIST. The home point comes from the module-level `homes` mapping,
	keyed by str(user_id).

	Returns [race_lookup[user_id], points], where every point is a pair of
	coordinates rounded to 5 places.
	'''
	SQL = '''SELECT ST_X(geo), ST_Y(geo) \
			FROM  {rel_tweet} \
				WHERE user_id = %s \
			'''.format(rel_tweet=my.REL_TWEET)
	con = psycopg2.connect(my.DB_CONN_STRING)
	try:
		cur = con.cursor()
		cur.execute(SQL, (user_id, ))
		recs = cur.fetchall()
	finally:
		# Always release the connection, even when the query raises.
		con.close()

	home = homes[str(user_id)]
	# The home point does not change inside the loop, so convert it once.
	home_xyz = geo.xyz(home[0], home[1])

	points = []
	for lat, lng in recs:
		dist = int(round(geo.distance(home_xyz, geo.xyz(lat, lng))))
		if dist > my.MIN_DIST:
			points.append([round(lat,5), round(lng,5)])

	return [race_lookup[user_id], points]
Exemple #13
0
def _get_dist(user_id):
	'''Return the distances from home of a user's away-from-home tweets.

	Fetches every (ST_X, ST_Y) tweet point of user_id from my.REL_TWEET and
	computes its rounded geo.distance() from the user's home, which comes
	from the module-level `homes` mapping keyed by str(user_id).

	Returns the list of distances greater than 100, or None when there are
	none.
	'''
	SQL = '''SELECT ST_X(geo), ST_Y(geo) \
			FROM  {rel_tweet} \
				WHERE user_id = %s \
			'''.format(rel_tweet=my.REL_TWEET)
	con = psycopg2.connect(my.DB_CONN_STRING)
	try:
		cur = con.cursor()
		cur.execute(SQL, (user_id, ))
		recs = cur.fetchall()
	finally:
		# Always release the connection, even when the query raises.
		con.close()

	home = homes[str(user_id)]
	# The home point does not change inside the loop, so convert it once.
	home_xyz = geo.xyz(home[0], home[1])

	disp = []
	for lat, lng in recs:
		dist = int(round(geo.distance(home_xyz, geo.xyz(lat, lng))))
		# NOTE(review): sibling functions compare against my.MIN_DIST here;
		# confirm whether the literal 100 is intentional.
		if dist > 100:
			disp.append(dist)

	# Callers get None (not an empty list) when nothing qualified.
	if disp:
		return disp
	return None
def make_feature_mat(mat, links, folder, file_name):
	'''Build the feature matrix and label vector for a list of links.

	links is an iterable of (a, b, label) triples. For every link one row of
	15 features is produced, in this order: centroid distance, closest vertex
	distance, larger territory span, span difference, squared larger span,
	total and average of the two directed visit fractions, sum and absolute
	difference of in-density, sum and absolute difference of out-density,
	sum of in-entropy, sum of out-entropy, in-cross-entropy and
	out-cross-entropy.

	The result {'X': rows, 'y': labels} is pickled to
	data/<my.DATA_FOLDER>predict_rivalry/<folder><file_name>/Xy.pickle and
	returned.
	'''
	X 		= []
	y 		= []
	centers = _load_nhood_centers()
	pols 	= _load_nhood_polygons()
	mat_f	= _calc_mat_frac(mat)

	# _territory_span() compares every vertex pair, so compute it once per
	# neighborhood instead of six times per link.
	span_cache = {}
	def _span(h_id):
		if h_id not in span_cache:
			span_cache[h_id] = _territory_span(pols[h_id])
		return span_cache[h_id]

	for a, b, label in links:
		max_span = max(_span(a), _span(b))
		visits = mat_f[a][b] + mat_f[b][a]
		in_a, in_b = _in_density(a, mat_f), _in_density(b, mat_f)
		out_a, out_b = _out_density(a, mat_f), _out_density(b, mat_f)

		instance = [
			int(geo.distance(geo.xyz(centers[a][0], centers[a][1]), geo.xyz(centers[b][0], centers[b][1]))), # CENTROID_DIST
			_closest_dist(pols[a], pols[b]), # CLOSEST_DIST
			max_span, # MAX_TTY_SPAN
			abs(_span(a) - _span(b)), #TTY_SPAN_DIFF
			pow(max_span, 2), # SPAN_SQ

			visits, # TOTAL_VISITS
			visits/2, # AVG_VISITS

			in_a + in_b, # IN_DENSITY_ApB
			abs(in_a - in_b), # IN_DENSITY_AmB
			out_a + out_b, # OUT_DENSITY_ApB
			abs(out_a - out_b), # OUT_DENSITY_AmB

			_in_entropy(a, mat_f) + _in_entropy(b, mat_f),
			_out_entropy(a, mat_f) + _out_entropy(b, mat_f),

			_in_cross_entropy(a, b, mat_f), # IN_CROSS_ENTROPY
			_out_cross_entropy(a, b, mat_f), # OUT_CROSS_ENTROPY
			]
		X.append(instance)
		y.append(label)

	Xy = {'X': X, 'y': y}
	with open('data/' + my.DATA_FOLDER  + 'predict_rivalry/' + folder + file_name + '/' + 'Xy.pickle', 'wb') as fp1:
		pickle.dump(Xy, fp1)
	return Xy
Exemple #15
0
def _calc_visits_dist_norm(user_id):
	'''Sum distance-weighted away-from-home tweets per race category.

	Works like a weighted visit count: every tweet point of user_id from
	my.REL_TWEET whose rounded geo.distance() from home exceeds my.MIN_DIST
	contributes 1 - amp * dist**index, with amp and index read from
	data/<my.DATA_FOLDER>user_disp_param.json. Points are classified with
	_find_race(). The home point comes from the module-level `homes` mapping,
	keyed by str(user_id).

	Returns [user_id, visits]. visits has six sums rounded to 4 places: slots
	0-4 follow `legend` ('w', 'b', 'a', 'h', 'o') and slot 5 collects points
	for which _find_race() returned a falsy value. The result is also printed.
	'''
	SQL = '''SELECT ST_X(geo), ST_Y(geo) \
			FROM  {rel_tweet} \
				WHERE user_id = %s \
			'''.format(rel_tweet=my.REL_TWEET)
	con = psycopg2.connect(my.DB_CONN_STRING)
	try:
		cur = con.cursor()
		cur.execute(SQL, (user_id, ))
		recs = cur.fetchall()
	finally:
		# Always release the connection, even when the query raises.
		con.close()

	home = homes[str(user_id)]
	# The home point does not change inside the loop, so convert it once.
	home_xyz = geo.xyz(home[0], home[1])

	visits = [0.0]*6
	legend = {'w': 0, 'b': 1, 'a': 2, 'h': 3, 'o': 4}

	with open('data/' + my.DATA_FOLDER + 'user_disp_param.json', 'rb') as fp:
		param = anyjson.loads(fp.read())
	amp = param['amp']
	index = param['index']
	powerlaw = lambda x: amp * (x**index)

	for lat, lng in recs:
		dist = int(round(geo.distance(home_xyz, geo.xyz(lat, lng))))
		if dist <= my.MIN_DIST:
			continue
		weight = 1 - powerlaw(dist)
		race = _find_race([user_id, [lat, lng]])
		if race:
			# race[1] is the legend key of the matched category.
			visits[legend[race[1]]] += weight
		else:
			# Slot 5 collects points _find_race() could not classify.
			visits[5] += weight
	visits = [round(i, 4) for i in visits]
	print [user_id, visits]
	return [user_id, visits]
Exemple #16
0
def plot_rivalry(folder, file_name):
	with open('data/' + my.DATA_FOLDER  + 'predict_rivalry/' + folder + file_name + '/' + 'links.pickle', 'rb') as fp1:
		links = pickle.load(fp1)
	with open('data/' + my.DATA_FOLDER  + 'predict_rivalry/' + folder + file_name + '/' + 'predicted_links.pickle', 'rb') as fp1:
		links_p = pickle.load(fp1)
	with open('data/' + my.DATA_FOLDER  + 'predict_rivalry/' + folder + file_name + '/' + 'info.txt', 'rb') as fp1:
		info = fp1.read()
	centers = _load_nhood_centers()
	names   = _load_nhood_names()
	with open('data/' + my.DATA_FOLDER + 'rivalry_baseline.pickle', 'rb') as fp:
		rivalry_baseline = pickle.load(fp)


	actual    = [(np.array([centers[a], centers[b]]), y) for a, b, y in links]
	baseline  = []
	predicted = []

	for i in range(len(links)):
		a, b, y = links_p[i]
		if y != links[i][2]:
			y += 2
		predicted.append((np.array([centers[a], centers[b]]), y))
		y = rivalry_baseline[a][b]
		#y = my.BASELINE_PREDICTION[(a, b)] if (a, b) in my.BASELINE_PREDICTION else my.BASELINE_PREDICTION[(b, a)]
		if y != links[i][2]:
			y += 2
		baseline.append((np.array([centers[a], centers[b]]), y))
	y_ 		= [y for v,y in actual]
	y_pred 	= [y for v,y in baseline]
	true 	= len([1 for i in range(len(y_)) if y_[i]==y_pred[i]])
	true_r 	= len([1 for i in range(len(y_)) if y_[i]==1 and y_[i]==y_pred[i]])
	miss 	= len(y_) - true
	acc 	= true / float(len(y_pred))
	acc_r 	= true_r / float(len([1 for i in range(len(y_)) if y_[i]==1]))
	base_info  = 'Links: ' + '{0}'.ljust(10).format(str(len(y_))) + '\n'
	base_info += 'Network acc.: ' + '{0}'.format(str(round(acc*100, 2)) + '%') + '\n'
	base_info += 'Rivalry acc.: ' + '{0}'.format(str(round(acc_r*100, 2)) + '%') + '\n'


	pols = []
	for pol in _load_nhood_polygons().values():
		pol = [[ll[1], ll[0]] for ll in pol]
		pols.append(pol)
	lngs = [ll[0] for ll in pol for pol in pols]
	lats = [ll[1] for ll in pol for pol in pols]
	print max(lngs), min(lngs), max(lats), min(lats)
	## MIGHT NEED TO SWAP x_dist and y_dist
	y_dist = geo.distance(geo.xyz(max(lats), max(lngs)), geo.xyz(max(lats), min(lngs)))
	x_dist = geo.distance(geo.xyz(max(lats), max(lngs)), geo.xyz(min(lats), max(lngs)))
	print x_dist, y_dist

	heat = [1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3]
	shuffle(heat)
	heat = np.array(heat)

	#fig = plt.figure(figsize=(1.5* 3*4, 1.5* 6))
	#plt.subplots_adjust(left=0.02, right=0.98, top=0.99, bottom=0.0)

	#
	# Map
	#
	'''
	markers = {
			29: (-118.21769714355469, 34.074559310537),
			25: (-118.20585250854492, 34.08948780782094),
			33: (-118.17117691040039, 34.08692882376708),
			42: (-118.15933227539062, 34.097306446504355),
			37: (-118.18439483642578, 34.08394324461533),
			47: (-118.19400787353516, 34.08422759002247),
			32: (-118.19211959838867, 34.080246667433315),
			36: (-118.20293426513672, 34.081099738028236),
			31: (-118.20653915405273, 34.0729952204399),
			41: (-118.20121765136719, 34.07143110146333),
			44: (-118.16946029663086, 34.06787617820785),
			26: (-118.19709777832031, 34.059628181822184),
			46: (-118.22164535522461, 34.05102381295824),
			50: (-118.2227611541748, 34.045476732062944),
			30: (-118.22190284729004, 34.041138377469416),
			49: (-118.21074485778809, 34.05130826886282),
			39: (-118.20259094238281, 34.0488192473379),
			52: (-118.19701194763184, 34.05166383740143),
			48: (-118.19486618041992, 34.050028209776336),
			40: (-118.1960678100586, 34.04327202221684),
			43: (-118.20293426513672, 34.043556504127444),
			35: (-118.2030200958252, 34.03843568373248),
			54: (-118.20405006408691, 34.03139405087606),
			45: (-118.20379257202148, 34.022786817002),
			23: (-118.2143497467041, 34.02392501371833),
			53: (-118.20671081542969, 34.02051037777654),
			38: (-118.19520950317383, 34.018803008289744),
			28: (-118.21610927581787, 34.04700577135851),
			51: (-118.2134485244751, 34.047432475078324),
			27: (-118.21108818054199, 34.04618791656029),
			34: (-118.2070541381836, 34.044658862517366)}
	fig = plt.figure(figsize=(1.5* 4, 1.5* 6))
	plt.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.0)
	ax = fig.add_subplot(111, aspect=1.2)
	coll = PolyCollection(pols, array=heat, cmap=mpl.cm.Accent, edgecolors='#111111', alpha=0.75)
	ax.add_collection(coll)
	ax.autoscale_view()
	ax.get_xaxis().set_ticklabels([])
	ax.get_yaxis().set_ticklabels([])
	count = 0
	id_map = {}
	for h_id in markers:
		count += 1
		id_map[h_id] = count
		x, y = markers[h_id]
		ax.text(x, y, str(count), backgroundcolor='#dddddd', color='#000000', fontsize=10, alpha=0.8, fontproperties=FontProperties(weight='bold'))
	ids = markers.keys()
	info1 = '\n'.join([str(str(id_map[i]) + ' : ' + names[i]) for i in ids[:8]])
	info2 = '\n'.join([str(str(id_map[i]) + ' : ' + names[i]) for i in ids[8:]])
	ax.text(0.05, 0.99, info1, ha='left', va='top', transform=ax.transAxes, fontsize=12)
	ax.text(0.6, 0.01, info2, ha='left', va='bottom', transform=ax.transAxes, fontsize=12)
	plt.savefig('data/' + my.DATA_FOLDER  + 'predict_rivalry/' + folder + file_name + '/' + '_map' + '.pdf')
	'''

	#
	# Actual
	#
	fig = plt.figure(figsize=(1.5* 4, 1.5* 6))
	plt.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.0)
	ax = fig.add_subplot(111, aspect=1.2) 
	#coll = PolyCollection(pols, array=heat, cmap=mpl.cm.Dark2, edgecolors='k', alpha=0.3)
	coll = PolyCollection(pols, facecolors='none', edgecolors='k', linewidths=1, alpha=0.3)
	ax.add_collection(coll)
	ax.autoscale_view()
	ax.get_xaxis().set_ticklabels([])
	ax.get_yaxis().set_ticklabels([])
	#ax.set_title('Actual')
	for vertices, y in actual:
		if y == 1:
			ax.plot(vertices[:,0], vertices[:,1], color=my.CMAP[y], alpha=0.95, linewidth=my.LINEWIDTH[y])
		else:
			ax.plot(vertices[:,0], vertices[:,1], color=my.CMAP[y], alpha=0.75, linewidth=1, linestyle=my.LINESTYLE[y])
	plt.savefig('data/' + my.DATA_FOLDER  + 'predict_rivalry/' + folder + file_name + '/' + '_actual' + '.pdf')

	#
	# Baseline
	#
	fig = plt.figure(figsize=(1.5* 4, 1.5* 6))
	plt.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.0)
	ax = fig.add_subplot(111, aspect=1.2) 
	#coll = PolyCollection(pols, array=heat, cmap=mpl.cm.Dark2, edgecolors='k', alpha=0.3)
	coll = PolyCollection(pols, facecolors='none', edgecolors='k', linewidths=1, alpha=0.3)
	ax.add_collection(coll)
	ax.autoscale_view()
	ax.get_xaxis().set_ticklabels([])
	ax.get_yaxis().set_ticklabels([])
	#ax.set_title('Baseline')
	ax.text(0.98, 0.05, base_info, horizontalalignment='right', verticalalignment='bottom', transform = ax.transAxes, fontsize=19)
	for vertices, y in baseline:
		if y == 1:
			ax.plot(vertices[:,0], vertices[:,1], color=my.CMAP[y], alpha=0.95, linewidth=my.LINEWIDTH[y])
		else:
			ax.plot(vertices[:,0], vertices[:,1], color=my.CMAP[y], alpha=0.9, linewidth=my.LINEWIDTH[y], linestyle=my.LINESTYLE[y])
	plt.savefig('data/' + my.DATA_FOLDER  + 'predict_rivalry/' + folder + file_name + '/' + '_baseline' + '.pdf')
	
	#
	# Predicted
	#
	fig = plt.figure(figsize=(1.5* 4, 1.5* 6))
	plt.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.0)
	ax = fig.add_subplot(111, aspect=1.2) 
	#coll = PolyCollection(pols, array=heat, cmap=mpl.cm.winter, edgecolors='k', alpha=0.2)
	coll = PolyCollection(pols, facecolors='none', edgecolors='k', linewidths=1, alpha=0.3)
	ax.add_collection(coll)
	ax.autoscale_view()
	ax.get_xaxis().set_ticklabels([])
	ax.get_yaxis().set_ticklabels([])
	#ax.set_title('Predicted')
	ax.text(0.98, 0.05, info, horizontalalignment='right', verticalalignment='bottom', transform = ax.transAxes, fontsize=19)
	for vertices, y in predicted:
		if y == 1:
			ax.plot(vertices[:,0], vertices[:,1], color=my.CMAP[y], alpha=0.95, linewidth=my.LINEWIDTH[y])
		else:
			ax.plot(vertices[:,0], vertices[:,1], color=my.CMAP[y], alpha=0.9, linewidth=my.LINEWIDTH[y], linestyle=my.LINESTYLE[y])
	plt.savefig('data/' + my.DATA_FOLDER  + 'predict_rivalry/' + folder + file_name + '/' + '_predicted' + '.pdf')
Exemple #17
0
def make_grid():
	path = 'data/' + my.DATA_FOLDER + 'artificial/'
	if not os.path.exists(path): os.makedirs(path)

	lat1 = ( my.LAT_RANGE[0] / my.DELTA_METERS ) * my.LAT_DELTA
	lat2 = ( my.LAT_RANGE[1] / my.DELTA_METERS ) * my.LAT_DELTA
	lng1 = ( my.LNG_RANGE[0] / my.DELTA_METERS ) * my.LNG_DELTA
	lng2 = ( my.LNG_RANGE[1] / my.DELTA_METERS ) * my.LNG_DELTA
	print lng1, lng2, lat1, lat2
	X = np.arange(lng1, lng2, my.LNG_DELTA)
	Y = np.arange(lat1, lat2, my.LAT_DELTA)
	#XY = list( itertools.product(X, Y) )
	#print len(XY)
	#XY = np.array( XY )
	#print len(X), len(Y), len(X) * len(Y), XY.shape, len(XY), len(XY.tolist())
	#print XY.reshape(len(X), len(Y), 2).reshape(len(X), len(Y), 2)
	#XY = XY.reshape(len(X), len(Y), 2).tolist()

	#with open(path + 'grid_coordinates.json', 'wb') as fp:
	#	fp.write( anyjson.dumps( XY ) )

	with open('data/' + my.DATA_FOLDER + 'user_disp_param.json', 'rb') as fp:
		disp_param = anyjson.loads(fp.read())
	amp = disp_param['amp']
	index = disp_param['index']
	powerlaw = lambda x: amp * (x**index) if x != 0 else 0

	delta = []
	theta = []
	prob = []
	points = []		# stored as (y, x) or (lat, lng)
	for y in Y:
		this_delta = []
		this_theta = []
		this_prob = []
		for x in X:
			dist = int(round( geo.distance(	geo.xyz(0, 0), geo.xyz(y, x) )))
			this_delta.append( powerlaw(dist) ) 
			this_theta.append( 1.0/360 )
			this_prob.append( powerlaw(dist) * 1.0/360 )
			points.append( (y, x, powerlaw(dist) * 1.0/360) )
		delta.append(this_delta)
		theta.append(this_theta)
		prob.append(this_prob)
	
	all_prob = list(itertools.chain(*prob))
	print min(all_prob), max(all_prob)
	mn = min(tuple(i for i in all_prob if i!=0))
	print sum(int(round(i/mn)) for i in all_prob)
	
	points_ = []
	for p in points:
		for i in range(int(round( p[2]/mn ))):
			points_.append( ( round(p[0], 4), round(p[1], 4) ) )
	print len(points), len(points_)
	with open(path + 'artificial_points.json', 'wb') as fp:
		fp.write( anyjson.dumps( points_ ) )
	
	with open(path + 'grid_delta.json', 'wb') as fp:
		fp.write( anyjson.dumps( delta ) )
		#fp.write( jsb.beautify( anyjson.dumps( delta ) ) )
	with open(path + 'grid_theta.json', 'wb') as fp:
		fp.write( anyjson.dumps( theta ) )
		#fp.write( jsb.beautify( anyjson.dumps( theta ) ) )
	with open(path + 'grid_prob.json', 'wb') as fp:
		fp.write( anyjson.dumps( prob ) )
		#fp.write( jsb.beautify( anyjson.dumps( prob ) ) )

	fig=plt.figure(figsize=(10, 10))
	fig.set_tight_layout(True)
	ax=fig.add_subplot(111)
	ax.set_xlim(lng1, lng2)
	ax.set_ylim(lat1, lat2)
	ax.grid()
	ax.set_title('Single User Sample Space')
	plt.savefig(path + 'artificial_grid' + '.png')
Exemple #18
0
def calcTweetDistances():
	print 'Calculating tweeting distances...'
	_, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	print [i[0] for i in hbk_user_home_loc]
	hbk_home_list = {}
	for user_home in hbk_user_home_loc:
		hbk_home_list[user_home[0]] = [user_home[1], user_home[2]]

	with open('data/' + my.DATA_FOLDER + my.HBK_TWEET_DIST_FILE, 'wb') as fp:
		csv_writer = csv.writer(fp, delimiter=',')
		for tweet in hbk_all_tweets:
			user_id = tweet[0]
			#print tweet, hbk_home_list[user_id]
			dist = int(round(geo.distance(geo.xyz(tweet[1], tweet[2]), geo.xyz(hbk_home_list[user_id][0], hbk_home_list[user_id][1]))))
			csv_writer.writerow([user_id, dist])
	print 'Done calculating tweeting distances...'


# Apply Normalizing vectors to visit matrix
def apply_non_home_norm(visit_mat, norm):
	print 'Applying Non-Home tweet count normalization to visit matrix...'
	for from_id in my.HBK_GANG_ID_LIST:
		for to_id in my.HBK_GANG_ID_LIST:
			if norm[to_id] != 0:
				visit_mat[from_id][to_id] /= norm[to_id]
			else:
				visit_mat[from_id][to_id] = 0
	return visit_mat
Exemple #19
0
def _find_daily_disp(user_id):
	'''Find daily max displacements for user_id and generate scatter plot'''
	# Displacement csv
	SQL = 'SELECT ST_X(geo), ST_Y(geo) \
		FROM {rel_home} \
		WHERE user_id = %s '.format(rel_home=my.REL_HOME)

	con = psycopg2.connect(my.DB_CONN_STRING)
	cur = con.cursor()
	cur.execute(SQL, (user_id,))
	records = cur.fetchall()

	if len(records) > 0:
		home = records[0]
		user_disp = {}
		x, y = [], []
		with open('data/' + my.DATA_FOLDER + 'city_bound_pol.txt', 'rb') as fpr:
			bound_pol = fpr.read().strip()
		SQL = 'SELECT ST_X(geo), ST_Y(geo), (timestamp AT TIME ZONE \'{timezone}\')::date \
			FROM {rel_tweet} \
			WHERE user_id = %s \
			AND geo && ST_GeomFromGeoJSON(%s) '.format(rel_tweet=my.REL_TWEET, timezone=my.TIMEZONE) \
			+ my.QUERY_CONSTRAINT \
			+ 'ORDER BY timestamp'
		cur.execute(SQL, (user_id, bound_pol))
		records = cur.fetchall()
		con.close()

		for rec in records:
			lat, lng, ds = rec
			x.append(lng-home[1])
			y.append(lat-home[0])
			if ds not in user_disp:
				user_disp[ds] = 0
			try:
				dist = int(round(geo.distance(geo.xyz(home[0], home[1]), geo.xyz(lat, lng))))
			except:
				dist = 0
			if dist > user_disp[ds]:
				user_disp[ds] = dist

		if not os.path.exists('data/' + my.DATA_FOLDER + 'displacement/' + 'user_disp/'):
			os.makedirs('data/' + my.DATA_FOLDER + 'displacement/' + 'user_disp/')
		with open('data/' + my.DATA_FOLDER + 'displacement/' + 'user_disp/' + str(user_id) + '.csv', 'wb') as fpw:
			cw = csv.writer(fpw, delimiter=',')
			for ds in user_disp:
				cw.writerow([user_id, user_disp[ds], ds])

		# Displacement plot
		fig = plt.figure(figsize=(5,5))
		ax = fig.add_subplot(111)
		plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)
		ax.set_autoscaley_on(False)
		ax.set_ylim([-0.5,0.5])
		ax.set_xlim([-0.5,0.5])
		ax.set_yticks([0.0])
		ax.set_xticks([0.0])
		ax.set_yticklabels([])
		ax.set_xticklabels([])
		ax.grid(True)
		ax.plot(x, y, 'b+')
		ax.plot([0], [0], 'r^')
		ax.text(-0.45, -0.45, str(user_id), fontsize=10)
		if not os.path.exists('data/' + my.DATA_FOLDER + 'displacement/' + 'plot_disp/'):
			os.makedirs('data/' + my.DATA_FOLDER + 'displacement/' + 'plot_disp/')
		plt.savefig('data/' + my.DATA_FOLDER + 'displacement/' + 'plot_disp/' + str(user_id) + '.png')

	else:
		con.close()
		print 'Missed 1 user_id!'
Exemple #20
0
def calc_featAndPlot(folder='visits', file_name='visit_mat'):
# Calculate features for each neighborhoods
# and plot rankings
	hood_ids = _load_nhoodIDs()
	hood_info = _load_hoodInfo()
	visit_mat = _load_visitMat(folder, file_name)
	visit_mat_frac = _calc_visitMatFrac(visit_mat)
	inn = dict([(to_id, len([1 for from_id in hood_ids \
					if from_id != to_id and visit_mat_frac[from_id][to_id] != 0])) \
						for to_id in hood_ids])
	outn = dict([(from_id, len([1 for to_id in hood_ids \
					if to_id != from_id and visit_mat_frac[from_id][to_id] != 0])) \
						for from_id in hood_ids])
	inn = [i for i in inn if inn[i] > my.MIN_LINKS_FRAC*max(inn.values())]
	outn = [i for i in outn if outn[i] > my.MIN_LINKS_FRAC*max(outn.values())]
	print hood_ids
	print inn
	print outn

	#for a in visit_mat_frac:
	#	print a, len([1 for b in visit_mat_frac if visit_mat_frac[a][b] != 0 and visit_mat_frac[b][a] !=0])
	
	# Calculate each feature
	OUTFLOW_INFLOW = dict([(h_id, _calc_inflowVsOutflow(h_id, visit_mat_frac)) for h_id in hood_ids])
	IN_DENSITY = dict([(h_id, _calc_inDensity(h_id, visit_mat_frac)) for h_id in hood_ids])
	OUT_DENSITY = dict([(h_id, _calc_outDensity(h_id, visit_mat_frac)) for h_id in hood_ids])
	POPULARITY = dict([(h_id, _calc_Popularity(h_id, visit_mat_frac)) for h_id in hood_ids])
	ENTROPY_OUT = dict([(h_id, _calc_EntropyOut(h_id, visit_mat_frac)) for h_id in hood_ids])
	ENTROPY_OUT_BYN = dict([(h_id, _calc_EntropyOut_byN(h_id, visit_mat_frac)) for h_id in hood_ids])
	ENTROPY_OUT_ALL = dict([(h_id, _calc_EntropyOutAll(h_id, visit_mat_frac)) for h_id in hood_ids])
	ENTROPY_IN = dict([(h_id, _calc_EntropyIn(h_id, visit_mat_frac)) for h_id in hood_ids])
	ENTROPY_IN_BYN = dict([(h_id, _calc_EntropyIn_byN(h_id, visit_mat_frac)) for h_id in hood_ids])
	ENTROPY_IN_ALL = dict([(h_id, _calc_EntropyInAll(h_id, visit_mat_frac)) for h_id in hood_ids])
	KL_DIVERGENCE = dict([(h_id, _calc_KLDivergence(h_id, visit_mat_frac)) for h_id in hood_ids])

	ENTROPY_OUT = _trim_ids(ENTROPY_OUT, outn)
	ENTROPY_IN = _trim_ids(ENTROPY_IN, inn)
	
	# Initialize features for plot
	features = {'OUTFLOW_INFLOW' : OUTFLOW_INFLOW,
		'IN_DENSITY' : IN_DENSITY,
		'OUT_DENSITY' : OUT_DENSITY,
		'POPULARITY' : POPULARITY,
		'ENTROPY_OUT' : ENTROPY_OUT,
		'ENTROPY_OUT_(/N)' : ENTROPY_OUT_BYN,
		'ENTROPY_OUT_ALL' : ENTROPY_OUT_ALL,
		'ENTROPY_IN' : ENTROPY_IN,
		'ENTROPY_IN_(/N)' : ENTROPY_IN_BYN,
		'ENTROPY_IN_ALL' : ENTROPY_IN_ALL,
		'KL_DIVERGENCE': KL_DIVERGENCE}

	#with open('data/' + my.DATA_FOLDER  + 'features_' + folder + '.pickle', 'wb') as fp1:
	#	pickle.dump(features, fp1)

	#feature_names = ['OUTFLOW_INFLOW', 'IN_DENSITY', 'OUT_DENSITY', 'POPULARITY', 'ENTROPY_OUT', 'ENTROPY_OUT_ALL', 'ENTROPY_IN', 'ENTROPY_IN_ALL']
	#feature_names = ['OUTFLOW_INFLOW', 'POPULARITY', 'ENTROPY_OUT', 'ENTROPY_OUT_(/N)', 'ENTROPY_IN', 'ENTROPY_IN_(/N)']
	colors = ["#4DAF4A","#3B3B3B","#984EA3","#E41A1C","#A65628","#FA71AF","#FF7F00","#377EB8"]

	# Plot all feature ranks
	'''width = 6
	ind = np.arange(len(hood_ids)) * 10
	count = 0
	fig = plt.figure(figsize=(len(features)*2.5, len(hood_ids)*0.75))
	plt.subplots_adjust(left=0.02, right=0.96, top=0.88, bottom=0.02)

	for name in feature_names:
		x = [hood_info[h_id]['name'] for h_id in sorted(features[name], key=features[name].get)]
		y = [features[name][h_id] for h_id in sorted(features[name], key=features[name].get)]
		count += 2
		color = colors.pop()

		ax = fig.add_subplot(1, len(features)*2, count)
		ax.set_yticks(ind+(width/2))
		plt.setp(ax, xticklabels=[])
		plt.setp(ax, yticklabels=_conv_SplitLabels(x))
		#ax.tick_params(axis='x', labelsize=10)
		ax.barh(ind, y, width, color=color, alpha=0.75, edgecolor=color)
		ax.set_title(name + '\n\n')

	fig.suptitle('Neighborhood ranks: ' + folder.upper() + ' (' + my.DATA_FOLDER[:-1].upper() + ')', fontsize=18)
	plt.savefig('data/' + my.DATA_FOLDER + folder + '/' + 'hood_ranks__' + my.DATA_FOLDER[:-1] + '.png')
	'''

	# Plot map: polygons init
	pols = []
	pol_seq = []
	for h_id in hood_info:
		pol = hood_info[h_id]['polygon'][:-1]
		pol = [[ll[1], ll[0]] for ll in pol]
		pols.append(pol)
		pol_seq.append(h_id)
	lngs = [ll[0] for ll in pol for pol in pols]
	lats = [ll[1] for ll in pol for pol in pols]
	#print max(lngs), min(lngs), max(lats), min(lats)
	## MIGHT NEED TO SWAP x_dist and y_dist
	y_dist = geo.distance(geo.xyz(max(lats), max(lngs)), geo.xyz(max(lats), min(lngs)))
	x_dist = geo.distance(geo.xyz(max(lats), max(lngs)), geo.xyz(min(lats), max(lngs)))
	#print x_dist, y_dist

	# Plot map: each feature
	fig = plt.figure(figsize=(2 * 6, 3 * y_dist * 6/x_dist))
	plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)
	count = 0
	for name in feature_names:
		count += 1
		#x = [hood_info[h_id]['name'] for h_id in sorted(features[name], key=features[name].get)]
		#y = [features[name][h_id] for h_id in sorted(features[name], key=features[name].get)]
		heat = np.array([features[name][h_id] for h_id in pol_seq])

		#fig = plt.figure(figsize=(6, y_dist * 6/x_dist))
		ax = fig.add_subplot(3, 2, count, aspect='equal') 
		#ax = fig.add_subplot(4, 2, count, aspect=y_dist/x_dist) 
		coll = PolyCollection(pols, array=heat, cmap=mpl.cm.OrRd, edgecolors='k', alpha=0.75)
		## mpl.cm.datad for list of colormaps
		ax.add_collection(coll)
		ax.autoscale_view()
		ax.get_xaxis().set_ticklabels([])
		ax.get_yaxis().set_ticklabels([])
		fig.colorbar(coll, ax=ax)
		#ax.set_title(my.DATA_FOLDER[:-1].upper() + '(' + folder.upper() + '): ' + name)
		ax.set_title(name)
	
	fig.suptitle('Neighborhood ranks: ' + folder.upper() + ' (' + my.DATA_FOLDER[:-1].upper() + ')', fontsize=18)
	if not os.path.exists('data/' + my.DATA_FOLDER + 'nhood_rank/'):
		os.makedirs('data/' + my.DATA_FOLDER + 'nhood_rank/')
	plt.savefig('data/' + my.DATA_FOLDER + 'nhood_rank/' + file_name + '.png')

	# Plot seperate plots WEST LA
	'''feature_names = ['ENTROPY_OUT', 'ENTROPY_IN']