Example #1
0
def generate_gang_tweet_counts():
	"""Write per-gang counts of tweets found inside other gangs' polygons.

	For every gang id returned by load.loadUsersInGangTty, the tweets of that
	gang's users are selected with prep.keepUserIds, and the sizes of
	prep.keepPolygon(...) over every *other* gang id in my.HBK_GANG_ID_LIST
	are added up. The gang's own polygon is skipped.

	The resulting {gang_id: count} mapping is printed and serialized with
	anyjson to data/<my.DATA_FOLDER>json/gang_tweet_counts.json; the directory
	is created when it does not exist. Nothing is returned.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)

	print('Finding tweet count by each gang...')
	foreign_counts = {}
	for gang_id in hbk_users_in_gang_t:
		own_tweets = prep.keepUserIds(hbk_all_tweets, hbk_users_in_gang_t[gang_id])
		# Only polygons belonging to other gangs contribute to the total.
		foreign_counts[gang_id] = sum(
			len(prep.keepPolygon(own_tweets, tty_polys[other_id]))
			for other_id in my.HBK_GANG_ID_LIST
			if other_id != gang_id
		)
	print('Each gang\'s tweet count: %s' % foreign_counts)

	json_dir = 'data/' + my.DATA_FOLDER + 'json/'
	if not os.path.exists(json_dir):
		os.makedirs(json_dir)
	with open(json_dir + 'gang_tweet_counts.json', 'wb') as out_file:
		out_file.write(anyjson.dumps(foreign_counts))
Example #2
0
def calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t, dist_norm=None, hbk_user_home_loc=None):
	"""Return the gang-to-territory visitation matrix, using an on-disk cache.

	visit_mat[i][j] describes the tweets of gang i's users (selected with
	prep.keepUserIds) that prep.keepPolygon keeps for tty_polys[j]:

	* without dist_norm it is the number of such tweets;
	* with dist_norm each tweet contributes 1 / dist_norm[bin], where
	  bin = int(round(dist / 100 + 1)) and dist is geo.distance between the
	  tweet position and the home position of tweet[0]; the sum is rounded
	  to 5 decimals.

	Gangs from my.HBK_GANG_ID_LIST that are missing in hbk_users_in_gang_t get
	a row of zeros.

	The matrix is cached under data/<my.DATA_FOLDER>json/ as visit_mat.pickle
	(plain counts) or visit_mat__dist_norm.pickle (distance-normalised). When
	the matching file exists it is loaded and returned as-is, without looking
	at the other arguments.

	Args:
		hbk_all_tweets: tweet records; index 0 is used as the key into the
			home-location map, indices 1 and 2 are passed to geo.xyz
			(presumably lat/lng - confirm against the loader).
		tty_polys: mapping gang id -> polygon accepted by prep.keepPolygon.
		hbk_users_in_gang_t: mapping gang id -> user ids of that gang.
		dist_norm: optional container indexed by integer distance bin. A
			None or empty value selects plain counting.
		hbk_user_home_loc: iterable of records whose items 0, 1, 2 are used
			as key and home coordinates; required when dist_norm is given
			and no cache file exists.

	Returns:
		dict of dicts: visit_mat[from_gang_id][to_gang_id].

	Raises:
		TypeError: dist_norm is given, the cache is missing and
			hbk_user_home_loc is None.
	"""
	print('Calculating visitation matrix...')

	# Decide the mode once so that cache selection and the computation below
	# can never disagree (previously `not dist_norm` vs `dist_norm == None`).
	use_dist_norm = bool(dist_norm)
	json_dir = 'data/' + my.DATA_FOLDER + 'json/'
	if use_dist_norm:
		cache_path = json_dir + 'visit_mat__dist_norm.pickle'
	else:
		cache_path = json_dir + 'visit_mat.pickle'

	# Load visit matrix .pickle if it exists.
	if os.path.exists(cache_path):
		# NOTE(security): pickle.load executes arbitrary code from the file;
		# only safe because this cache is written by this function itself.
		with open(cache_path, 'rb') as fp1:
			return pickle.load(fp1)

	# Otherwise calculate the visit matrix from scratch.
	home_by_user = {}
	if use_dist_norm:
		print('...for distance norm.')
		if hbk_user_home_loc is None:
			raise TypeError('hbk_user_home_loc is required when dist_norm is given')
		for user_home in hbk_user_home_loc:
			home_by_user[user_home[0]] = [user_home[1], user_home[2]]

	visit_mat = {}
	for gang_id in my.HBK_GANG_ID_LIST:
		row = visit_mat[gang_id] = {}

		if gang_id not in hbk_users_in_gang_t:
			# No known users for this gang: it visits nobody.
			for to_id in my.HBK_GANG_ID_LIST:
				row[to_id] = 0
			continue

		this_gang_tweets = prep.keepUserIds(hbk_all_tweets, hbk_users_in_gang_t[gang_id])
		for to_id in my.HBK_GANG_ID_LIST:
			this_tty_tweets = prep.keepPolygon(this_gang_tweets, tty_polys[to_id])
			if not use_dist_norm:
				row[to_id] = len(this_tty_tweets)
				continue

			visit_val = 0.0
			for tweet in this_tty_tweets:
				home = home_by_user[tweet[0]]
				dist = geo.distance(geo.xyz(tweet[1], tweet[2]), geo.xyz(home[0], home[1]))
				# Float literals force true division: under Python 2 an int
				# operand would otherwise truncate (e.g. 1/3 == 0).
				dist_i = int(round(dist / 100.0 + 1))
				visit_val += 1.0 / dist_norm[dist_i]
			row[to_id] = round(visit_val, 5)
	print('Done calculating visitation matrix...')

	# Store visit matrix .pickle for the next call.
	if not os.path.exists(json_dir):
		os.makedirs(json_dir)
	with open(cache_path, 'wb') as fp1:
		pickle.dump(visit_mat, fp1)

	return visit_mat
Example #3
0
def generate_gang_locs_json():
	"""Dump, per gang, the [tweet[1], tweet[2]] pairs of its users' tweets.

	Every gang id in my.HBK_GANG_ID_LIST gets an entry: the tweets returned by
	prep.keepUserIds for that gang's users, reduced to two-element lists made
	of items 1 and 2 of each tweet (presumably the coordinates - confirm
	against the tweet loader). Gangs absent from load.loadUsersInGangTty's
	result get an empty list.

	Per-gang counts and the overall total are printed, and the mapping is
	serialized with anyjson to data/<my.DATA_FOLDER>json/gang_tweet_locs.json,
	creating the directory if needed. Nothing is returned.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)

	print('Finding tweets by each gang...')
	locs_by_gang = {}
	for gang_id in my.HBK_GANG_ID_LIST:
		if gang_id in hbk_users_in_gang_t:
			gang_tweets = prep.keepUserIds(hbk_all_tweets, hbk_users_in_gang_t[gang_id])
		else:
			gang_tweets = []
		locs_by_gang[gang_id] = [[tweet[1], tweet[2]] for tweet in gang_tweets]

	# Count once and reuse for both the per-gang report and the grand total.
	counts = dict((gang_id, len(locs)) for gang_id, locs in locs_by_gang.items())
	print('Each gang\'s tweet count: %s' % counts)
	print('Total tweets = %s' % sum(counts.values()))

	json_dir = 'data/' + my.DATA_FOLDER + 'json/'
	if not os.path.exists(json_dir):
		os.makedirs(json_dir)
	with open(json_dir + 'gang_tweet_locs.json', 'wb') as out_file:
		out_file.write(anyjson.dumps(locs_by_gang))
Example #4
0
def see_gang_tweet_counts():
	"""Print a per-gang summary table of tweet and user counts.

	For each gang id in load.loadUsersInGangTty's result the number of tweets
	kept by prep.keepUserIds is counted. Gangs with a non-zero count get one
	table row: id, name (load.loadLocNames), tweet count, user count, number
	of entries in my.HBK_GANG_AND_RIVAL_IDS[gang_id], and tweets per user
	truncated to an int. Two totals follow: users of the gangs that have
	tweets, and tweets over all gangs. Nothing is returned or written to disk.
	"""
	tty_polys, hbk_poly = load.loadLocPoly()
	tty_names = load.loadLocNames()
	hbk_all_tweets = load.loadAllTweets()
	hbk_user_home_loc = load.loadAllHomeLoc(hbk_poly)
	hbk_users_in_gang_t = load.loadUsersInGangTty(tty_polys, hbk_user_home_loc)

	print('Finding tweet count by each gang...')
	tweet_counts = {}
	for gang_id in hbk_users_in_gang_t:
		gang_tweets = prep.keepUserIds(hbk_all_tweets, hbk_users_in_gang_t[gang_id])
		tweet_counts[gang_id] = len(gang_tweets)
	print('Each gang\'s tweet count: %s' % tweet_counts)

	# Header and data rows share one column layout.
	row_format = '%2s %15s %5s %5s %8s %6s'
	print(row_format % ('ID', 'NAME', '#TWs', '#USERs', '#RIVALs', 'TW/USR'))
	for gang_id in tweet_counts:
		n_tweets = tweet_counts[gang_id]
		if n_tweets == 0:
			continue
		gang_name = tty_names[gang_id]
		n_users = len(hbk_users_in_gang_t[gang_id])
		n_rivals = len(my.HBK_GANG_AND_RIVAL_IDS[gang_id])
		tweets_per_user = int(n_tweets / float(n_users))
		print(row_format % (gang_id, gang_name, n_tweets, n_users, n_rivals, tweets_per_user))

	total_users = sum(
		len(hbk_users_in_gang_t[gang_id])
		for gang_id in tweet_counts
		if tweet_counts[gang_id] != 0
	)
	print('Total number of users: %s' % total_users)
	print('Total tweets from all users: %s' % sum(tweet_counts[gang_id] for gang_id in tweet_counts))