def generate_gang_tweet_counts():
    """Write each gang's count of tweets made inside other gangs' territories.

    For every gang id returned by load.loadUsersInGangTty, the tweets of that
    gang's users are selected with prep.keepUserIds and then counted once per
    foreign territory polygon (every id in my.HBK_GANG_ID_LIST except the
    gang's own) via prep.keepPolygon. The gang's own territory is skipped.

    The resulting {gang_id: count} dict is printed and serialized with
    anyjson to data/<my.DATA_FOLDER>json/gang_tweet_counts.json; the
    directory is created when missing.
    """
    tty_polys, hbk_poly = load.loadLocPoly()
    all_tweets = load.loadAllTweets()
    home_locs = load.loadAllHomeLoc(hbk_poly)
    users_by_gang = load.loadUsersInGangTty(tty_polys, home_locs)

    print('Finding tweet count by each gang...')
    counts = {}
    for gang_id in users_by_gang:
        own_tweets = prep.keepUserIds(all_tweets, users_by_gang[gang_id])
        # Only tweets that land in some *other* gang's polygon are counted.
        counts[gang_id] = sum(
            len(prep.keepPolygon(own_tweets, tty_polys[other_id]))
            for other_id in my.HBK_GANG_ID_LIST
            if gang_id != other_id
        )
    print('Each gang\'s tweet count: %s' % counts)

    out_dir = 'data/' + my.DATA_FOLDER + 'json/'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(out_dir + 'gang_tweet_counts.json', 'wb') as out_file:
        out_file.write(anyjson.dumps(counts))
def calcVisitationMat(hbk_all_tweets, tty_polys, hbk_users_in_gang_t, dist_norm=None, hbk_user_home_loc=None):
    """Return the gang-by-territory visitation matrix, cached on disk.

    visit_mat[i][j] describes the tweets by users of gang i that fall inside
    territory polygon j, for every i and j in my.HBK_GANG_ID_LIST:

    * without dist_norm it is the plain tweet count;
    * with dist_norm each tweet contributes 1.0 / dist_norm[dist_i], where
      dist_i = int(round(dist/100 + 1)) and dist is geo.distance between the
      tweet's location and its author's home location. The sum is rounded
      to 5 decimals.

    Gangs missing from hbk_users_in_gang_t get an all-zero row.

    Parameters:
        hbk_all_tweets: tweet records; index 0 is used as the user id and
            indices 1 and 2 are passed to geo.xyz as the location.
        tty_polys: mapping gang id -> territory polygon (handed to
            prep.keepPolygon).
        hbk_users_in_gang_t: mapping gang id -> user ids of that gang.
        dist_norm: optional container indexed by the integer distance bin
            dist_i. Any falsy value (None, empty) means "no normalization".
        hbk_user_home_loc: iterable of records whose index 0 is the user id
            and indices 1 and 2 the home location. Required whenever the
            distance-normalized matrix has to be computed.

    Returns:
        dict of dicts, visit_mat[from_gang_id][to_gang_id].

    Caching: the result is pickled to data/<my.DATA_FOLDER>json/ as
    visit_mat.pickle or visit_mat__dist_norm.pickle. If that file already
    exists it is loaded and returned without looking at the arguments, so
    the file must be deleted by hand when the input data changes.
    """
    print('Calculating visitation matrix...')

    # A single flag drives cache selection, computation and storage. The
    # previous code mixed `not dist_norm` with `dist_norm == None`, so a falsy
    # non-None dist_norm could put a normalized matrix in the raw cache file.
    use_dist_norm = bool(dist_norm)
    json_dir = 'data/' + my.DATA_FOLDER + 'json/'
    cache_name = 'visit_mat__dist_norm.pickle' if use_dist_norm else 'visit_mat.pickle'
    cache_path = json_dir + cache_name

    # Load visit matrix .pickle if it exists.
    if os.path.exists(cache_path):
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe as long as the cache file is written by this program alone.
        with open(cache_path, 'rb') as fp1:
            return pickle.load(fp1)

    # Calculate visit matrix if the .pickle doesn't exist.
    home_by_user = {}
    if use_dist_norm:
        print('...for distance norm.')
        for user_home in hbk_user_home_loc:
            home_by_user[user_home[0]] = [user_home[1], user_home[2]]

    visit_mat = {}
    for gang_id in my.HBK_GANG_ID_LIST:
        row = {}
        visit_mat[gang_id] = row

        if gang_id not in hbk_users_in_gang_t:
            # No known users for this gang: it visits nobody.
            for to_id in my.HBK_GANG_ID_LIST:
                row[to_id] = 0
            continue

        this_gang_tweets = prep.keepUserIds(hbk_all_tweets, hbk_users_in_gang_t[gang_id])
        for to_id in my.HBK_GANG_ID_LIST:
            this_tty_tweets = prep.keepPolygon(this_gang_tweets, tty_polys[to_id])
            if not use_dist_norm:
                row[to_id] = len(this_tty_tweets)
                continue

            visit_val = 0.0
            for tweet in this_tty_tweets:
                home = home_by_user[tweet[0]]
                dist = geo.distance(geo.xyz(tweet[1], tweet[2]), geo.xyz(home[0], home[1]))
                dist_i = int(round(dist/100 + 1))
                # 1.0 forces true division: under Python 2, 1/<int> would
                # silently truncate every contribution to 0.
                visit_val += 1.0 / dist_norm[dist_i]
            row[to_id] = round(visit_val, 5)
    print('Done calculating visitation matrix...')

    # Store visit matrix .pickle (only freshly computed results reach here).
    if not os.path.exists(json_dir):
        os.makedirs(json_dir)
    with open(cache_path, 'wb') as fp1:
        pickle.dump(visit_mat, fp1)
    return visit_mat
def generate_gang_locs_json():
    """Write the tweet locations of every gang's users to a JSON file.

    For each id in my.HBK_GANG_ID_LIST the tweets of that gang's users are
    selected with prep.keepUserIds (gangs absent from the user mapping get an
    empty list) and reduced to [tweet[1], tweet[2]] pairs. Per-gang and total
    counts are printed, and the {gang_id: [[.., ..], ...]} dict is serialized
    with anyjson to data/<my.DATA_FOLDER>json/gang_tweet_locs.json; the
    directory is created when missing.
    """
    tty_polys, hbk_poly = load.loadLocPoly()
    all_tweets = load.loadAllTweets()
    home_locs = load.loadAllHomeLoc(hbk_poly)
    users_by_gang = load.loadUsersInGangTty(tty_polys, home_locs)

    print('Finding tweets by each gang...')
    locs_by_gang = {}
    for gang_id in my.HBK_GANG_ID_LIST:
        if gang_id in users_by_gang:
            gang_tweets = prep.keepUserIds(all_tweets, users_by_gang[gang_id])
        else:
            gang_tweets = []
        # Keep only the two location fields of each tweet record.
        locs_by_gang[gang_id] = [[tweet[1], tweet[2]] for tweet in gang_tweets]

    counts = dict((gang_id, len(locs_by_gang[gang_id])) for gang_id in locs_by_gang)
    print('Each gang\'s tweet count: %s' % counts)
    print('Total tweets = %s' % (sum(len(locs_by_gang[gang_id]) for gang_id in locs_by_gang)))

    out_dir = 'data/' + my.DATA_FOLDER + 'json/'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(out_dir + 'gang_tweet_locs.json', 'wb') as out_file:
        out_file.write(anyjson.dumps(locs_by_gang))
def see_gang_tweet_counts():
    """Print a summary table of tweet and user counts for each gang.

    For every gang id returned by load.loadUsersInGangTty the number of
    tweets by that gang's users is computed with prep.keepUserIds. Gangs with
    a non-zero tweet count get one table row: id, name (load.loadLocNames),
    tweet count, user count, number of entries in
    my.HBK_GANG_AND_RIVAL_IDS[gang_id], and tweets per user truncated to an
    int. Totals follow: users are summed over the gangs shown in the table,
    tweets over all gangs.
    """
    tty_polys, hbk_poly = load.loadLocPoly()
    tty_names = load.loadLocNames()
    all_tweets = load.loadAllTweets()
    home_locs = load.loadAllHomeLoc(hbk_poly)
    users_by_gang = load.loadUsersInGangTty(tty_polys, home_locs)

    print('Finding tweet count by each gang...')
    tweet_counts = {}
    for gang_id in users_by_gang:
        tweet_counts[gang_id] = len(prep.keepUserIds(all_tweets, users_by_gang[gang_id]))
    print('Each gang\'s tweet count: %s' % tweet_counts)

    row_fmt = '%2s %15s %5s %5s %8s %6s'
    print(row_fmt % ('ID', 'NAME', '#TWs', '#USERs', '#RIVALs', 'TW/USR'))

    # Gangs without any tweets are left out of the table and the user total.
    active_ids = [gang_id for gang_id in tweet_counts if tweet_counts[gang_id] != 0]
    for gang_id in active_ids:
        n_tweets = tweet_counts[gang_id]
        n_users = len(users_by_gang[gang_id])
        print(row_fmt % (
            gang_id,
            tty_names[gang_id],
            n_tweets,
            n_users,
            len(my.HBK_GANG_AND_RIVAL_IDS[gang_id]),
            int(n_tweets / float(n_users)),
        ))

    print('Total number of users: %s' % sum(len(users_by_gang[gang_id]) for gang_id in active_ids))
    print('Total tweets from all users: %s' % sum(tweet_counts.values()))