def print_top_betweenness(component, size=10): bc = nx.betweenness_centrality(component, weight='weight', normalized=True) for node in sorted(bc, key=bc.get, reverse=True)[0:size]: query = {'spec': {'user.id': int(node) }, 'fields':{'_id':0,'user.screen_name': 1} } this_data = bf.query_mongo_get_list(query, limit=1) print this_data['user']['screen_name'],'&', "{0:.4f}".format(bc[node]), '\\\\' return bc
def get_range(query=None, tz_offset_hours=6):
    """Return the sorted, timezone-adjusted creation times of matched tweets.

    Parameters
    ----------
    query : dict, optional
        Mongo query spec; defaults to ``bf.all_tweets``. The default is now
        resolved at call time — the original ``query=bf.all_tweets`` default
        was evaluated once at import time, coupling module import to ``bf``'s
        state at that moment.
    tz_offset_hours : int, optional
        Hours subtracted from each timestamp to adjust for time zone
        (default 6, preserving the original hard-coded UTC-6 shift).

    Returns
    -------
    list of datetime.datetime, in ascending order.

    Note: each matched tweet dict's ``"created_at"`` value is shifted in
    place, matching the original behavior.
    """
    if query is None:
        query = bf.all_tweets
    offset = datetime.timedelta(hours=tz_offset_hours)
    times = []
    for tweet in bf.query_mongo_get_list(query):
        tweet["created_at"] -= offset  # adjust for time zone, in place
        times.append(tweet["created_at"])
    return sorted(times)
def get_data_list(time_step=60, just_count=True, limit=False):
    """Bucket every tweet into time-step bins.

    Parameters
    ----------
    time_step : int, optional
        Bin width passed to ``f.roundTime`` via ``roundTo`` (default 60).
    just_count : bool, optional
        If True (default), map each bin to the number of tweets in it;
        otherwise map each bin to the list of tweet dicts.
    limit : unused
        Kept only for interface compatibility; the body never reads it.

    Returns
    -------
    dict
        Keys: rounded timestamps (time steps). Values: counts (int) when
        ``just_count`` is True, else lists of tweet dicts.
    """
    tweets = bf.query_mongo_get_list(bf.all_tweets)
    flood_days = {}
    for tweet in tweets:
        # Shift the timestamp by -6h (time-zone adjustment), in place.
        tweet["created_at"] -= datetime.timedelta(hours=6)
        this_hour = f.roundTime(tweet["created_at"], roundTo=time_step)
        # dict.has_key() is deprecated (removed in Python 3); setdefault
        # covers both the first-tweet and append cases in one call.
        flood_days.setdefault(this_hour, []).append(tweet)
    if just_count:
        # Replace each bucket's tweet list with its length. Only values are
        # reassigned (no keys added/removed), so iterating the dict is safe.
        for step in flood_days:
            flood_days[step] = len(flood_days[step])
    return flood_days
# NOTE(review): these plt calls appear to finish a reciprocity scatter plot
# started before this chunk — confirm against the full file.
plt.title('Out Degree vs. Reciprocity')
plt.ylabel("Reciprocity per Node")
plt.xlabel("Out Degree")
plt.show()


def print_top_betweenness(component, size=10):
    """Print the top `size` nodes of `component` by weighted, normalized
    betweenness centrality — one LaTeX table row ('name & score \\\\') per
    node — and return the full centrality dict (node -> score).
    """
    bc = nx.betweenness_centrality(component, weight='weight', normalized=True)
    for node in sorted(bc, key=bc.get, reverse=True)[0:size]:
        # Resolve the numeric user id to a screen name via Mongo, projecting
        # only user.screen_name.
        query = {'spec': {'user.id': int(node) }, 'fields':{'_id':0,'user.screen_name': 1} }
        this_data = bf.query_mongo_get_list(query, limit=1)
        # NOTE(review): query_mongo_get_list's name suggests a list return;
        # indexing with 'user' looks suspect — confirm it yields one document
        # when limit=1.
        print this_data['user']['screen_name'],'&', "{0:.4f}".format(bc[node]), '\\\\'
    return bc


if __name__ == '__main__':
    """First, get only geo_tagged_tweets"""
    # Build the user-mentions graph from geo-tagged tweets only, then report
    # reciprocity and self-loop statistics on it.
    geo_tagged_user_mentions = bf.query_mongo_get_list(bf.only_geo_tagged)
    print "Geo_Tagged found:", len(geo_tagged_user_mentions), "Making Graph..."
    umg_geo = user_mentions_graph(geo_tagged_user_mentions)
    print 'Users',len(umg_geo.nodes())
    # connected_components = nx.weakly_connected_component_subgraphs(umg_geo)
    # print "Number of Connected Components:", len(connected_components)
    # for subgraph in connected_components[0:5]:
    #     print "Component has", len(subgraph.nodes())
    # #f.draw_network_plt(connected_components[1])
    print '\nreciprocity, weighted:', f.get_graph_reciprocity(umg_geo)
    print 'reciprocity, unweighted:', f.get_graph_reciprocity(umg_geo, weighted=False)
    #print f.reciprocity_by_degree(umg_geo, 'in')
    print "self loops:"
    f.print_top_self_loops(umg_geo, size=10)
plt.plot([0,0],[0,1000], 'k-') #plt.plot([-.01,1],[],'r-') plt.title('Number of Triangles vs. Clustering Coefficient') plt.ylabel("Triangles") plt.xlabel("Clustering Coefficient") plt.xlim([0,.06]) plt.ylim([0,60]) return plt ############################## RUNTIME ####################################### if __name__ == '__main__': # Get all retweets retweets = bf.query_mongo_get_list(bf.retweets) print "Number of Retweets: ", len(retweets) retweets_graph = retweeted_graph(retweets) print "Nodes: ", len(retweets_graph.nodes()), "Edges: ", len(retweets_graph.edges()) # Trim nodes to greater than 500 trimmed_retweets = f.trim_graph(retweets_graph,'weight', 500) print "Trimmed to 500:",len(trimmed_retweets.nodes()) #f.write_network_gml(trimmed_retweets,'retweeted_hashtags_gt_500') # Triangles Vs. Clustering Coefficient make_triangle_cc_plot(trimmed_retweets, show_labels=True, threshold=60).show() # Centralities: #f.print_betweenness_centrality(trimmed_retweets)