def build_graph():
    pair_list = TwitterUser.get_top_100_pair()
    DG = nx.DiGraph()
    # pair_list holds (twitter_user, foer) pairs; edges point follower -> followed
    DG.add_edges_from([(foer, twitter_user) for twitter_user, foer in pair_list])
    betweenness = nx.betweenness_centrality(DG)
    closeness = nx.closeness_centrality(DG)
    edge_betweenness = nx.edge_betweenness_centrality(DG)
    clustering_co = nx.clustering(nx.Graph(DG))  # clustering is defined on the undirected view
    page_rank = nx.pagerank(DG)
    for twitter_id in DG.nodes():
        t = TwitterUser.get_by_id(twitter_id)
        node = DG.node[twitter_id]
        node['user_id'] = t.user_id
        node['label'] = t.scrn_name
        node['follower_count'] = t.foer_cnt
        node['friend_count'] = t.friend_cnt
        node['status_count'] = t.status_cnt
        node['location'] = t.location
        node['verified'] = t.verified
        node['twitter_age'] = (date.today() - t.created_at).days
        node['daily_tweet'] = t.status_cnt * 1.0 / node['twitter_age']
        # pairs where this user is the followed one are in-edges; where s/he is the follower, out-edges
        node['indegree'] = len([(id, foer) for id, foer in pair_list if id == twitter_id])
        node['outdegree'] = len([(id, foer) for id, foer in pair_list if foer == twitter_id])
        node['cluster'] = clustering_co[twitter_id]
        node['betweenness'] = betweenness[twitter_id]
        node['closeness'] = closeness[twitter_id]
        node['page_rank'] = page_rank[twitter_id]
    for out_n, in_n in DG.edges():
        DG[out_n][in_n]['edge_betweenness'] = edge_betweenness[(out_n, in_n)]
    return DG
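
# Usage sketch (an assumption, not part of the original pipeline): rank the
# annotated nodes by the PageRank score stored in build_graph() above and
# print the most central accounts. print_top_by_pagerank is a hypothetical name.
def print_top_by_pagerank(n=10):
    DG = build_graph()
    ranked = sorted(DG.nodes(data=True), key=lambda item: item[1]['page_rank'], reverse=True)
    for twitter_id, data in ranked[:n]:
        print data['label'], data['page_rank']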
def build_graph_encoded():
    pair_list = TwitterUser.get_top_100_pair()
    DG = nx.DiGraph()
    DG.add_edges_from(pair_list)
    for twitter_id in DG.nodes():
        t = TwitterUser.get_by_id(twitter_id)
        node = DG.node[twitter_id]
        node['twitter_id'] = t.user_id
        node['label'] = t.scrn_name.encode('utf-8')
        node['screen_name'] = t.scrn_name.encode('utf-8')
        node['name'] = t.name.encode('utf-8')
        node['follower_count'] = t.foer_cnt
        node['friend_count'] = t.friend_cnt
        node['status_count'] = t.status_cnt
        node['description'] = t.desc.encode('utf-8')
        node['location'] = t.location.encode('utf-8')
        node['created_at'] = str(t.created_at)
        node['verified'] = t.verified
        node['twitter_age'] = (date.today() - t.created_at).days
        node['daily_tweet'] = t.status_cnt * 1.0 / node['twitter_age']
        # counts restricted to the top-100 subgraph
        node['follower_count_top100'] = len([(id, foer) for id, foer in pair_list if id == twitter_id])
        node['friend_count_top100'] = len([(id, foer) for id, foer in pair_list if foer == twitter_id])
    return DG
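
# Usage sketch (assumption): build_graph_encoded() byte-encodes every string
# attribute, which suggests it feeds a file exporter. NetworkX's GEXF writer
# is one plausible target; the function name and output path here are hypothetical.
def export_encoded_graph(path='top100.gexf'):
    DG = build_graph_encoded()
    nx.write_gexf(DG, path)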
def save_user_followers(user):
    try:
        c = Cursor(api.followers, user.user_id)
    except TweepError as e:
        print "tweep breaks!"
        print e.message
    # keep a single page iterator; calling c.pages() each loop would restart at page 1
    pages = c.pages()
    page_count = 0
    while True:
        try:
            print 'taking a rest before moving to the next page'
            sleep(10)
            page = pages.next()
            page_count += 1
            print "start a new page of user", user.scrn_name, 'page', page_count
        except TweepError as e:
            print "tweep breaks!"
            print e.message
            continue
        except StopIteration:
            print "Move to next unscanned"
            break
        for tweepy_user in page:
            print "follower -----", tweepy_user.screen_name, "----- found......"
            if TwitterUser.get_by_id(tweepy_user.id) or is_in_no_chn(tweepy_user.id):
                print 'ALREADY in DB!!, skip'
                continue
            try:
                # protected accounts are only visible if we follow them
                if not tweepy_user.protected or (tweepy_user.protected and tweepy_user.following):
                    if is_chn(tweepy_user):
                        print "and speaks Chinese! Saving...."
                        TwitterUser.save_tweepy_user(tweepy_user)
                    else:
                        save_non_chn(tweepy_user.id)
                        print "pity, s/he is not a Chinese speaker, next..."
                        continue
            except TweepError as e:
                print "tweep breaks!"
                print e.message
        try:
            print "the remaining hit is", api.rate_limit_status()['remaining_hits']
        except TweepError as e:
            print "tweep breaks!"
            print e.message
        page = []
    user.update_scanned()
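
# Driver sketch (assumption): TwitterUser.get_unscanned() is a hypothetical
# accessor returning users not yet marked by update_scanned(); it is not
# defined in this module, and scan_pending_users is an illustrative name.
def scan_pending_users():
    for user in TwitterUser.get_unscanned():
        save_user_followers(user)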
def get_relation(top_list):
    top_list.reverse()  # note: reverses the caller's list in place
    for twitter_id in top_list:
        print twitter_id, 'analyzing.....'
        sleep(3)
        print 'getting followers id...'
        twitter_user = TwitterUser.get_by_id(twitter_id)
        try:
            tweepy_obj = twitter_user.tweepy_obj
            foer_ids = get_follower_ids(tweepy_obj)
            print 'remaining hits:', api.rate_limit_status()['remaining_hits']
            top_100_foer = list(set(foer_ids).intersection(TOP_100))
            print 'saving relation....'
            for id in top_100_foer:
                TwitterUser.save_relationship(twitter_id, id)
        except TweepError as e:
            print "tweep breaks!"
            print e.message
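
# Usage sketch (assumption): TOP_100 is the module-level collection of top-100
# user ids that get_relation() intersects against. Passing a copy keeps the
# original ordering intact, since get_relation() reverses its argument in place.
def build_top100_relations():
    get_relation(list(TOP_100))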