def index(req, session, postgres_handle):
    """Assemble the template context for the social map index page.

    The root user comes from the ``user_id`` request parameter when that id
    resolves to a user; otherwise the 'SmartTypes' account is used. When the
    chosen user has no latest reduction, both the user and the reduction are
    read again for 'SmartTypes'.

    ``session`` is accepted but not used here.

    Returns a dict holding the active tab, the template path, the root user,
    its reduction, the number of groups in that reduction and the users that
    have a reduction.
    """
    fallback_name = 'SmartTypes'

    root_user = None
    if 'user_id' in req.params:
        root_user = TwitterUser.get_by_id(req.params['user_id'], postgres_handle)
    root_user = root_user or TwitterUser.by_screen_name(fallback_name, postgres_handle)

    reduction = TwitterReduction.get_latest_reduction(root_user.id, postgres_handle)
    if not reduction:
        # Nothing to show for the requested user: switch to the fallback map.
        root_user = TwitterUser.by_screen_name(fallback_name, postgres_handle)
        reduction = TwitterReduction.get_latest_reduction(root_user.id, postgres_handle)

    group_count = len(TwitterGroup.all_groups(reduction.id, postgres_handle))
    reduced_users = TwitterReduction.get_users_with_a_reduction(postgres_handle)
    return {
        'active_tab': 'social_map',
        'template_path': 'social_map/index.html',
        'root_user': root_user,
        'reduction': reduction,
        'num_groups': group_count,
        'users_with_a_reduction': reduced_users,
    }
def user(request):
    """Look up one Twitter user for the request.

    The ``user_id`` parameter (converted to int) wins when present; without
    it the ``screen_name`` parameter is required and used instead.

    Returns a dict with the looked-up user under 'twitter_user'.
    """
    params = request.params
    if 'user_id' not in params:
        found = TwitterUser.by_screen_name(params['screen_name'])
    else:
        found = TwitterUser.get_by_id(int(params['user_id']))
    return {'twitter_user': found}
def get_igraph_g(self):
    """Print this object's members ordered by PageRank, lowest score first.

    A ``{user id: set of following ids}`` mapping is built from the members
    returned by ``self.get_members()``, converted with ``get_igraph_graph``
    and ranked with ``pagerank(damping=0.65)``. For every vertex the score is
    printed, followed by the screen name of the user whose id is the vertex
    name.

    Nothing is returned.
    """
    from smarttypes.model.twitter_user import TwitterUser
    from smarttypes.graphreduce.reduce_graph import get_igraph_graph

    network = {}
    for _score, member_id in self.get_members():
        member = TwitterUser.get_by_id(member_id, self.postgres_handle)
        network[member.id] = set(member.following_ids)

    graph = get_igraph_graph(network)
    ranks = graph.pagerank(damping=0.65)
    # Sorting the (score, name) pairs puts the lowest score first.
    for rank, vertex_name in sorted(zip(ranks, graph.vs['name'])):
        print(rank)
        print(TwitterUser.get_by_id(vertex_name, self.postgres_handle).screen_name)
def get_igraph_g(self):
    """Print this object's members ordered by PageRank, lowest score first.

    A ``{user id: set of following ids}`` mapping is built from the members
    returned by ``self.get_members()``, converted with ``get_igraph_graph``
    and ranked with ``pagerank(damping=0.65)``. For every vertex the score is
    printed, followed by the screen name of the user whose id is the vertex
    name.

    Nothing is returned.
    """
    from smarttypes.model.twitter_user import TwitterUser
    from smarttypes.graphreduce.reduce_graph import get_igraph_graph

    network = {}
    for _score, member_id in self.get_members():
        member = TwitterUser.get_by_id(member_id, self.postgres_handle)
        network[member.id] = set(member.following_ids)

    graph = get_igraph_graph(network)
    ranks = graph.pagerank(damping=0.65)
    # Sorting the (score, name) pairs puts the lowest score first.
    for rank, vertex_name in sorted(zip(ranks, graph.vs["name"])):
        print(rank)
        print(TwitterUser.get_by_id(vertex_name, self.postgres_handle).screen_name)
def complete_signin(request_key, verifier, postgres_handle):
    """Finish the Twitter OAuth sign-in for the session stored under ``request_key``.

    The ``TwitterSession`` for the request key is loaded. If it already has
    an access key, the credentials for that key are reused; otherwise the
    request token is exchanged using ``verifier`` and the matching
    ``TwitterCredentials`` are looked up or created, and their access key is
    stored on the session. When the credentials have no ``twitter_user``, the
    user is upserted from ``credentials.api_handle.me()`` and linked.

    Returns whatever ``session.save()`` returns.

    NOTE(review): the signup email is read here as part of the "no
    twitter_user yet" branch -- confirm against the intended flow.
    """
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    session = TwitterSession.get_by_request_key(request_key, postgres_handle)
    if session.access_key:
        # This happens if we get the exact same response from twitter several
        # times, perhaps from frantic clicking or back / forward browsing.
        credentials = TwitterCredentials.get_by_access_key(session.access_key, postgres_handle)
    else:
        auth.set_request_token(request_key, session.request_secret)
        auth.get_access_token(verifier)
        # may have signed up already
        credentials = TwitterCredentials.get_by_access_key(auth.access_token.key, postgres_handle)
        if not credentials:
            credentials = TwitterCredentials.create(auth.access_token.key, auth.access_token.secret, postgres_handle)
        session.access_key = credentials.access_key
    if not credentials.twitter_user:
        # probably don't have the user in our db yet
        user = TwitterUser.upsert_from_api_user(credentials.api_handle.me(), postgres_handle)
        credentials.twitter_id = user.id
        credentials.save()
        # email
        screen_name = credentials.twitter_user.screen_name
        email_utils.send_email('*****@*****.**', ['*****@*****.**'], '%s signed up' % screen_name, 'smarttypes signup!')
    return session.save()
def complete_signin(request_key, verifier, postgres_handle):
    """Finish the Twitter OAuth sign-in for the session stored under ``request_key``.

    The ``TwitterSession`` for the request key is loaded. If it already has
    an access key, the credentials for that key are reused; otherwise the
    request token is exchanged using ``verifier`` and the matching
    ``TwitterCredentials`` are looked up or created, and their access key is
    stored on the session. When the credentials have no ``twitter_user``, the
    user is upserted from ``credentials.api_handle.me()`` and linked.

    Returns whatever ``session.save()`` returns.

    NOTE(review): the signup email is read here as part of the "no
    twitter_user yet" branch -- confirm against the intended flow.
    """
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    session = TwitterSession.get_by_request_key(request_key, postgres_handle)
    if session.access_key:
        # This happens if we get the exact same response from twitter several
        # times, perhaps from frantic clicking or back / forward browsing.
        credentials = TwitterCredentials.get_by_access_key(session.access_key, postgres_handle)
    else:
        auth.set_request_token(request_key, session.request_secret)
        auth.get_access_token(verifier)
        # may have signed up already
        credentials = TwitterCredentials.get_by_access_key(auth.access_token.key, postgres_handle)
        if not credentials:
            credentials = TwitterCredentials.create(auth.access_token.key, auth.access_token.secret, postgres_handle)
        session.access_key = credentials.access_key
    if not credentials.twitter_user:
        # probably don't have the user in our db yet
        user = TwitterUser.upsert_from_api_user(credentials.api_handle.me(), postgres_handle)
        credentials.twitter_id = user.id
        credentials.save()
        # email
        screen_name = credentials.twitter_user.screen_name
        email_utils.send_email('*****@*****.**', ['*****@*****.**'], '%s signed up' % screen_name, 'smarttypes signup!')
    return session.save()
def index(req, session, postgres_handle):
    """Pick the reduction to show for the requested path.

    The first path segment is tried as a screen name (latest reduction of
    that user) and, failing that, as a reduction id when it is an integer.
    When the path has no first segment, the latest reduction of the first
    user in the shuffled ``user_reduction_counts`` list is used.

    ``session`` is accepted but not used here.

    Returns a dict with 'reduction_id' and 'reduction' (both None unless a
    reduction was found and its tiles are written to disk) and the shuffled
    'user_reduction_counts' list.
    """
    # If it's not a valid request keep the reduction None: don't do work for
    # bots that don't know what they're looking for.
    reduction = None
    user_reduction_counts = TwitterReduction.get_user_reduction_counts(postgres_handle)
    random.shuffle(user_reduction_counts)

    # The segment count must be compared, not the list itself: on Python 2 a
    # list always orders above an int, so the old test never guarded the
    # index below.
    path_parts = req.path.split('/')
    if len(path_parts) > 1 and path_parts[1]:
        # path looks like '/something'
        requested = path_parts[1]
        root_user = TwitterUser.by_screen_name(requested, postgres_handle)
        if root_user:
            reduction = TwitterReduction.get_latest_reduction(root_user.id, postgres_handle)
        if not reduction and is_int(requested):
            reduction = TwitterReduction.get_by_id(requested, postgres_handle)
    elif user_reduction_counts:
        # Nothing requested: use the first user of the shuffled list. With an
        # empty list there is nothing to show and the reduction stays None.
        reduction = TwitterReduction.get_latest_reduction(
            user_reduction_counts[0][0].id, postgres_handle)

    # Only expose a reduction whose tiles are already on disk.
    shown = reduction if reduction and reduction.tiles_are_written_to_disk else None
    return {
        'reduction_id': shown.id if shown else None,
        'reduction': shown,
        'user_reduction_counts': user_reduction_counts,
    }
def print_user_details(user_ids, postgres_handle):
    """Print ``screen_name -- description`` for every user in ``user_ids``.

    The description is cut to 100 characters and its newlines are replaced by
    spaces; users without a description get an empty one.

    Printing stays best effort, but per user: a user whose line cannot be
    built or printed is reported on stderr and the remaining users are still
    printed. Errors raised by the lookup itself are no longer swallowed.
    """
    import sys

    for user in TwitterUser.get_by_ids(user_ids, postgres_handle):
        try:
            description = user.description[:100].replace('\n', ' ') if user.description else ''
            print("%s -- %s" % (user.screen_name, description))
        except Exception as e:
            # Keep going, but leave a trace instead of dropping the user silently.
            sys.stderr.write("could not print user %r: %r\n" % (user.id, e))
def logged_in_user(request):
    """Build the context for the logged-in user view.

    The user is looked up by the required ``screen_name`` request parameter.

    Returns a dict with that user under 'logged_in_user' and the
    ``TwitterGroup`` class under 'TwitterGroup'.
    """
    context = {}
    context['logged_in_user'] = TwitterUser.by_screen_name(request.params['screen_name'])
    context['TwitterGroup'] = TwitterGroup
    return context
def load_user_and_the_people_they_follow(creds, user_id, postgres_handle):
    """Fetch ``user_id`` from the Twitter API using the handle in ``creds``.

    Raises:
        Exception: when the remaining hits reported by
            ``get_rate_limit_status`` are below the threshold of 10.

    When ``get_user`` raises a ``TweepError`` the error is printed. If its
    text says the page does not exist or the user has been suspended, the
    user is stamped with ``caused_an_error`` (a placeholder row using the id
    as screen name is saved first when the user is not in the db) and the
    change is committed. ``None`` is returned for any ``TweepError``.

    NOTE(review): ``api_user`` and ``is_root_user`` are not used in the part
    of the body visible here; presumably the function continues -- confirm.
    """
    remaining_hits_threshold = 10
    api_handle = creds.api_handle
    root_user = creds.root_user
    is_root_user = False
    if root_user.id == user_id:
        is_root_user = True
    # if is_root_user and 'root_user.is_fake_user':
    #     return None
    remaining_hits, reset_time = get_rate_limit_status(api_handle)
    if remaining_hits < remaining_hits_threshold:
        raise Exception("%s: remaining_hits less than threshold!" % root_user.screen_name)
    try:
        api_user = api_handle.get_user(user_id=user_id)
    except TweepError, ex:
        print "%s: api_handle.get_user(%s) got a TweepError %s" % (root_user.screen_name, user_id, ex)
        # Only these two messages mark the user itself as the problem.
        if 'Sorry, that page does not exist' in str(ex) or 'User has been suspended' in str(ex):
            print 'setting caused_an_error'
            model_user = TwitterUser.get_by_id(user_id, postgres_handle)
            if not model_user:
                # Unknown user: save a placeholder row so the error can be recorded.
                properties = {'id': user_id, 'screen_name': user_id}
                model_user = TwitterUser(postgres_handle=postgres_handle, **properties)
                model_user.save()
                postgres_handle.connection.commit()
            model_user.caused_an_error = datetime.now()
            model_user.save()
            postgres_handle.connection.commit()
        return None
def index(req, session, postgres_handle):
    """Assemble the template context for the social map index page.

    The root user comes from the ``user_id`` request parameter when that id
    resolves to a user; otherwise the 'SmartTypes' account is used. When the
    chosen user has no latest reduction, both the user and the reduction are
    read again for 'SmartTypes'.

    ``session`` is accepted but not used here.

    Returns a dict holding the active tab, the template path, the root user,
    its reduction, the number of groups in that reduction and the users that
    have a reduction.
    """
    fallback_name = 'SmartTypes'

    root_user = None
    if 'user_id' in req.params:
        root_user = TwitterUser.get_by_id(req.params['user_id'], postgres_handle)
    root_user = root_user or TwitterUser.by_screen_name(fallback_name, postgres_handle)

    reduction = TwitterReduction.get_latest_reduction(root_user.id, postgres_handle)
    if not reduction:
        # Nothing to show for the requested user: switch to the fallback map.
        root_user = TwitterUser.by_screen_name(fallback_name, postgres_handle)
        reduction = TwitterReduction.get_latest_reduction(root_user.id, postgres_handle)

    group_count = len(TwitterGroup.all_groups(reduction.id, postgres_handle))
    reduced_users = TwitterReduction.get_users_with_a_reduction(postgres_handle)
    return {
        'active_tab': 'social_map',
        'template_path': 'social_map/index.html',
        'root_user': root_user,
        'reduction': reduction,
        'num_groups': group_count,
        'users_with_a_reduction': reduced_users,
    }
def get_user_reduction_counts(cls, postgres_handle):
    """Return ``(user, reduction_count)`` pairs, one per root user.

    The counts come from grouping the ``twitter_reduction`` table by
    ``root_user_id``; every id is resolved to a user with
    ``TwitterUser.get_by_id``. The result is a list.
    """
    from smarttypes.model.twitter_user import TwitterUser

    qry = """ select root_user_id, count(root_user_id) as reduction_count from twitter_reduction group by root_user_id; """
    return [
        (TwitterUser.get_by_id(row['root_user_id'], postgres_handle), row['reduction_count'])
        for row in postgres_handle.execute_query(qry)
    ]
def community_features(req, session, postgres_handle):
    """Return the community features of the requested reduction as JSON data.

    The fourth path segment ('/social_map/community_features/something') is
    tried as a screen name (latest reduction of that user) and, failing that,
    as a reduction id when it is an integer.

    ``session`` is accepted but not used here.

    Returns a dict with the 'application/json' content type and, under
    'json', the result of ``get_geojson_community_features()`` or an empty
    list when no reduction was found.
    """
    reduction = None
    # The segment count must be compared, not the list itself: on Python 2 a
    # list always orders above an int, so the old test never guarded the
    # index below.
    path_parts = req.path.split('/')
    if len(path_parts) > 3 and path_parts[3]:
        requested = path_parts[3]
        root_user = TwitterUser.by_screen_name(requested, postgres_handle)
        if root_user:
            reduction = TwitterReduction.get_latest_reduction(root_user.id, postgres_handle)
        if not reduction and is_int(requested):
            reduction = TwitterReduction.get_by_id(requested, postgres_handle)
    return {
        'content_type': 'application/json',
        'json': reduction.get_geojson_community_features() if reduction else [],
    }
def top_users(self, num_users=20, just_ids=False):
    """Return up to ``num_users`` of the highest scoring members.

    Members come from ``self.get_members()`` as ``(score, user_id)`` tuples.
    The result is ordered from highest to lowest and stops at the first
    falsy score. Each entry is ``(score, user_id)`` when ``just_ids`` is
    true, otherwise ``(score, user)`` with the user loaded through
    ``TwitterUser.get_by_id``.
    """
    from smarttypes.model.twitter_user import TwitterUser

    best = heapq.nlargest(num_users, self.get_members())
    picked = []
    for score, user_id in best:
        if not score:
            break
        member = user_id if just_ids else TwitterUser.get_by_id(user_id, self.postgres_handle)
        picked.append((score, member))
    return picked
def get_users_with_a_reduction(cls, postgres_handle):
    """Return the users that own at least one row in ``twitter_reduction``.

    The distinct ``root_user_id`` values are read ordered by id and each one
    is resolved with ``TwitterUser.get_by_id``. The result is a list.
    """
    from smarttypes.model.twitter_user import TwitterUser

    qry = """ select distinct root_user_id from twitter_reduction order by root_user_id; """
    return [
        TwitterUser.get_by_id(row['root_user_id'], postgres_handle)
        for row in postgres_handle.execute_query(qry)
    ]
def node_details(req, session, postgres_handle):
    """Build the context for the node details template.

    Needs both the ``node_id`` and ``reduction_id`` request parameters; when
    either is missing the user stays None and both link lists stay empty.
    The links are only read when the user and the reduction were both found,
    so a stale ``reduction_id`` yields empty link lists instead of an
    AttributeError.

    ``session`` is accepted but not used here.

    Returns a dict with the template path, the user, 'in_links' and
    'out_links'.
    """
    twitter_user, in_links, out_links = None, [], []
    params = req.params
    if 'node_id' in params and 'reduction_id' in params:
        reduction = TwitterReduction.get_by_id(params['reduction_id'], postgres_handle)
        twitter_user = TwitterUser.get_by_id(params['node_id'], postgres_handle)
        if twitter_user and reduction:
            in_links, out_links = reduction.get_in_and_out_links_for_user(params['node_id'])
    return {
        'template_path': 'social_map/node_details.html',
        'twitter_user': twitter_user,
        'in_links': in_links,
        'out_links': out_links,
    }
def load_user_and_the_people_they_follow(api_handle, screen_name):
    """Fetch ``screen_name`` from the Twitter API.

    ``continue_or_exit(api_handle)`` is called before the request. When
    ``get_user`` raises a ``TweepError`` the error is printed; if its text is
    exactly "Not found", the stored user gets ``caused_an_error`` set to the
    current time, is saved and is returned.

    NOTE(review): ``api_user`` is not used in the part of the body visible
    here, and placing the final ``return`` inside the "Not found" branch is
    an assumption -- confirm both.
    """
    print "Attempting to load %s" % screen_name
    continue_or_exit(api_handle)
    try:
        api_user = api_handle.get_user(screen_name=screen_name)
    except TweepError, ex:
        print "Got a TweepError: %s." % ex
        if str(ex) == "Not found":
            print "Setting caused_an_error for %s " % screen_name
            # presumably the user is already in the db; by_screen_name's
            # result is used without a check -- verify
            model_user = TwitterUser.by_screen_name(screen_name)
            model_user.caused_an_error = datetime.now()
            model_user.save()
            return model_user
def top_users(self, num_users=20, just_ids=False):
    """Return at most ``num_users`` of the highest scoring users.

    ``self.scores_users`` holds ``(score, user_id)`` tuples. They are walked
    from highest to lowest and the walk stops at the first score that is not
    above .001. Each entry is ``(score, user_id)`` when ``just_ids`` is true,
    otherwise ``(score, user)`` with the user loaded through
    ``TwitterUser.get_by_id``.

    The limit is exact: the previous ``i <= num_users`` test let one extra
    entry through.
    """
    if not just_ids:
        # Only needed when users have to be loaded.
        from smarttypes.model.twitter_user import TwitterUser

    return_list = []
    ranked = sorted(self.scores_users, reverse=True)
    for position, (score, user_id) in enumerate(ranked):
        if position >= num_users or not score > .001:
            break
        if just_ids:
            return_list.append((score, user_id))
        else:
            return_list.append((score, TwitterUser.get_by_id(user_id)))
    return return_list
def pull_some_users(user_id):
    """Load the user ``user_id`` and then the users of its network.

    The root user is reloaded through ``load_user_and_the_people_they_follow``
    with its own api handle; afterwards users are loaded one by one for as
    long as ``get_id_of_someone_in_my_network_to_load()`` returns an id.

    Raises:
        Exception: when the user is not in the db, has no api credentials,
            or could not be reloaded (the loader returned nothing).
    """
    postgres_handle = PostgresHandle(smarttypes.connection_string)
    root_user = TwitterUser.get_by_id(user_id, postgres_handle)
    if not root_user:
        raise Exception('User ID: %s not in our DB!' % user_id)
    if not root_user.credentials:
        raise Exception('%s does not have api credentials!' % root_user.screen_name)

    api_handle = root_user.credentials.api_handle
    root_screen_name = root_user.screen_name
    root_user = load_user_and_the_people_they_follow(
        api_handle, root_user.id, postgres_handle, is_root_user=True)
    if not root_user:
        # The loader may return None when the API call fails; stop with a
        # clear message instead of an AttributeError on the next line.
        raise Exception('%s could not be loaded from the api!' % root_screen_name)

    load_this_user_id = root_user.get_id_of_someone_in_my_network_to_load()
    while load_this_user_id:
        load_user_and_the_people_they_follow(api_handle, load_this_user_id, postgres_handle)
        load_this_user_id = root_user.get_id_of_someone_in_my_network_to_load()
        #load_this_user_id = None
    print("Finshed loading all related users for %s!" % root_user.screen_name)
def node_details(req, session, postgres_handle):
    """Build the context for the node details template.

    Needs both the ``node_id`` and ``reduction_id`` request parameters; when
    either is missing the user stays None and both link lists stay empty.
    The links are only read when the user and the reduction were both found,
    so a stale ``reduction_id`` yields empty link lists instead of an
    AttributeError.

    ``session`` is accepted but not used here.

    Returns a dict with the template path, the user, 'in_links' and
    'out_links'.
    """
    twitter_user, in_links, out_links = None, [], []
    params = req.params
    if 'node_id' in params and 'reduction_id' in params:
        reduction = TwitterReduction.get_by_id(params['reduction_id'], postgres_handle)
        twitter_user = TwitterUser.get_by_id(params['node_id'], postgres_handle)
        if twitter_user and reduction:
            in_links, out_links = reduction.get_in_and_out_links_for_user(params['node_id'])
    return {
        'template_path': 'social_map/node_details.html',
        'twitter_user': twitter_user,
        'in_links': in_links,
        'out_links': out_links,
    }
def load_network_from_the_db(postgres_handle, distance):
    """Collect the network around the 'SmartTypes' user from the db.

    The result is an ``OrderedDict`` keyed by user id. It holds the
    'SmartTypes' user, every user it follows and, for each of those, the
    first ``distance`` users they follow. Every value is a dict with
    'following_ids' (a set), 'follower_ids' (an empty set),
    'following_count' and 'followers_count'.
    """
    network = OrderedDict()

    def add_user_to_network(user):
        # A user seen again gets a fresh entry for the same key.
        entry = network[user.id] = {}
        entry['following_ids'] = set(user.following_ids)
        #entry['following_ids'].add(user.id)
        entry['follower_ids'] = set([])
        entry['following_count'] = user.following_count
        entry['followers_count'] = user.followers_count

    root = TwitterUser.by_screen_name('SmartTypes', postgres_handle)
    add_user_to_network(root)
    for followed in root.following:
        add_user_to_network(followed)
        for second_level in followed.following[:distance]:
            add_user_to_network(second_level)
    return network
def load_network_from_the_db(postgres_handle, distance):
    """Collect the network around the 'SmartTypes' user from the db.

    The result is an ``OrderedDict`` keyed by user id. It holds the
    'SmartTypes' user, every user it follows and, for each of those, the
    first ``distance`` users they follow. Every value is a dict with
    'following_ids' (a set), 'follower_ids' (an empty set),
    'following_count' and 'followers_count'.
    """
    network = OrderedDict()

    def add_user_to_network(user):
        # A user seen again gets a fresh entry for the same key.
        entry = network[user.id] = {}
        entry['following_ids'] = set(user.following_ids)
        entry['follower_ids'] = set([])
        entry['following_count'] = user.following_count
        entry['followers_count'] = user.followers_count

    root = TwitterUser.by_screen_name('SmartTypes', postgres_handle)
    add_user_to_network(root)
    for followed in root.following:
        add_user_to_network(followed)
        for second_level in followed.following[:distance]:
            add_user_to_network(second_level)
    return network
def complete_signin(request_key, verifier, postgres_handle):
    """Finish the Twitter OAuth sign-in for the session stored under ``request_key``.

    The request token is exchanged using ``verifier``, the matching
    ``TwitterCredentials`` are looked up or created, and their access key is
    stored on the session. When the credentials have no ``twitter_user``, the
    user is upserted from ``credentials.api_handle.me()`` and linked.

    Returns whatever ``session.save()`` returns.

    NOTE(review): the signup email is read here as part of the "no
    twitter_user yet" branch -- confirm against the intended flow.
    """
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    session = TwitterSession.get_by_request_key(request_key, postgres_handle)
    auth.set_request_token(request_key, session.request_secret)
    auth.get_access_token(verifier)
    # may have signed up already
    credentials = TwitterCredentials.get_by_access_key(auth.access_token.key, postgres_handle)
    if not credentials:
        credentials = TwitterCredentials.create(auth.access_token.key, auth.access_token.secret, postgres_handle)
    session.access_key = credentials.access_key
    if not credentials.twitter_user:
        # No user linked to these credentials yet: create or refresh it from the API.
        user = TwitterUser.upsert_from_api_user(credentials.api_handle.me(), postgres_handle)
        credentials.twitter_id = user.id
        credentials.save()
        screen_name = credentials.twitter_user.screen_name
        email_utils.send_email('*****@*****.**', ['*****@*****.**'], '%s signed up' % screen_name, 'smarttypes signup!')
    return session.save()
def index(req, session, postgres_handle):
    """Pick the reduction to show for the requested path.

    The first path segment is tried as a screen name (latest reduction of
    that user) and, failing that, as a reduction id when it is an integer.
    When the path has no first segment, the latest reduction of the first
    user in the shuffled ``user_reduction_counts`` list is used.

    ``session`` is accepted but not used here.

    Returns a dict with 'reduction_id' and 'reduction' (both None unless a
    reduction was found and its tiles are written to disk) and the shuffled
    'user_reduction_counts' list.
    """
    # If it's not a valid request keep the reduction None: don't do work for
    # bots that don't know what they're looking for.
    reduction = None
    user_reduction_counts = TwitterReduction.get_user_reduction_counts(postgres_handle)
    random.shuffle(user_reduction_counts)

    # The segment count must be compared, not the list itself: on Python 2 a
    # list always orders above an int, so the old test never guarded the
    # index below.
    path_parts = req.path.split('/')
    if len(path_parts) > 1 and path_parts[1]:
        # path looks like '/something'
        requested = path_parts[1]
        root_user = TwitterUser.by_screen_name(requested, postgres_handle)
        if root_user:
            reduction = TwitterReduction.get_latest_reduction(root_user.id, postgres_handle)
        if not reduction and is_int(requested):
            reduction = TwitterReduction.get_by_id(requested, postgres_handle)
    elif user_reduction_counts:
        # Nothing requested: use the first user of the shuffled list. With an
        # empty list there is nothing to show and the reduction stays None.
        reduction = TwitterReduction.get_latest_reduction(
            user_reduction_counts[0][0].id, postgres_handle)

    # Only expose a reduction whose tiles are already on disk.
    shown = reduction if reduction and reduction.tiles_are_written_to_disk else None
    return {
        'reduction_id': shown.id if shown else None,
        'reduction': shown,
        'user_reduction_counts': user_reduction_counts,
    }
def load_user_and_the_people_they_follow(creds, user_id, postgres_handle):
    """Fetch ``user_id`` from the Twitter API using the handle in ``creds``.

    Raises:
        Exception: when the remaining hits reported by
            ``get_rate_limit_status`` are below the threshold of 10.

    When ``get_user`` raises a ``TweepError`` the error is printed. If its
    text says the page does not exist or the user has been suspended, the
    user is stamped with ``caused_an_error`` (a placeholder row using the id
    as screen name is saved first when the user is not in the db) and the
    change is committed. ``None`` is returned for any ``TweepError``.

    NOTE(review): ``api_user`` and ``is_root_user`` are not used in the part
    of the body visible here; presumably the function continues -- confirm.
    """
    remaining_hits_threshold = 10
    api_handle = creds.api_handle
    root_user = creds.root_user
    is_root_user = False
    if root_user.id == user_id:
        is_root_user = True
    # if is_root_user and 'root_user.is_fake_user':
    #     return None
    remaining_hits, reset_time = get_rate_limit_status(api_handle)
    if remaining_hits < remaining_hits_threshold:
        raise Exception("%s: remaining_hits less than threshold!" % root_user.screen_name)
    try:
        api_user = api_handle.get_user(user_id=user_id)
    except TweepError, ex:
        print "%s: api_handle.get_user(%s) got a TweepError %s" % (
            root_user.screen_name, user_id, ex)
        # Only these two messages mark the user itself as the problem.
        if 'Sorry, that page does not exist' in str(
                ex) or 'User has been suspended' in str(ex):
            print 'setting caused_an_error'
            model_user = TwitterUser.get_by_id(user_id, postgres_handle)
            if not model_user:
                # Unknown user: save a placeholder row so the error can be recorded.
                properties = {'id': user_id, 'screen_name': user_id}
                model_user = TwitterUser(postgres_handle=postgres_handle, **properties)
                model_user.save()
                postgres_handle.connection.commit()
            model_user.caused_an_error = datetime.now()
            model_user.save()
            postgres_handle.connection.commit()
        return None
if 'Sorry, that page does not exist' in str( ex) or 'User has been suspended' in str(ex): print 'setting caused_an_error' model_user = TwitterUser.get_by_id(user_id, postgres_handle) if not model_user: properties = {'id': user_id, 'screen_name': user_id} model_user = TwitterUser(postgres_handle=postgres_handle, **properties) model_user.save() postgres_handle.connection.commit() model_user.caused_an_error = datetime.now() model_user.save() postgres_handle.connection.commit() return None model_user = TwitterUser.upsert_from_api_user(api_user, postgres_handle) postgres_handle.connection.commit() screen_name = model_user.screen_name if api_user.protected: print "%s: %s is protected." % (root_user.screen_name, screen_name) return model_user following_ids = [] print "%s: loading the people %s follows." % (root_user.screen_name, screen_name) try: max_pages = 1 #5 if is_root_user else 1 following_id_pages = tweepy.Cursor(api_handle.friends_ids, user_id=user_id).pages(max_pages) for following_ids_page in following_id_pages:
# Manual test script: calls get_twitter_friends.load_user_and_the_people_they_follow
# for the 'SmartTypes' account using that account's own api handle.
import smarttypes, random
from smarttypes.utils.postgres_handle import PostgresHandle
postgres_handle = PostgresHandle(smarttypes.connection_string)
from smarttypes.model.twitter_user import TwitterUser
from smarttypes.model.twitter_credentials import TwitterCredentials
from smarttypes.scripts import get_twitter_friends

#######################
#global variables
# NOTE(review): from here on the name `smarttypes` is the user object, not
# the imported package (already used above for the connection string).
smarttypes = TwitterUser.by_screen_name('SmartTypes', postgres_handle)
smarttypes_api_handle = smarttypes.credentials.api_handle
cocacola = TwitterUser.by_screen_name('CocaCola', postgres_handle)

########################
#tests
#load smarttypes
get_twitter_friends.load_user_and_the_people_they_follow(smarttypes_api_handle, smarttypes.id, postgres_handle, is_root_user=True, remaining_hits_threshold=9)
rateMat[:, 1] += nUser + 1 # item index starts from 0, graphlab pmf expects nUser+1 filehandle = open(outfile_path, 'wb') size.tofile(filehandle) rateMat.tofile(filehandle) filehandle.close() if __name__ == "__main__": args_dict = eval( sys.argv[1] if len(sys.argv) > 1 else "{'screen_name':'SmartTypes'}") screen_name = args_dict['screen_name'] twitter_user = TwitterUser.by_screen_name(screen_name, postgres_handle) following_dict = {} following_dict[twitter_user.twitter_id] = twitter_user.following_ids print "Collect all followers" for following_user in twitter_user.following: if following_user.twitter_id not in following_dict: following_dict[ following_user.twitter_id] = following_user.following_ids for following_following_user in following_user.following: if following_following_user.following_ids and following_following_user.twitter_id not in following_dict: following_dict[ following_following_user. twitter_id] = following_following_user.following_ids record_count = len(following_dict.keys()) if record_count % 2000 == 0:
def root_user(self):
    """Return the user for ``self.root_user_id``, or None when no id is set."""
    from smarttypes.model.twitter_user import TwitterUser

    if self.root_user_id:
        return TwitterUser.get_by_id(self.root_user_id, self.postgres_handle)
    return None
def print_user_details(user_ids, postgres_handle):
    """Print ``screen_name -- description`` for every user in ``user_ids``.

    The description is cut to 100 characters and its newlines are replaced by
    spaces; users without a description get an empty one.
    """
    for user in TwitterUser.get_by_ids(user_ids, postgres_handle):
        name = user.screen_name
        if user.description:
            blurb = user.description[:100].replace('\n', ' ')
        else:
            blurb = ''
        print("%s -- %s" % (name, blurb))
# Scratch script: reads the graph info of the 'SmartTypes' user and runs a
# GraphReduce over it (linloglayout reduction, then the reduction distances).
import os
import pickle
from datetime import datetime
import numpy as np
import smarttypes, random
from smarttypes.utils.postgres_handle import PostgresHandle
postgres_handle = PostgresHandle(smarttypes.connection_string)
from smarttypes.graphreduce import GraphReduce
from smarttypes.model.twitter_user import TwitterUser
from smarttypes.model.twitter_group import TwitterGroup
from smarttypes.model.twitter_reduction import TwitterReduction
from smarttypes.model.twitter_credentials import TwitterCredentials

screen_name = "SmartTypes"
root_user = TwitterUser.by_screen_name(screen_name, postgres_handle)
follower_followies_map = root_user.get_graph_info(distance=0, min_followers=60)
gr = GraphReduce(screen_name, follower_followies_map)
gr.reduce_with_linloglayout()
gr.figure_out_reduction_distances()

# Kept for reference: fetching the same user through the API instead.
# model_user = TwitterUser.by_screen_name('SmartTypes', postgres_handle)
# api_handle = model_user.credentials.api_handle
# api_user = api_handle.get_user(screen_name='SmartTypes')
# print os.path.dirname(os.path.abspath(__file__))
print '%s %s %s' % ( string.ljust(creds_username, 20), string.ljust(root_username, 20), string.ljust(creds.email if creds.email else '', 30)) if __name__ == "__main__": """ if no args, show all creds if args, first arg is creds_username, second is root_username """ if len(sys.argv) == 1: list_cred_details() elif len(sys.argv) == 2: creds_user = TwitterUser.by_screen_name(sys.argv[1], postgres_handle) creds = TwitterCredentials.get_by_twitter_id(creds_user.id, postgres_handle) creds.root_user_id = None creds.save() postgres_handle.connection.commit() else: creds_user = TwitterUser.by_screen_name(sys.argv[1], postgres_handle) root_user = TwitterUser.by_screen_name(sys.argv[2], postgres_handle) if not root_user: api_user = creds_user.credentials.api_handle.get_user( screen_name=sys.argv[2]) root_user = TwitterUser.upsert_from_api_user( api_user, postgres_handle) postgres_handle.connection.commit()
# Script: writes the following-of-following csv for one twitter handle to
# /tmp/<screen_name>_twitter_friends.csv.
# Usage: pass the twitter handle as the first command line argument.
import smarttypes, sys
from smarttypes.model.twitter_user import TwitterUser
from smarttypes.model.twitter_tweet import TwitterTweet
from datetime import datetime, timedelta
from smarttypes.utils.postgres_handle import PostgresHandle
postgres_handle = PostgresHandle(smarttypes.connection_string)

if __name__ == "__main__":
    if not len(sys.argv) > 1:
        raise Exception('Need a twitter handle.')
    screen_name = sys.argv[1]

    #friends
    # The with block closes (and so flushes) the csv even when the export fails.
    with open('/tmp/%s_twitter_friends.csv' % screen_name, 'w') as friends_file:
        TwitterUser.mk_following_following_csv(screen_name, friends_file, postgres_handle)

    #tweets_file = open('/tmp/%s_twitter_tweets.csv')
    #TwitterUser.mk_following_tweets_csv(screen_name, tweets_file)
def reduce_and_save_communities(root_user, distance=10, return_graph_for_inspection=False):
    """Find communities in the network rooted at ``root_user`` with infohiermap.

    Steps visible in this body:

    1. read the rooted network and load it into a directed igraph graph;
    2. keep only the largest connected cluster;
    3. write it to ``io/<screen_name>.net`` in pajek format;
    4. run ``infohiermap_dir/infohiermap`` on that file through ``os.system``;
    5. parse ``io/<screen_name>.smap`` into ``communities``, a mapping from
       community index to ``(edges, node ids, node volumes)``.

    Both files are opened in ``with`` blocks so the ``.net`` file is closed
    before the external program is started and the ``.smap`` handle does not
    leak.

    Returns the graph right after step 2 when ``return_graph_for_inspection``
    is true.

    NOTE(review): nothing is saved or returned after parsing in the part of
    the body visible here; presumably the function continues -- confirm.
    """
    print('starting reduce_and_save_communities')
    print('root_user: %s, following_in_our_db: %s, distance: %s' % (
        root_user.screen_name, len(root_user.following), distance))
    network = TwitterUser.get_rooted_network(root_user, postgres_handle, distance=distance)

    print('load %s users into igraph' % len(network))
    g = Graph(directed=True)
    keys_set = set(network.keys())
    g.add_vertices(network.keys())
    g.vs["id"] = network.keys()  # need this for pajek format

    print('iterative load into igraph')
    edges = []
    for source in network:
        # Only keep edges whose target is itself part of the network.
        for target in network[source].intersection(keys_set):
            edges.append((source, target))
    g.add_edges(edges)
    g = g.simplify()

    print('make sure graph is connected')
    connected_clusters = g.clusters()
    connected_cluster_lengths = [len(x) for x in connected_clusters]
    connected_cluster_max_idx = connected_cluster_lengths.index(
        max(connected_cluster_lengths))
    g = connected_clusters.subgraph(connected_cluster_max_idx)
    if g.is_connected():
        print('graph is connected')
    else:
        print('graph is not connected')

    if return_graph_for_inspection:
        return g

    print('write to pajek format')
    root_file_name = root_user.screen_name
    with open('io/%s.net' % root_file_name, 'w') as f:
        g.write(f, format='pajek')

    print('run infomap')
    #infomap_command = 'infomap_dir/infomap 345234 io/%s.net 10'
    #infomap_command = 'conf-infomap_dir/conf-infomap 344 io/%s.net 10 10 0.50'
    infomap_command = 'infohiermap_dir/infohiermap 345234 io/%s.net 30'
    os.system(infomap_command % root_file_name)

    print('read into memory')
    section_header = ''
    communities = defaultdict(lambda: ([], [], []))
    with open('io/%s.smap' % root_file_name) as f:
        for line in f:
            if line.startswith('*Modules'):
                section_header = 'Modules'
                continue
            if line.startswith('*Insignificants'):
                section_header = 'Insignificants'
                continue
            if line.startswith('*Nodes'):
                section_header = 'Nodes'
                continue
            if line.startswith('*Links'):
                section_header = 'Links'
                continue

            if section_header == 'Modules':
                #looks like this:
                #1 "26000689,..." 0.130147 0.0308866
                #The names under *Modules are derived from the node with the highest
                #flow volume within the module, and 0.25 0.0395432 represent, respectively,
                #the aggregated flow volume of all nodes within the module and the per
                #step exit flow from the module.
                continue

            if section_header == 'Nodes':
                #looks like this:
                #1:10 "2335431" 0.00365772
                #or w/ a semicolon instead, semicolon means not significant
                #see http://www.tp.umu.se/~rosvall/code.html
                if ';' in line:
                    continue
                community_idx = line.split(':')[0]
                node_id = line.split('"')[1]
                final_volume = float(line.split(' ')[2])
                communities[community_idx][1].append(node_id)
                communities[community_idx][2].append(final_volume)

            if section_header == 'Links':
                #community_edges
                #looks like this:
                #1 4 0.0395432
                community_idx = line.split(' ')[0]
                target_community_idx = line.split(' ')[1]
                # NOTE(review): the weight keeps the line's trailing newline,
                # unlike the float() conversion used for nodes -- confirm intent.
                edge_weight = line.split(' ')[2]
                communities[community_idx][0].append(
                    '%s:%s' % (target_community_idx, edge_weight))
def load_user_and_the_people_they_follow(api_handle, screen_name): print "Attempting to load %s" % screen_name continue_or_exit(api_handle) try: api_user = api_handle.get_user(screen_name=screen_name) except TweepError, ex: print "Got a TweepError: %s." % ex if str(ex) == "Not found": print "Setting caused_an_error for %s " % screen_name model_user = TwitterUser.by_screen_name(screen_name) model_user.caused_an_error = datetime.now() model_user.save() return model_user model_user = TwitterUser.upsert_from_api_user(api_user) if api_user.protected: print "\t %s is protected" % screen_name return if api_user.friends_count > MAX_FOLLOWING_COUNT: print "\t %s follows too many people, %s" % (screen_name, api_user.friends_count) model_user.save_following_ids([]) return model_user print "Loading the people %s follows" % screen_name following_ids = [] try: api_following_list = list(tweepy.Cursor(api_handle.friends, screen_name).items()) except TweepError, ex:
def reduce_graph(screen_name, distance=20, min_followers=60, pickle_it=True, just_load_from_file=False):
    """Reduce the graph around ``screen_name`` and store the result in the db.

    Steps:

    1. read the user's graph info and reduce it with
       ``GraphReduce.reduce_with_linloglayout``;
    2. save one reduction holding, per layout id, its x/y coordinate and its
       joined in/out links;
    3. save one group per cluster, scoring the members with networkx
       pagerank over the follow edges inside the group (clusters without
       such edges are skipped);
    4. save the group info on the reduction and make the tag clouds.

    ``distance`` and ``min_followers`` are passed to ``get_graph_info``.
    ``pickle_it`` and ``just_load_from_file`` are not read by the active code.

    Nothing is returned.
    """
    postgres_handle = PostgresHandle(smarttypes.connection_string)

    # Disabled: reload a pickled reduction and its groups instead of reducing.
    # if just_load_from_file:
    #     print "Loading data from a pickle."
    #     gr = GraphReduce(screen_name, {})
    #     f = open(gr.pickle_file_path)
    #     twitter_reduction, groups = pickle.load(f)
    #     twitter_reduction.id = None
    #     twitter_reduction.postgres_handle = postgres_handle
    #     twitter_reduction.save()
    #     postgres_handle.connection.commit()
    #     for group in groups:
    #         group.id = None
    #         group.reduction_id = twitter_reduction.id
    #         group.postgres_handle = postgres_handle
    #         group.save()
    #         postgres_handle.connection.commit()
    #     TwitterGroup.mk_tag_clouds(twitter_reduction.id, postgres_handle)
    #     postgres_handle.connection.commit()
    #     print "All done!"
    #     return 0

    ########################
    ##reduce
    ########################
    root_user = TwitterUser.by_screen_name(screen_name, postgres_handle)
    follower_followies_map = root_user.get_graph_info(
        distance=distance, min_followers=min_followers)
    gr = GraphReduce(screen_name, follower_followies_map)
    #gr.reduce_with_exafmm()
    gr.reduce_with_linloglayout()

    ########################
    ##save reduction in db
    ########################
    root_user_id = root_user.id
    user_ids = []
    x_coordinates = []
    y_coordinates = []
    in_links = []
    out_links = []
    for i in range(len(gr.layout_ids)):
        user_id = gr.layout_ids[i]
        user_ids.append(user_id)
        x_coordinates.append(gr.reduction[i][0])
        y_coordinates.append(gr.reduction[i][1])
        # Links are stored as one string per user, joined with the handle's splitter.
        itr_in_links = PostgresHandle.spliter.join(gr.G.predecessors(user_id))
        itr_out_links = PostgresHandle.spliter.join(gr.G.successors(user_id))
        in_links.append(itr_in_links)
        out_links.append(itr_out_links)
    twitter_reduction = TwitterReduction.create_reduction(
        root_user_id, user_ids, x_coordinates, y_coordinates, in_links,
        out_links, postgres_handle)
    postgres_handle.connection.commit()

    ########################
    ##save groups in db
    ########################
    groups = []
    for i in range(gr.n_clusters):
        # Members of cluster i: layout ids with a positive value in column i.
        user_ids = []
        for j in range(len(gr.layout_ids)):
            if gr.layout_clusters[j][i] > 0:
                user_ids.append(gr.layout_ids[j])
        #run pagerank to get the scores
        group_graph = networkx.DiGraph()
        group_edges = []
        for user_id in user_ids:
            if user_id in follower_followies_map:
                for following_id in set(user_ids).intersection(
                        follower_followies_map[user_id]):
                    group_edges.append((user_id, following_id))
        print len(user_ids), len(group_edges)
        if not group_edges:
            continue
        group_graph.add_edges_from(group_edges)
        pagerank = networkx.pagerank(group_graph, max_iter=500)
        scores = []
        for user_id in user_ids:
            # Members without an edge inside the group are not in the pagerank result.
            scores.append(pagerank.get(user_id, 0))
        groups.append(
            TwitterGroup.create_group(twitter_reduction.id, i, user_ids,
                                      scores, postgres_handle))
    # NOTE(review): this commit is read as running once after the loop, not
    # once per group -- confirm.
    postgres_handle.connection.commit()
    twitter_reduction.save_group_info(postgres_handle)
    postgres_handle.connection.commit()

    ########################
    ##mk_tag_clouds
    ########################
    TwitterGroup.mk_tag_clouds(twitter_reduction.id, postgres_handle)
    postgres_handle.connection.commit()
def root_user(self):
    """Return the user looked up for ``self.root_user_id``."""
    from smarttypes.model.twitter_user import TwitterUser

    lookup = TwitterUser.get_by_id
    return lookup(self.root_user_id, self.postgres_handle)
# Scratch script: reads the 'SmartTypes' user from the db and fetches the
# same account through its own api handle.
import os
import pickle
from datetime import datetime
import numpy as np
import smarttypes, random
from smarttypes.utils.postgres_handle import PostgresHandle
postgres_handle = PostgresHandle(smarttypes.connection_string)
from smarttypes.model.twitter_credentials import TwitterCredentials
from smarttypes.model.twitter_user import TwitterUser
from smarttypes.model.twitter_reduction import TwitterReduction
from smarttypes.model.twitter_reduction_user import TwitterReductionUser
from smarttypes.model.twitter_community import TwitterCommunity

model_user = TwitterUser.by_screen_name('SmartTypes', postgres_handle)
api_handle = model_user.credentials.api_handle
api_user = api_handle.get_user(screen_name='SmartTypes')
group_adjacency.append(membership_scores) index_to_twitter_id_dict = pickle.load(open("index_to_twitter_id.pickle", "r")) user_group_map = {} TwitterGroup.bulk_delete("all") for i in range(num_features): membership_scores = [] for j in range(num_items): user_id = index_to_twitter_id_dict[j] follower_score = users_data[i][j] following_score = items_data[i][j] membership_score = following_score * following_score if membership_score > 0.001: membership_scores.append((membership_score, user_id)) if user_id not in user_group_map: user_group_map[user_id] = [(membership_score, i)] else: user_group_map[user_id].append((membership_score, i)) TwitterGroup.upsert_group(i, membership_scores, group_adjacency[i]) print "Done creating groups." TwitterUser.bulk_update("all", {"scores_groups": None}) i = 0 for user_id, scores_groups in user_group_map.items(): twitter_user = TwitterUser.get_by_id(user_id) twitter_user.scores_groups = scores_groups twitter_user.save() if i % 1000 == 0: print "Done with %s users." % i i += 1
def load_user_and_the_people_they_follow(api_handle, user_id, postgres_handle, is_root_user=False):
    """Fetch one user through the API and upsert it into the database.

    Args:
        api_handle: API client; ``get_user(user_id=...)`` is called on it.
        user_id: id of the account to load.
        postgres_handle: database handle; its connection is committed after
            each write made here.
        is_root_user: when true, the ``MAX_FOLLOWING_COUNT`` cut-off is not
            applied.

    Returns:
        ``None`` when the API lookup raises ``TweepError``; the upserted
        model user when the account is protected or (for non-root users)
        follows more than ``MAX_FOLLOWING_COUNT`` accounts.  Otherwise the
        visible code announces the load and falls off the end.
    """
    print("Attempting to load user %s." % user_id)
    continue_or_exit(api_handle, user_id)

    try:
        fetched = api_handle.get_user(user_id=user_id)
    except TweepError as ex:
        print("Got a TweepError: %s." % ex)
        return None

    stored = TwitterUser.upsert_from_api_user(fetched, postgres_handle)
    postgres_handle.connection.commit()
    screen_name = stored.screen_name

    # Protected accounts are stored but nothing further is loaded for them.
    if fetched.protected:
        print("\t %s is protected." % screen_name)
        return stored

    follows_too_many = fetched.friends_count > MAX_FOLLOWING_COUNT
    if follows_too_many and not is_root_user:
        print("\t %s follows too many people, %s." % (screen_name, fetched.friends_count))
        # An empty following list is saved for over-limit accounts.
        stored.save_following_ids([])
        postgres_handle.connection.commit()
        return stored

    print("Loading the people %s follows." % screen_name)
membership_scores.append((A[i][j] * A[j][i], j)) group_adjacency.append(membership_scores) index_to_twitter_id_dict = pickle.load(open('index_to_twitter_id.pickle', 'r')) user_group_map = {} TwitterGroup.bulk_delete('all') for i in range(num_features): membership_scores = [] for j in range(num_items): user_id = index_to_twitter_id_dict[j] follower_score = users_data[i][j] following_score = items_data[i][j] membership_score = following_score * following_score if membership_score > .001: membership_scores.append((membership_score, user_id)) if user_id not in user_group_map: user_group_map[user_id] = [(membership_score, i)] else: user_group_map[user_id].append((membership_score, i)) TwitterGroup.upsert_group(i, membership_scores, group_adjacency[i]) print "Done creating groups." TwitterUser.bulk_update('all', {'scores_groups': None}) i = 0 for user_id, scores_groups in user_group_map.items(): twitter_user = TwitterUser.get_by_id(user_id) twitter_user.scores_groups = scores_groups twitter_user.save() if i % 1000 == 0: print "Done with %s users." % i i += 1
# Drop every stored group, then rebuild one group per feature.
TwitterGroup.bulk_delete('all')
for i in range(num_features):
    membership_scores = []
    for j in range(num_items):
        user_id = index_to_twitter_id_dict[j]
        # NOTE(review): follower_score is read but never used, and the
        # membership score squares following_score -- confirm whether
        # follower_score * following_score was intended.
        follower_score = users_data[i][j]
        following_score = items_data[i][j]
        membership_score = following_score * following_score
        if not membership_score > .001:
            continue
        membership_scores.append((membership_score, user_id))
        # Record the reverse mapping: user -> [(score, group index), ...].
        user_group_map.setdefault(user_id, []).append((membership_score, i))
    TwitterGroup.upsert_group(i, membership_scores, group_adjacency[i])
print("Done creating groups.")

# Clear the per-user scores, then write back the freshly computed ones.
TwitterUser.bulk_update('all', {'scores_groups':None})
i = 0
for user_id, scores_groups in user_group_map.items():
    twitter_user = TwitterUser.get_by_id(user_id)
    twitter_user.scores_groups = scores_groups
    twitter_user.save()
    # Progress line on every 1000th user, starting with the first.
    if not i % 1000:
        print("Done with %s users." % i)
    i += 1
def reduce_graph(screen_name, distance=20, min_followers=60):
    """Reduce a user's follow graph, then persist the layout and its groups.

    Steps, each followed by a commit on the database connection:
      1. Build the follower -> followies map for ``screen_name`` and lay it
         out with ``GraphReduce.reduce_with_linloglayout``.
      2. Store the layout (ids, x/y coordinates, joined in/out links) as a
         ``TwitterReduction``.
      3. For each group index, run PageRank over the follow edges between
         the group's members and store the group with those scores.
      4. Save the reduction's group info and build the tag clouds.

    Args:
        screen_name: screen name of the root user.
        distance: forwarded to ``root_user.get_graph_info``.
        min_followers: forwarded to ``root_user.get_graph_info``.

    Returns:
        None; the results are written to the database.
    """
    postgres_handle = PostgresHandle(smarttypes.connection_string)

    ###########################################
    ##reduce
    ###########################################
    root_user = TwitterUser.by_screen_name(screen_name, postgres_handle)
    follower_followies_map = root_user.get_graph_info(
        distance=distance, min_followers=min_followers)
    gr = GraphReduce(screen_name, follower_followies_map)
    gr.reduce_with_linloglayout()

    ###########################################
    ##save reduction in db
    ###########################################
    user_ids = []
    x_coordinates = []
    y_coordinates = []
    in_links = []
    out_links = []
    for idx, user_id in enumerate(gr.layout_ids):
        user_ids.append(user_id)
        x_coordinates.append(gr.reduction[idx][0])
        y_coordinates.append(gr.reduction[idx][1])
        in_links.append(PostgresHandle.spliter.join(gr.G.predecessors(user_id)))
        out_links.append(PostgresHandle.spliter.join(gr.G.successors(user_id)))
    twitter_reduction = TwitterReduction.create_reduction(
        root_user.id, user_ids, x_coordinates, y_coordinates,
        in_links, out_links, postgres_handle)
    postgres_handle.connection.commit()

    ###########################################
    ##save groups in db
    ###########################################
    for i in range(gr.n_groups):
        member_ids = [member_id for j, member_id in enumerate(gr.layout_ids)
                      if gr.groups[j] == i]
        # Built once per group rather than once per member.
        member_set = set(member_ids)

        #run pagerank to get the scores
        group_edges = []
        for user_id in member_ids:
            # A layout node is not necessarily a key of the map; indexing
            # it directly would raise KeyError, so such nodes add no edges.
            if user_id not in follower_followies_map:
                continue
            for following_id in member_set.intersection(
                    follower_followies_map[user_id]):
                group_edges.append((user_id, following_id))
        print("%s %s" % (len(member_ids), len(group_edges)))
        # Groups with no internal edges are not stored.
        if not group_edges:
            continue
        group_graph = networkx.DiGraph()
        group_graph.add_edges_from(group_edges)
        pagerank = networkx.pagerank(group_graph, max_iter=500)
        # Members that have no edge inside the group score 0.
        scores = [pagerank.get(member_id, 0) for member_id in member_ids]
        TwitterGroup.create_group(
            twitter_reduction.id, i, member_ids, scores, postgres_handle)
    postgres_handle.connection.commit()

    ###########################################
    ##makes for quicker queries in some cases
    ###########################################
    twitter_reduction.save_group_info(postgres_handle)
    postgres_handle.connection.commit()

    ###########################################
    ##mk_tag_clouds
    ###########################################
    TwitterGroup.mk_tag_clouds(twitter_reduction.id, postgres_handle)
    postgres_handle.connection.commit()
from smarttypes.graphreduce import reduce_graph

if __name__ == "__main__":
    # Usage: pass the root user's twitter handle as the first argument.
    start_time = datetime.now()
    postgres_handle = PostgresHandle(smarttypes.connection_string)
    if len(sys.argv) < 2:
        raise Exception('Need a twitter handle.')
    screen_name = sys.argv[1]

    # Production starts from "now"; otherwise a fixed date is used.
    if smarttypes.config.IS_PROD:
        start_here = datetime.now()
    else:
        start_here = datetime(2012, 8, 1)

    root_user = TwitterUser.by_screen_name(screen_name, postgres_handle)
    # by_screen_name yields a falsy value for an unknown handle; fail with a
    # clear message instead of an AttributeError on the next line.
    if not root_user:
        raise Exception('Unknown twitter handle: %s' % screen_name)

    # The crawl distance shrinks as the root user follows more accounts
    # (capped at 5000). Guard the division: a user who follows nobody would
    # otherwise raise ZeroDivisionError.
    following_count = len(root_user.following[:5000])
    if following_count == 0:
        raise Exception('%s follows nobody; cannot derive a distance.' % screen_name)
    distance = 45000 / following_count
    #distance = 0
    network = TwitterUser.get_rooted_network(
        root_user, postgres_handle, start_here=start_here,
        distance=distance, go_back_this_many_weeks=15)
    print("writing %s nodes to disk" % len(network))

    g = reduce_graph.get_igraph_graph(network)
    # Attach ASCII-only language and location names to every vertex.
    lang_names = []
    loc_names = []
    for node_id in g.vs['name']:
        user = TwitterUser.get_by_id(node_id, postgres_handle)
        lang_names.append(user.lang.encode('ascii', 'ignore'))
        loc_names.append(user.location_name.encode('ascii', 'ignore'))
    g.vs['lang_name'] = lang_names
    g.vs['loc_name'] = loc_names
def reduce_graph(screen_name, distance=20, min_followers=60):
    """Reduce a user's follow graph, then persist the layout and its groups.

    Steps, each followed by a commit on the database connection:
      1. Build the follower -> followies map for ``screen_name`` and lay it
         out with ``GraphReduce.reduce_with_linloglayout``.
      2. Store the layout (ids, x/y coordinates, joined in/out links) as a
         ``TwitterReduction``.
      3. For each group index, run PageRank over the follow edges between
         the group's members and store the group with those scores.
      4. Save the reduction's group info and build the tag clouds.

    Args:
        screen_name: screen name of the root user.
        distance: forwarded to ``root_user.get_graph_info``.
        min_followers: forwarded to ``root_user.get_graph_info``.

    Returns:
        None; the results are written to the database.
    """
    postgres_handle = PostgresHandle(smarttypes.connection_string)

    ###########################################
    ##reduce
    ###########################################
    root_user = TwitterUser.by_screen_name(screen_name, postgres_handle)
    follower_followies_map = root_user.get_graph_info(distance=distance, min_followers=min_followers)
    gr = GraphReduce(screen_name, follower_followies_map)
    gr.reduce_with_linloglayout()

    ###########################################
    ##save reduction in db
    ###########################################
    user_ids = []
    x_coordinates = []
    y_coordinates = []
    in_links = []
    out_links = []
    for idx, user_id in enumerate(gr.layout_ids):
        user_ids.append(user_id)
        x_coordinates.append(gr.reduction[idx][0])
        y_coordinates.append(gr.reduction[idx][1])
        in_links.append(PostgresHandle.spliter.join(gr.G.predecessors(user_id)))
        out_links.append(PostgresHandle.spliter.join(gr.G.successors(user_id)))
    twitter_reduction = TwitterReduction.create_reduction(root_user.id, user_ids,
        x_coordinates, y_coordinates, in_links, out_links, postgres_handle)
    postgres_handle.connection.commit()

    ###########################################
    ##save groups in db
    ###########################################
    for i in range(gr.n_groups):
        member_ids = [member_id for j, member_id in enumerate(gr.layout_ids)
                      if gr.groups[j] == i]
        # Built once per group rather than once per member.
        member_set = set(member_ids)

        #run pagerank to get the scores
        group_edges = []
        for user_id in member_ids:
            # A layout node is not necessarily a key of the map; indexing
            # it directly would raise KeyError, so such nodes add no edges.
            if user_id not in follower_followies_map:
                continue
            for following_id in member_set.intersection(follower_followies_map[user_id]):
                group_edges.append((user_id, following_id))
        print("%s %s" % (len(member_ids), len(group_edges)))
        # Groups with no internal edges are not stored.
        if not group_edges:
            continue
        group_graph = networkx.DiGraph()
        group_graph.add_edges_from(group_edges)
        pagerank = networkx.pagerank(group_graph, max_iter=500)
        # Members that have no edge inside the group score 0.
        scores = [pagerank.get(member_id, 0) for member_id in member_ids]
        TwitterGroup.create_group(twitter_reduction.id, i, member_ids, scores, postgres_handle)
    postgres_handle.connection.commit()

    ###########################################
    ##makes for quicker queries in some cases
    ###########################################
    twitter_reduction.save_group_info(postgres_handle)
    postgres_handle.connection.commit()

    ###########################################
    ##mk_tag_clouds
    ###########################################
    TwitterGroup.mk_tag_clouds(twitter_reduction.id, postgres_handle)
    postgres_handle.connection.commit()
string.ljust(creds_username, 20), string.ljust(root_username, 20), string.ljust(creds.email if creds.email else '', 30) ) if __name__ == "__main__": """ if no args, show all creds if args, first arg is creds_username, second is root_username """ if len(sys.argv) == 1: list_cred_details() elif len(sys.argv) == 2: creds_user = TwitterUser.by_screen_name(sys.argv[1], postgres_handle) creds = TwitterCredentials.get_by_twitter_id(creds_user.id, postgres_handle) creds.root_user_id = None creds.save() postgres_handle.connection.commit() else: creds_user = TwitterUser.by_screen_name(sys.argv[1], postgres_handle) root_user = TwitterUser.by_screen_name(sys.argv[2], postgres_handle) if not root_user: api_user = creds_user.credentials.api_handle.get_user(screen_name=sys.argv[2]) root_user = TwitterUser.upsert_from_api_user(api_user, postgres_handle) postgres_handle.connection.commit() creds = TwitterCredentials.get_by_twitter_id(creds_user.id, postgres_handle) creds.root_user_id = root_user.id creds.save()
vertex_order_by=('size', True), edge_color="white", edge_width=0, edge_arrow_size=0.1, edge_arrow_width=0.1) if __name__ == "__main__": #call like this: #python reduce_graph.py SmartTypes 0 start_time = datetime.now() postgres_handle = PostgresHandle(smarttypes.connection_string) if len(sys.argv) < 3: raise Exception('Need a twitter handle and distance.') else: screen_name = sys.argv[1] distance = int(sys.argv[2]) root_user = TwitterUser.by_screen_name(screen_name, postgres_handle) smarttypes.config.IS_PROD = False if distance < 1: smarttypes.config.IS_PROD = True distance = 10000 / len(root_user.following[:1000]) network = TwitterUser.get_rooted_network(root_user, postgres_handle, distance=distance) g = get_igraph_graph(network) layout_list = reduce_with_linloglayout(g, root_user) #id_communities g, community_idx_list, vertex_clustering = id_communities(g, layout_list, eps=0.62, min_samples=12) #set color based on communities color_array = np.array(community_idx_list)
except TweepError, ex: print "%s: api_handle.get_user(%s) got a TweepError %s" % (root_user.screen_name, user_id, ex) if 'Sorry, that page does not exist' in str(ex) or 'User has been suspended' in str(ex): print 'setting caused_an_error' model_user = TwitterUser.get_by_id(user_id, postgres_handle) if not model_user: properties = {'id': user_id, 'screen_name': user_id} model_user = TwitterUser(postgres_handle=postgres_handle, **properties) model_user.save() postgres_handle.connection.commit() model_user.caused_an_error = datetime.now() model_user.save() postgres_handle.connection.commit() return None model_user = TwitterUser.upsert_from_api_user(api_user, postgres_handle) postgres_handle.connection.commit() screen_name = model_user.screen_name if api_user.protected: print "%s: %s is protected." % (root_user.screen_name, screen_name) return model_user following_ids = [] print "%s: loading the people %s follows." % (root_user.screen_name, screen_name) try: max_pages = 1#5 if is_root_user else 1 following_id_pages = tweepy.Cursor(api_handle.friends_ids, user_id=user_id).pages(max_pages) for following_ids_page in following_id_pages: following_ids += [str(x) for x in following_ids_page] except TweepError, ex:
def twitter_user(self):
    """Return the TwitterUser for ``self.twitter_id``, or None if it is unset.

    A falsy ``twitter_id`` short-circuits to ``None``; otherwise the result
    of ``TwitterUser.get_by_id`` with this object's ``postgres_handle`` is
    returned unchanged.
    """
    # Function-scope import, kept ahead of the guard as in the original.
    from smarttypes.model.twitter_user import TwitterUser
    if self.twitter_id:
        return TwitterUser.get_by_id(self.twitter_id, self.postgres_handle)
    return None
import smarttypes, sys
from smarttypes.model.twitter_user import TwitterUser
from smarttypes.model.twitter_tweet import TwitterTweet
from datetime import datetime, timedelta
from smarttypes.utils.postgres_handle import PostgresHandle

# Module-level database handle built from the project's connection string.
postgres_handle = PostgresHandle(smarttypes.connection_string)

if __name__ == "__main__":
    # Usage: pass a twitter handle as the first argument; the CSV is
    # written to /tmp/<handle>_twitter_friends.csv.
    if len(sys.argv) < 2:
        raise Exception("Need a twitter handle.")
    screen_name = sys.argv[1]

    # friends
    # The with-block closes (and so flushes) the CSV even if the export
    # raises; the original left the file object open.
    with open("/tmp/%s_twitter_friends.csv" % screen_name, "w") as friends_file:
        TwitterUser.mk_following_following_csv(screen_name, friends_file, postgres_handle)

    # tweets_file = open('/tmp/%s_twitter_tweets.csv')
    # TwitterUser.mk_following_tweets_csv(screen_name, tweets_file)
print "done" return False #dont forget this return True if __name__ == "__main__": if not len(sys.argv) > 1: args_dict = {'screen_name':'SmartTypes'} else: args_dict = eval(sys.argv[1]) screen_name = args_dict['screen_name'] twitter_user = TwitterUser.by_screen_name(screen_name) auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET) auth.set_access_token(ACCESS_KEY, ACCESS_SECRET) monitor_these_user_ids = twitter_user.following_following_ids[:4000] print "Num of users to monitor: %s" % len(monitor_these_user_ids) listener = Listener(monitor_these_user_ids) stream = Stream(auth,listener) stream.filter(follow=monitor_these_user_ids)
if remaining_hits < REMAINING_HITS_THRESHOLD: raise Exception("remaining_hits less than threshold %s" % put_this_in_the_error_message) def load_user_and_the_people_they_follow(api_handle, user_id, postgres_handle, is_root_user=False): print "Attempting to load user %s." % user_id continue_or_exit(api_handle, user_id) try: api_user = api_handle.get_user(user_id=user_id) except TweepError, ex: print "Got a TweepError: %s." % ex return None model_user = TwitterUser.upsert_from_api_user(api_user, postgres_handle) postgres_handle.connection.commit() screen_name = model_user.screen_name if api_user.protected: print "\t %s is protected." % screen_name return model_user if not is_root_user and api_user.friends_count > MAX_FOLLOWING_COUNT: print "\t %s follows too many people, %s." % (screen_name, api_user.friends_count) model_user.save_following_ids([]) postgres_handle.connection.commit() return model_user print "Loading the people %s follows." % screen_name following_ids = []