def get_rooted_network(cls, root_user, postgres_handle,
                       go_back_this_many_weeks=2, start_here='now',
                       distance=100):
    """Load the follow graph around ``root_user`` into memory.

    The returned mapping goes from a user id to the set of ids that user
    follows. The root user's own entry is inserted first (from
    ``root_user.following_ids``); every other entry is read from the weekly
    ``twitter_user_following_<year_weeknum>`` tables, limited to the ids
    returned by ``cls.get_following_following_ids(root_user, distance=...)``
    and to users with ``followers_count > 10``.

    Args:
        root_user: object exposing ``id`` and ``following_ids``.
        postgres_handle: object whose ``execute_query(sql, params)`` returns
            rows that can be indexed by column name.
        go_back_this_many_weeks: the window starts this many weeks before
            ``start_here``; ``go_back_this_many_weeks + 1`` week postfixes are
            requested from ``time_utils.year_weeknum_strs``.
        start_here: a datetime, or the string ``'now'`` to use
            ``datetime.now()``.
        distance: passed through to ``get_following_following_ids``.

    Returns:
        An ``OrderedDict`` of ``id -> set(following_ids)``. When an id shows
        up in more than one weekly table, the first row seen is kept.
    """
    print('Loading network in memory!')
    from collections import OrderedDict

    network = OrderedDict()
    network[root_user.id] = set(root_user.following_ids)

    window_end = datetime.now() if start_here == 'now' else start_here
    window_start = window_end - timedelta(days=7 * go_back_this_many_weeks)
    week_postfixes = time_utils.year_weeknum_strs(
        window_start, go_back_this_many_weeks + 1)

    # Background on the WITH / unnest approach used below:
    #  - http://www.postgresql.org/docs/current/static/queries-with.html
    #  - http://archives.postgresql.org/pgsql-novice/2009-01/msg00092.php
    #
    # The two %s slots are filled per week: the first with the named
    # placeholder that the database driver binds, the second with the
    # weekly table postfix.
    query_template = (
        " WITH only_these_ids as ( select id from unnest(%s) as id )"
        " select u.id, f.following_ids"
        " from twitter_user u"
        " join twitter_user_following_%s f on u.id = f.twitter_user_id"
        " join only_these_ids on only_these_ids.id = u.id"
        " where u.followers_count > 10 ; "
    )
    id_placeholder = '%(following_following_ids)s'

    candidate_ids = cls.get_following_following_ids(
        root_user, distance=distance)
    bind_values = {'following_following_ids': candidate_ids}

    for week in week_postfixes:
        print('Starting %s query!' % week)
        weekly_sql = query_template % (id_placeholder, week)
        rows = postgres_handle.execute_query(weekly_sql, bind_values)
        print('Done w/ %s query!' % week)
        for row in rows:
            user_id = row['id']
            if user_id in network:
                # Already loaded (root user or an earlier week): keep it.
                continue
            network[user_id] = set(row['following_ids'])

    return network
def get_rooted_network(cls, root_user, postgres_handle,
                       go_back_this_many_weeks=2, start_here='now',
                       distance=100):
    """Load the follow graph around ``root_user`` into memory.

    The returned mapping goes from a user id to the set of ids that user
    follows. The root user's own entry is inserted first (from
    ``root_user.following_ids``); every other entry is read from the weekly
    ``twitter_user_following_<year_weeknum>`` tables, limited to the ids
    returned by ``cls.get_following_following_ids(root_user, distance=...)``
    and to users with ``followers_count > 10``.

    Args:
        root_user: object exposing ``id`` and ``following_ids``.
        postgres_handle: object whose ``execute_query(sql, params)`` returns
            rows that can be indexed by column name.
        go_back_this_many_weeks: the window starts this many weeks before
            ``start_here``; ``go_back_this_many_weeks + 1`` week postfixes are
            requested from ``time_utils.year_weeknum_strs``.
        start_here: a datetime, or the string ``'now'`` to use
            ``datetime.now()``.
        distance: passed through to ``get_following_following_ids``.

    Returns:
        An ``OrderedDict`` of ``id -> set(following_ids)``. When an id shows
        up in more than one weekly table, the first row seen is kept.
    """
    print('Loading network in memory!')
    from collections import OrderedDict

    network = OrderedDict()
    network[root_user.id] = set(root_user.following_ids)

    window_end = datetime.now() if start_here == 'now' else start_here
    window_start = window_end - timedelta(days=7 * go_back_this_many_weeks)
    week_postfixes = time_utils.year_weeknum_strs(
        window_start, go_back_this_many_weeks + 1)

    # Background on the WITH / unnest approach used below:
    #  - http://www.postgresql.org/docs/current/static/queries-with.html
    #  - http://archives.postgresql.org/pgsql-novice/2009-01/msg00092.php
    #
    # The two %s slots are filled per week: the first with the named
    # placeholder that the database driver binds, the second with the
    # weekly table postfix.
    query_template = (
        " WITH only_these_ids as ( select id from unnest(%s) as id )"
        " select u.id, f.following_ids"
        " from twitter_user u"
        " join twitter_user_following_%s f on u.id = f.twitter_user_id"
        " join only_these_ids on only_these_ids.id = u.id"
        " where u.followers_count > 10 ; "
    )
    id_placeholder = '%(following_following_ids)s'

    candidate_ids = cls.get_following_following_ids(
        root_user, distance=distance)
    bind_values = {'following_following_ids': candidate_ids}

    for week in week_postfixes:
        print('Starting %s query!' % week)
        weekly_sql = query_template % (id_placeholder, week)
        rows = postgres_handle.execute_query(weekly_sql, bind_values)
        print('Done w/ %s query!' % week)
        for row in rows:
            user_id = row['id']
            if user_id in network:
                # Already loaded (root user or an earlier week): keep it.
                continue
            network[user_id] = set(row['following_ids'])

    return network
################################################ ##twitter_user_following ################################################ twitter_user_following = """ create table twitter_user_following_%(postfix)s( createddate timestamp not null default now(), modifieddate timestamp not null default now(), twitter_user_id text unique not null references twitter_user(id), following_ids text[] not null ); CREATE TRIGGER twitter_user_following_modified_%(postfix)s BEFORE UPDATE ON twitter_user_following_%(postfix)s FOR EACH ROW EXECUTE PROCEDURE ts_modifieddate(); """ for year_week_st in time_utils.year_weeknum_strs(datetime.now(), 50): postgres_handle.execute_query(twitter_user_following % {'postfix':year_week_st}, return_results=False) postgres_handle.connection.commit() ################################################ ##twitter_reduction ################################################ twitter_reduction = """ create table twitter_reduction( createddate timestamp not null default now(), modifieddate timestamp not null default now(), id serial unique, root_user_id text not null references twitter_user(id), user_ids text[] not null, x_coordinates real[] not null,
from datetime import datetime, timedelta
import psycopg2
from smarttypes.utils.postgres_handle import PostgresHandle

# NOTE(review): `smarttypes` and `time_utils` are referenced below but are not
# imported in the visible part of this script -- confirm they are in scope.
postgres_handle = PostgresHandle(smarttypes.connection_string)

################################################
##get rid of old connections
##we have db dumps, so we do have an archive
##if ever needed
################################################
retention_days = 30 * 4  #about 4 months
delete_before_this_date = datetime.now() - timedelta(days=retention_days)

#delete users whose following ids were last loaded before the cutoff
# NOTE(review): if the weekly twitter_user_following_* tables hold a foreign
# key to twitter_user(id), rows still present in a not-yet-dropped table could
# block this delete -- confirm the intended order of delete vs. drop.
sql = """ delete from twitter_user where last_loaded_following_ids < %(delete_before_this_date)s;"""
postgres_handle.execute_query(
    sql, {'delete_before_this_date': delete_before_this_date},
    return_results=False)
postgres_handle.connection.commit()

#drop tables
# 20 week postfixes are requested starting one week before the cutoff with
# forward=False (presumably walking backwards in time -- see time_utils).
# `if exists` keeps the script re-runnable: without it, a postfix whose table
# was already dropped by an earlier run (or never created) raises, the
# transaction is aborted and none of the drops below get committed.
sql = """drop table if exists twitter_user_following_%(postfix)s;"""
for year_week_st in time_utils.year_weeknum_strs(
        delete_before_this_date - timedelta(days=7), 20, forward=False):
    postgres_handle.execute_query(
        sql % {'postfix': year_week_st}, return_results=False)
postgres_handle.connection.commit()
################################################
##get rid of old connections
##we have db dumps, so we do have an archive
##if ever needed
################################################
retention_days = 30 * 4  #about 4 months
delete_before_this_date = datetime.now() - timedelta(days=retention_days)

#delete users whose following ids were last loaded before the cutoff
# NOTE(review): if the weekly twitter_user_following_* tables hold a foreign
# key to twitter_user(id), rows still present in a not-yet-dropped table could
# block this delete -- confirm the intended order of delete vs. drop.
sql = """ delete from twitter_user where last_loaded_following_ids < %(delete_before_this_date)s;"""
postgres_handle.execute_query(
    sql, {'delete_before_this_date': delete_before_this_date},
    return_results=False)
postgres_handle.connection.commit()

#drop tables
# 20 week postfixes are requested starting one week before the cutoff with
# forward=False (presumably walking backwards in time -- see time_utils).
# `if exists` keeps the script re-runnable: without it, a postfix whose table
# was already dropped by an earlier run (or never created) raises, the
# transaction is aborted and none of the drops below get committed.
sql = """drop table if exists twitter_user_following_%(postfix)s;"""
for year_week_st in time_utils.year_weeknum_strs(
        delete_before_this_date - timedelta(days=7), 20, forward=False):
    postgres_handle.execute_query(
        sql % {'postfix': year_week_st}, return_results=False)
postgres_handle.connection.commit()