def post(self):
    """Accept a history check-in from a client and store it in MySQL.

    Expects `latest_time`, `latest_timestamp` and `container_id` as
    request arguments; acknowledges the client first, then inserts a
    row (plus a server-side check-in timestamp) into `cache_history`.
    """
    try:
        # Harvest the request arguments; any missing one raises.
        fields = {
            'latest_time': self.get_argument('latest_time'),
            'latest_timestamp': self.get_argument('latest_timestamp'),
            'container_id': self.get_argument('container_id'),
        }
        self.write('success')
        self.finish()
        print('Success: to get data from web')
    except Exception as err:
        # Could not read the expected arguments -- tell the client and bail.
        self.write('fail to return user history')
        self.finish()
        print('Error:server-HistoryReturn:Unable to get value from http package,Reason:')
        print(err)
        return
    db = MySQL_Interface()
    fields['checkin_timestamp'] = int(time.time())
    columns = db.get_col_name('cache_history')
    # Map every table column to its value, None when we have nothing for it.
    row = [fields.get(col) for col in columns]
    db.insert_asList('cache_history', [row])
def post(self):
    """Persist one client history check-in into the `cache_history` table."""
    try:
        # Read the three expected request arguments, then ACK the client.
        args = [self.get_argument(name)
                for name in ('latest_time', 'latest_timestamp', 'container_id')]
        self.write('success')
        self.finish()
        print('Success: to get data from web')
    except Exception as exc:
        self.write('fail to return user history')
        self.finish()
        print('Error:server-HistoryReturn:Unable to get value from http package,Reason:')
        print(exc)
        return
    latest_time, latest_timestamp, container_id = args
    record = {
        'latest_time': latest_time,
        'latest_timestamp': latest_timestamp,
        'container_id': container_id,
        'checkin_timestamp': int(time.time()),
    }
    interface = MySQL_Interface()
    schema = interface.get_col_name('cache_history')
    # Columns absent from `record` are filled with None.
    interface.insert_asList(
        'cache_history',
        [[record[col] if col in record else None for col in schema]])
class deal_cache_attends(threading.Thread):
    """Daemon thread that drains `cache_attends` into `ready_to_get`.

    Rows whose uid is not yet in `user_info_table` (checked via a Bloom
    filter) are batched and bulk-inserted into `ready_to_get`; every
    scanned row is then removed from `cache_attends`.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        self.dbi = MySQL_Interface()
        self.bf = BloomFilter()

    def run(self):
        bag = []       # pending rows destined for ready_to_get
        uid_bag = []   # uids paired with `bag`, kept for bloom-filter bookkeeping
        bag_size = 1000  # flush threshold for the batch insert
        ready_to_get_col = self.dbi.get_col_name('ready_to_get')
        cache_attends_col = self.dbi.get_col_name('cache_attends')
        while True:
            query = 'select * from cache_attends limit 5000'
            res = self.dbi.select_asQuery(query)
            if not res:
                # Nothing left to scan: flush any partial batch, then idle.
                if bag:
                    self.dbi.insert_asList('ready_to_get', bag, unique=True)
                    bag = []
                    # self.bf.insert_asList(uid_bag,'ready_to_get')
                    uid_bag = []
                time.sleep(1)
                self.dbi = MySQL_Interface()  # refresh the DB connection
                continue
            print('thread cache attends is working')
            for line in res:
                raw_id = line[cache_attends_col.index('uid')]
                # Fast probabilistic membership test; false positives just
                # mean a uid is skipped, never processed twice.
                in_user_info = self.bf.isContains(raw_id, 'user_info_table')
                if not in_user_info:
                    data = [
                        line[cache_attends_col.index(col)]
                        if col in cache_attends_col else None
                        for col in ready_to_get_col
                    ]
                    bag.append(data)
                    uid_bag.append(raw_id)
                    if len(bag) > bag_size:
                        self.dbi.insert_asList('ready_to_get', bag, unique=True)
                        # self.bf.insert_asList(uid_bag,'ready_to_get')
                        print('insert once')
                        bag = []
                        uid_bag = []
                self.dbi.delete_line('cache_attends', 'uid', raw_id)

    def isInUserInfo(self, in_uid):
        """Exact DB check: does `in_uid` already exist in user_info_table?

        NOTE(review): the query is built by string formatting; fine for
        numeric uids but unsafe if a uid can contain quotes -- consider a
        parameterized query.
        """
        query = 'select * from user_info_table where uid={uid}'.format(
            uid=in_uid)
        res = self.dbi.select_asQuery(query)
        return len(res) != 0
class deal_cache_attends(threading.Thread):
    """Background worker moving uids from `cache_attends` to `ready_to_get`."""

    def __init__(self):
        threading.Thread.__init__(self)
        self.dbi = MySQL_Interface()
        self.bf = BloomFilter()

    def run(self):
        pending_rows = []
        pending_uids = []  # uids paired one-to-one with pending_rows
        flush_at = 1000    # batch size before a bulk insert
        target_cols = self.dbi.get_col_name('ready_to_get')
        source_cols = self.dbi.get_col_name('cache_attends')
        while True:
            rows = self.dbi.select_asQuery(
                'select * from cache_attends limit 5000')
            if len(rows) == 0:
                # Cache empty: flush leftovers, nap, and reconnect.
                if len(pending_rows) > 0:
                    self.dbi.insert_asList('ready_to_get', pending_rows,
                                           unique=True)
                    pending_rows = []
                    # self.bf.insert_asList(uid_bag,'ready_to_get')
                    pending_uids = []
                time.sleep(1)
                self.dbi = MySQL_Interface()  # re-open the connection
                continue
            print('thread cache attends is working')
            for row in rows:
                uid = row[source_cols.index('uid')]
                if not self.bf.isContains(uid, 'user_info_table'):
                    mapped = []
                    for col in target_cols:
                        if col in source_cols:
                            mapped.append(row[source_cols.index(col)])
                        else:
                            mapped.append(None)
                    pending_rows.append(mapped)
                    pending_uids.append(uid)
                    if len(pending_rows) > flush_at:
                        self.dbi.insert_asList('ready_to_get', pending_rows,
                                               unique=True)
                        # self.bf.insert_asList(uid_bag,'ready_to_get')
                        print('insert once')
                        pending_rows = []
                        pending_uids = []
                self.dbi.delete_line('cache_attends', 'uid', uid)

    def isInUserInfo(self, in_uid):
        """Return True when `in_uid` already has a row in user_info_table."""
        col_user_info = self.dbi.get_col_name('user_info_table')
        query = 'select * from user_info_table where uid={uid}'.format(
            uid=in_uid)
        found = self.dbi.select_asQuery(query)
        if len(found) == 0:
            return False
        return True
class deal_cache_user_info(threading.Thread):
    """Thread that promotes rows from `cache_user_info` to `user_info_table`.

    Each cycle copies all cached user rows into the permanent table
    (filling bookkeeping columns), registers the uids with the Bloom
    filter, then deletes the processed uids from both
    `cache_user_info` and `ready_to_get`.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        self.dbi = MySQL_Interface()
        self.bf = BloomFilter()

    def run(self):
        while True:
            if self.dbi.is_empty('cache_user_info'):
                time.sleep(2)
                self.dbi = MySQL_Interface()  # refresh the connection while idle
                continue
            [res, cache_user_info_col] = self.dbi.select_all('cache_user_info')
            time_stick = time.strftime('%Y-%m-%d %H:%M:%S',
                                       time.localtime(time.time()))
            user_info_table_col = self.dbi.get_col_name('user_info_table')

            def _cell(line, col):
                # Copy the cached value when the column exists in the cache;
                # otherwise stamp insert_time, NULL the lazy columns, and
                # default everything else to ''.
                if col in cache_user_info_col:
                    return line[cache_user_info_col.index(col)]
                if col == 'insert_time':
                    return time_stick
                if col in ('update_time', 'latest_blog', 'isGettingBlog'):
                    return None
                return ''

            data = [[_cell(line, col) for col in user_info_table_col]
                    for line in res]
            uid_list = [line[user_info_table_col.index('uid')]
                        for line in data]
            self.dbi.insert_asList('user_info_table', data, unique=True)
            self.bf.insert_asList(uid_list, 'user_info_table')
            print('insert {num} users into user info table'.format(
                num=len(data)))
            # Delete the processed uids from both staging tables.
            # NOTE(review): ids are interpolated into the SQL text; safe only
            # while uids are plain digits -- consider parameterized queries.
            uid_list = [line[cache_user_info_col.index('uid')] for line in res]
            q1 = "delete from {table_name} where uid in ( {id_str_list} ) ;"
            id_str_list = ','.join("'{}'".format(i) for i in uid_list)
            for table in ('cache_user_info', 'ready_to_get'):
                query = q1.format(id_str_list=id_str_list, table_name=table)
                self.dbi.cur.execute(query)
                self.dbi.conn.commit()
class deal_cache_user_info(threading.Thread):
    """Moves cached user profiles into the permanent `user_info_table`."""

    def __init__(self):
        threading.Thread.__init__(self)
        self.dbi = MySQL_Interface()
        self.bf = BloomFilter()

    def run(self):
        while True:
            if self.dbi.is_empty('cache_user_info'):
                time.sleep(2)
                self.dbi = MySQL_Interface()
                continue
            rows, src_cols = self.dbi.select_all('cache_user_info')
            now_text = time.strftime('%Y-%m-%d %H:%M:%S',
                                     time.localtime(time.time()))
            dest_cols = self.dbi.get_col_name('user_info_table')
            # Reshape each cached row onto the user_info_table schema.
            transformed = []
            for row in rows:
                out_row = []
                for col in dest_cols:
                    if col in src_cols:
                        out_row.append(row[src_cols.index(col)])
                    elif col == 'insert_time':
                        out_row.append(now_text)
                    elif col == 'update_time':
                        out_row.append(None)
                    elif col == 'latest_blog':
                        out_row.append(None)
                    elif col == 'isGettingBlog':
                        out_row.append(None)
                    else:
                        out_row.append('')
                transformed.append(out_row)
            new_uids = [row[dest_cols.index('uid')] for row in transformed]
            self.dbi.insert_asList('user_info_table', transformed, unique=True)
            self.bf.insert_asList(new_uids, 'user_info_table')
            print('insert {num} users into user info table'.format(
                num=len(transformed)))
            # Purge the processed uids from both staging tables.
            cached_uids = [row[src_cols.index('uid')] for row in rows]
            delete_tpl = "delete from {table_name} where uid in ( {id_str_list} ) ;"
            quoted = ''
            for uid in cached_uids:
                quoted = quoted + '\'' + str(uid) + '\'' + ','
            quoted = quoted[:-1]
            self.dbi.cur.execute(delete_tpl.format(
                id_str_list=quoted, table_name='cache_user_info'))
            self.dbi.conn.commit()
            self.dbi.cur.execute(delete_tpl.format(
                id_str_list=quoted, table_name='ready_to_get'))
            self.dbi.conn.commit()
class state_persistance(threading.Thread):
    """Periodically snapshot proxy-pool health into `proxy_table`.

    Records the current pool size plus the input/output speeds reported
    by `proxy_pool.update_proxy_state()`, once every
    `server_config.PROXY_MONITOR_GAP` seconds.
    """

    def __init__(self, proxy_pool):
        threading.Thread.__init__(self)
        self.proxy_pool = proxy_pool
        self.dbi = MySQL_Interface()

    def run(self):
        while True:
            time_stick = time.strftime('%Y-%m-%d %H:%M:%S',
                                       time.localtime(time.time()))
            current_size = self.proxy_pool.size()
            # Renamed from `input`/`output` to avoid shadowing builtins.
            [in_speed, out_speed] = self.proxy_pool.update_proxy_state()
            insert_value = [[current_size, time_stick, in_speed, out_speed]]
            self.dbi.insert_asList('proxy_table', insert_value, unique=True)
            time.sleep(server_config.PROXY_MONITOR_GAP)
class state_persistance(threading.Thread):
    """
    Monitor thread: logs the proxy pool's size and its input/output
    throughput to `proxy_table` once per
    `server_config.PROXY_MONITOR_GAP` seconds.
    """

    def __init__(self, proxy_pool):
        threading.Thread.__init__(self)
        self.proxy_pool = proxy_pool
        self.dbi = MySQL_Interface()

    def run(self):
        while True:
            stamp = time.strftime('%Y-%m-%d %H:%M:%S',
                                  time.localtime(time.time()))
            pool_size = self.proxy_pool.size()
            in_rate, out_rate = self.proxy_pool.update_proxy_state()
            self.dbi.insert_asList(
                'proxy_table',
                [[pool_size, stamp, in_rate, out_rate]],
                unique=True)
            time.sleep(server_config.PROXY_MONITOR_GAP)
def post(self):
    """Receive a crawled user's basic info + attends list from a client.

    Stores high-fan-count attends into `cache_attends`, the user's own
    profile into `cache_user_info`, and the follow-graph edges into
    `cache_atten_web`.  Each stage falls back to pickling its payload
    to disk if the DB write fails.
    """
    try:
        user_basic_info = self.get_argument('user_basic_info')
        attends = self.get_argument('user_attends')
        # SECURITY: eval() on request data executes arbitrary code sent by
        # the client -- should be ast.literal_eval or json.loads; left
        # as-is here to avoid breaking callers that rely on eval semantics.
        user_basic_info = eval(user_basic_info)
        attends = eval(attends)
        self.write('success to return user info')
        self.finish()
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate.
        self.write('fail to return user info')
        self.finish()
        return
    try:
        dbi = MySQL_Interface()
    except Exception:
        # NOTE(review): execution continues with `dbi` undefined, so the
        # next stage raises NameError and lands in its pickle fallback.
        print('unable to connect to MySql DB')
    try:
        if len(attends) > 0:
            # Stage 1: keep only attends with >1000 fans in cache_attends.
            table_name = 'cache_attends'
            attends_col_info = dbi.get_col_name(table_name)
            keys = attends[0].keys()
            attends = [[line[i] if i in keys else '' for i in attends_col_info]
                       for line in attends]
            fans_col_pos = attends_col_info.index('fans_num')
            insert_attends = [line for line in attends
                              if line[fans_col_pos] > 1000]
            dbi.insert_asList(table_name, insert_attends, unique=True)
            print('Success : attends of {uid} is stored in {tname}'
                  .format(uid=user_basic_info['uid'], tname=table_name))
        else:
            pass
    except Exception as e:
        print(e)
        path = "temp\\{uid}_attends.pkl".format(uid=user_basic_info['uid'])
        print('unable to store attends of {uid}, it will be stored '
              .format(uid=user_basic_info['uid']))
        FI.save_pickle(attends, path)
    try:
        # Stage 2: the user's own profile row.
        # Kept even though unused: a missing 'attends_num' key must still
        # route this stage to the pickle fallback.
        atten_num_real = user_basic_info['attends_num']
        atten_num_get = len(attends)
        user_basic_info['accuracy'] = atten_num_get  # attends actually fetched
        col_info = dbi.get_col_name('cache_user_info')
        keys = user_basic_info.keys()
        data = [user_basic_info[i] if i in keys else '' for i in col_info]
        dbi.insert_asList('cache_user_info', [data], unique=True)
        print('Success : basic info of {uid} is stored in cache_user_info'
              .format(uid=user_basic_info['uid']))
    except Exception as e:
        print(e)
        path = 'temp\\{uid}_basic_info.pkl'.format(uid=user_basic_info['uid'])
        print('unable to store basic info of {uid} , it will be stored'
              .format(uid=user_basic_info['uid']))
        FI.save_pickle(user_basic_info, path)
    try:
        if len(attends) > 0:
            # Stage 3: follow-graph edges.
            from_uid = user_basic_info['uid']
            from_fans_num = user_basic_info['fans_num']
            from_blog_num = user_basic_info['blog_num']
            data = [[from_uid, from_fans_num, from_blog_num,
                     str(x[attends_col_info.index('uid')]),
                     str(x[attends_col_info.index('fans_num')]),
                     str(x[attends_col_info.index('blog_num')])]
                    for x in attends]
            dbi.insert_asList('cache_atten_web', data)
            print('Success : conn web of {uid} is stored in cache_atten_web'
                  .format(uid=user_basic_info['uid']))
        else:
            pass
    except Exception as e:
        print(e)
        path = '{uid}_atten_web.pkl'.format(uid=user_basic_info['uid'])
        print('unable to store atten web of {uid} , it will be stored'
              .format(uid=user_basic_info['uid']))
        # NOTE(review): `data` may still hold the stage-2 profile row if
        # the failure happened before the edge list was built.
        FI.save_pickle(data, path)
def post(self):
    """Store one crawled user's data: attends, profile and follow edges."""
    try:
        raw_basic = self.get_argument('user_basic_info')
        raw_attends = self.get_argument('user_attends')
        user_basic_info = eval(raw_basic)  # payload arrives as a Python literal
        attends = eval(raw_attends)
        self.write('success to return user info')
        self.finish()
    except:
        self.write('fail to return user info')
        self.finish()
        return
    try:
        dbi = MySQL_Interface()
    except:
        print('unable to connect to MySql DB')
    # --- attends -> cache_attends -------------------------------------
    try:
        if len(attends) > 0:
            table_name = 'cache_attends'
            attends_col_info = dbi.get_col_name(table_name)
            dict_keys = attends[0].keys()
            attends = [
                [row[c] if c in dict_keys else '' for c in attends_col_info]
                for row in attends
            ]
            fans_pos = attends_col_info.index('fans_num')
            popular = []
            for row in attends:
                if row[fans_pos] > 1000:
                    popular.append(row)
            dbi.insert_asList(table_name, popular, unique=True)
            print('Success : attends of {uid} is stored in {tname}'.format(
                uid=user_basic_info['uid'], tname=table_name))
        else:
            pass
    except Exception as e:
        print(e)
        path = "temp" + os.sep + "{uid}_attends.pkl".format(
            uid=user_basic_info['uid'])
        print('unable to store attends of {uid}, it will be stored '.format(
            uid=user_basic_info['uid']))
        FI.save_pickle(attends, path)
    # --- profile -> cache_user_info -----------------------------------
    try:
        atten_num_real = user_basic_info['attends_num']
        atten_num_get = len(attends)
        user_basic_info['accuracy'] = atten_num_get  # attends actually fetched
        col_info = dbi.get_col_name('cache_user_info')
        info_keys = user_basic_info.keys()
        data = [user_basic_info[c] if c in info_keys else '' for c in col_info]
        dbi.insert_asList('cache_user_info', [data], unique=True)
        print('Success : basic info of {uid} is stored in cache_user_info'.
              format(uid=user_basic_info['uid']))
    except Exception as e:
        print(e)
        path = 'temp' + os.sep + '{uid}_basic_info.pkl'.format(
            uid=user_basic_info['uid'])
        print('unable to store basic info of {uid} , it will be stored'.
              format(uid=user_basic_info['uid']))
        FI.save_pickle(user_basic_info, path)
    # --- follow edges -> cache_atten_web ------------------------------
    try:
        if len(attends) > 0:
            from_uid = user_basic_info['uid']
            from_fans_num = user_basic_info['fans_num']
            from_blog_num = user_basic_info['blog_num']
            data = [[from_uid, from_fans_num, from_blog_num,
                     str(x[attends_col_info.index('uid')]),
                     str(x[attends_col_info.index('fans_num')]),
                     str(x[attends_col_info.index('blog_num')])]
                    for x in attends]
            dbi.insert_asList('cache_atten_web', data)
            print(
                'Success : conn web of {uid} is stored in cache_atten_web'.
                format(uid=user_basic_info['uid']))
        else:
            pass
    except Exception as e:
        print(e)
        path = '{uid}_atten_web.pkl'.format(uid=user_basic_info['uid'])
        print('unable to store atten web of {uid} , it will be stored'.
              format(uid=user_basic_info['uid']))
        FI.save_pickle(data, path)