def __init__(self, fb, usr_id, queue, lockname):
    """Initialise the thread and remember the objects it will work with.

    Args:
        fb: client object kept as ``self.fb`` (presumably a logged-in
            ``facebook_graph`` -- confirm against the caller).
        usr_id: identifier kept as ``self.usr_id``.
        queue: queue object kept as ``self.queue``.
        lockname: lock object kept as ``self.lock``.
    """
    threading.Thread.__init__(self)
    # References handed in by the caller are stored untouched.
    self.fb, self.usr_id = fb, usr_id
    self.queue, self.lock = queue, lockname
    # Every instance creates its own fb_mysql helper.
    self.fbsql = fb_mysql()
def sharefan_num(page_name, limitnum, hot_fan_defind, percent=''):
    """Collect first-N-share data per message and pass it to ``drawhotfun_num``.

    For every shared message of the selected page(s) this gathers the
    message's ``(share_count, created_time)`` row, the ``toid`` rows of its
    first ``limitnum`` shares (ordered by ``created_time``) and the result of
    ``percentfan`` computed from the per-``toid`` share counts.

    Args:
        page_name: ``user_msg.fromname`` to analyse, or ``'all'`` to walk
            every id in the module-level ``pagelist``.
        limitnum: value interpolated into the SQL ``limit`` clause.
        hot_fan_defind: threshold forwarded to ``percentfan``.
        percent: forwarded to ``percentfan`` and appended to the label given
            to ``drawhotfun_num``.

    NOTE(review): SQL is built by string interpolation, so ``page_name`` and
    ``limitnum`` must come from a trusted source.
    """
    fbsql = fb_mysql()
    msgdict = {}
    onehoursdict = {}
    join_sql = ("select user_msg.msgid,toid from share_fb_msg_new,user_msg "
                "where share_fb_msg_new.trend_mid = user_msg.msgid and ")

    if page_name == 'all':
        # This used to start as a list, which made ``dict(..., **hotfan)``
        # raise TypeError on the very first page; it has to be a mapping.
        hotfan = {}
        for page_id in pagelist:
            rows = fbsql.defind_by_self(
                (join_sql + "(user_msg.fromid = '%s')") % page_id)
            dicttoid = _sharefan_num_tally(
                fbsql, rows, limitnum, msgdict, onehoursdict)
            # Entries found for earlier pages win over this page's entries,
            # matching the original ``dict(new, **old)`` merge order.
            merged = dict(percentfan(hot_fan_defind, dicttoid, percent))
            merged.update(hotfan)
            hotfan = merged
    else:
        rows = fbsql.defind_by_self(
            join_sql + "user_msg.fromname='" + page_name + "'")
        dicttoid = _sharefan_num_tally(
            fbsql, rows, limitnum, msgdict, onehoursdict)
        hotfan = percentfan(hot_fan_defind, dicttoid, percent)

    drawhotfun_num(hotfan, onehoursdict, msgdict, page_name, limitnum,
                   str(hot_fan_defind) + percent)


def _sharefan_num_tally(fbsql, rows, limitnum, msgdict, onehoursdict):
    """Fill *msgdict*/*onehoursdict* from *rows*; return share counts per toid."""
    dicttoid = {}
    for row in rows:
        msgid, toid = row[0], row[1]
        if msgid not in msgdict:
            # One detail lookup and one "first N shares" lookup per message.
            msgdict[msgid] = fbsql.defind_by_self(
                "select share_count,created_time from user_msg "
                "where msgid='%s'" % msgid)[0]
            onehoursdict[msgid] = fbsql.defind_by_self(
                "select toid from share_fb_msg_new where trend_mid='%s' "
                "order by(created_time) limit %s" % (msgid, limitnum))
        dicttoid[toid] = dicttoid.get(toid, 0) + 1
    return dicttoid
def seepercent(page_name):
    """Plot shares seen within 11 hours of posting against all recorded shares.

    For each shared message the ``toid`` rows created less than 11 hours
    after the message's ``created_time`` are fetched; messages with at least
    one such row contribute a point ``(early row count, count(postid) row)``.

    Args:
        page_name: ``user_msg.fromname`` to analyse, or ``'all'`` to walk
            every id in the module-level ``pagelist``.

    NOTE(review): points are keyed by the early-share count, so two messages
    with the same count overwrite each other -- confirm this is intended.
    NOTE(review): SQL is built by string interpolation; ``page_name`` must be
    trusted.
    """
    import datetime
    # Namespaced import: a star import inside a function is a SyntaxWarning
    # on Python 2 and a SyntaxError on Python 3.
    import matplotlib.pyplot as plt

    fbsql = fb_mysql()
    join_sql = ("select user_msg.msgid,toid from share_fb_msg_new,user_msg "
                "where share_fb_msg_new.trend_mid = user_msg.msgid and ")
    if page_name == 'all':
        alllist = []
        for page_id in pagelist:
            alllist += fbsql.defind_by_self(
                (join_sql + "(user_msg.fromid = '%s')") % page_id)
    else:
        alllist = fbsql.defind_by_self(
            join_sql + "user_msg.fromname='" + page_name + "'")

    msgdict = {}
    onehoursdict = {}
    for row in alllist:
        msgid = row[0]
        if msgid in msgdict:
            continue
        msgdict[msgid] = fbsql.defind_by_self(
            "select share_count,created_time from user_msg "
            "where msgid='%s'" % msgid)[0]
        # Fixed 11-hour window after the message's created_time.
        cutoff = msgdict[msgid][1] + datetime.timedelta(hours=11)
        strtime = cutoff.strftime("%Y-%m-%d %H:%M:%S")
        onehoursdict[msgid] = fbsql.defind_by_self(
            "select toid from share_fb_msg_new where trend_mid='%s' "
            "and created_time<'%s'" % (msgid, strtime))

    dictxy = {}
    for msgid in msgdict:
        early_rows = onehoursdict[msgid]
        if early_rows:
            # The value is the first result row of the count query.
            dictxy[len(early_rows)] = fbsql.defind_by_self(
                "select count(postid) from share_fb_msg_new "
                "where trend_mid='%s' group by(trend_mid)" % msgid)[0]

    for early_count in dictxy:
        print(str(early_count) + ':' + str(dictxy[early_count]))

    plt.xlabel('11_hours get_count')
    plt.ylabel('all_get')
    plt.title(page_name + ' 11_hour:all_time')
    plt.plot(list(dictxy.keys()), list(dictxy.values()), 'ro')
    plt.show()
def allshare(page_name, hours, justgetdata=None):
    """Relate early share counts to final ``share_count`` via ``draw_corre``.

    For each shared message the ``toid`` rows created within ``hours`` of the
    message's ``created_time`` are fetched (retrying once with ``hours + 3``
    when that window is empty). Messages with a non-empty window and a truthy
    ``share_count`` form the data set, of which a random 25% is held out as
    test data.

    Args:
        page_name: ``user_msg.fromname`` to analyse, or ``'all'`` to walk
            every id in the module-level ``pagelist``.
        hours: width of the early window in hours.
        justgetdata: when truthy, return ``(onehoursdict, msgdict)`` instead
            of drawing.

    Returns:
        ``(onehoursdict, msgdict)`` when ``justgetdata`` is truthy, otherwise
        whatever ``draw_corre`` returns.
    """
    import datetime
    import numpy as np
    from draw_corrcoef import draw_corre

    fbsql = fb_mysql()
    join_sql = ("select user_msg.msgid from share_fb_msg_new,user_msg "
                "where share_fb_msg_new.trend_mid = user_msg.msgid and ")
    if page_name == 'all':
        alllist = []
        for page_id in pagelist:
            alllist += fbsql.defind_by_self(
                (join_sql + "(user_msg.fromid = '%s')") % page_id)
    else:
        alllist = fbsql.defind_by_self(
            join_sql + "user_msg.fromname='" + page_name + "'")

    window_sql = ("select toid from share_fb_msg_new where trend_mid='%s' "
                  "and created_time<'%s'")
    msgdict = {}
    onehoursdict = {}
    for row in alllist:
        msgid = row[0]
        if msgid in msgdict:
            continue
        msgdict[msgid] = fbsql.defind_by_self(
            "select share_count,created_time from user_msg "
            "where msgid='%s'" % msgid)[0]
        # Try the requested window first, then one that is 3 hours wider.
        for width in (hours, hours + 3):
            cutoff = msgdict[msgid][1] + datetime.timedelta(hours=width)
            onehoursdict[msgid] = fbsql.defind_by_self(
                window_sql % (msgid, cutoff.strftime("%Y-%m-%d %H:%M:%S")))
            if onehoursdict[msgid]:
                break

    # (early share count, final share_count) for every usable message.
    samples = [(len(onehoursdict[msgid]), msgdict[msgid][0])
               for msgid in msgdict
               if onehoursdict[msgid] and msgdict[msgid][0]]
    if justgetdata:
        return (onehoursdict, msgdict)

    allcount = len(samples)
    held_out = set(np.random.choice(len(samples), int(len(samples) * 0.25),
                                    replace=False))
    testdatax, testdatay = [], []
    dicttrainx, dicttrainy = [], []
    for position, (early, total) in enumerate(samples):
        if position in held_out:
            testdatax.append(early)
            testdatay.append(total)
        else:
            dicttrainx.append(early)
            dicttrainy.append(total)

    print('总数据:' + str(allcount))
    print('训练数据:' + str(len(dicttrainx)))
    print('测试数据:' + str(len(testdatax)))

    return draw_corre(
        xdata=np.log10(dicttrainx),
        ydata=np.log10(dicttrainy),
        xlabelstr=str(hours) + '_hours get_count',
        ylabelstr='share_count',
        titlestr=page_name + ' ' + str(hours) + '_hour:all_time_true_share_count',
        testdatax=np.log10(testdatax),
        testdatay=np.log10(testdatay))
def __init__(self, cookies, dictt, uid, usr_name, dyn):
    """Initialise the thread with its own DB helper and Facebook client.

    Args:
        cookies: assigned to the new client's ``cookies`` attribute.
        dictt: dict kept as ``self.dictm``; it is stored by reference and
            its ``'from_name'`` key is set to ``usr_name`` in place.
        uid: assigned to the new client's ``uid`` attribute.
        usr_name: kept as ``self.usr_name``.
        dyn: kept as ``self.dyn``.
    """
    threading.Thread.__init__(self)
    self.fbsql = fb_mysql()

    client = facebook_graph()
    client.uid = uid
    client.cookies = cookies
    self.fb = client

    self.dyn = dyn
    # The caller's dict is mutated, not copied.
    dictt['from_name'] = usr_name
    self.dictm = dictt
    self.usr_name = usr_name
def __init__(self, u_info, userid):
    """Initialise the process: log in, fetch a token and cache session data.

    Args:
        u_info: mapping read for the keys ``'user_name'``, ``'password'``,
            ``'dyn'`` and ``'uid'``.
        userid: kept as ``self.usr_id``.

    Logging in happens here, in the constructor, before the process starts.
    """
    multiprocessing.Process.__init__(self)

    client = facebook_graph()
    self.fb = client
    client.login(u_info['user_name'], u_info['password'])
    client.getaccess_token()

    self.fbsql = fb_mysql()

    # Session values are copied onto the instance after login.
    self.act = client.access_token
    self.dyn = u_info['dyn']
    self.uid = u_info['uid']
    self.cookies = client.cookies

    client.uid = u_info['uid']
    client.dyn = u_info['dyn']
    self.usr_id = userid
def sharefan(page_name, hotfannum, hours, percent='', justgetdata=None, hotnot=None):
    """Collect early-share data and ``percentfan`` results, then draw or return.

    For each shared message the ``toid`` rows created within ``hours`` of the
    message's ``created_time`` are fetched (retrying once with ``hours + 3``
    when that window is empty). Shares are also counted per ``toid`` and
    passed to ``percentfan``.

    Args:
        page_name: ``user_msg.fromname`` to analyse, or ``'all'`` to walk
            every id in the module-level ``pagelist``.
        hotfannum: threshold forwarded to ``percentfan``.
        hours: width of the early window in hours.
        percent: forwarded to ``percentfan`` and ``drawhotfun``.
        justgetdata: when truthy, return the collected data instead of drawing.
        hotnot: forwarded unchanged to ``drawhotfun``.

    Returns:
        ``(hotfan, onehoursdict, msgdict)`` when ``justgetdata`` is truthy,
        otherwise whatever ``drawhotfun`` returns.

    NOTE(review): SQL is built by string interpolation; ``page_name`` must be
    trusted.
    """
    fbsql = fb_mysql()
    msgdict = {}
    onehoursdict = {}
    join_sql = ("select user_msg.msgid,toid from share_fb_msg_new,user_msg "
                "where share_fb_msg_new.trend_mid = user_msg.msgid and ")

    if page_name == 'all':
        # A mapping from the start, so the result type does not depend on
        # whether pagelist is empty (it used to begin life as a list).
        hotfan = {}
        for page_id in pagelist:
            rows = fbsql.defind_by_self(
                (join_sql + "(user_msg.fromid = '%s')") % page_id)
            # The 'all' variant additionally selects fromname per message.
            dicttoid = _sharefan_tally(
                fbsql, rows,
                "select share_count,created_time,fromname from user_msg "
                "where msgid='%s'",
                hours, msgdict, onehoursdict)
            # Later pages override earlier ones, as in the original merge.
            hotfan.update(percentfan(hotfannum, dicttoid, percent))
    else:
        rows = fbsql.defind_by_self(
            join_sql + "user_msg.fromname='" + page_name + "'")
        dicttoid = _sharefan_tally(
            fbsql, rows,
            "select share_count,created_time from user_msg where msgid='%s'",
            hours, msgdict, onehoursdict)
        hotfan = percentfan(hotfannum, dicttoid, percent)

    if justgetdata:
        return (hotfan, onehoursdict, msgdict)
    return drawhotfun(hotfan, onehoursdict, msgdict, page_name, hotfannum,
                      hours, percent, hotnot)


def _sharefan_tally(fbsql, rows, detail_sql, hours, msgdict, onehoursdict):
    """Fill *msgdict*/*onehoursdict* from *rows*; return share counts per toid."""
    import datetime

    window_sql = ("select toid from share_fb_msg_new where trend_mid='%s' "
                  "and created_time<'%s'")
    dicttoid = {}
    for row in rows:
        msgid, toid = row[0], row[1]
        if msgid not in msgdict:
            msgdict[msgid] = fbsql.defind_by_self(detail_sql % msgid)[0]
            # Try the requested window first, then one that is 3 hours wider.
            for width in (hours, hours + 3):
                cutoff = msgdict[msgid][1] + datetime.timedelta(hours=width)
                onehoursdict[msgid] = fbsql.defind_by_self(
                    window_sql % (msgid, cutoff.strftime("%Y-%m-%d %H:%M:%S")))
                if onehoursdict[msgid]:
                    break
        dicttoid[toid] = dicttoid.get(toid, 0) + 1
    return dicttoid
def deep_fan(page_name, strchoose, strgetdata=None):
    """Aggregate like statistics of early shares per message and draw them.

    For each message with a truthy ``share_count``, the share rows created
    before ``created_time + 11 hours`` whose ``fromid`` differs from ``toid``
    are counted (``all_num``); among those, rows with ``like_count > 0``
    feed ``like_num``, ``like_sum`` and ``like_max``.

    Args:
        page_name: ``user_msg.fromname`` to analyse, or ``'all'`` to walk
            every id in the module-level ``pagelist``.
        strchoose: ``'num'`` calls ``drawlikenum``, ``'sum'`` calls
            ``drawlikesum``, anything else calls ``drawlikedefind``.
        strgetdata: forwarded unchanged to the chosen draw function.

    NOTE(review): the 11-hour cut-off is hard-coded; other functions in this
    file subtract 8 from such values in titles, so it presumably includes a
    timezone offset -- confirm.
    NOTE(review): SQL is built by string interpolation; ``page_name`` must be
    trusted.
    """
    import datetime

    fbsql = fb_mysql()
    join_sql = ("select user_msg.msgid,share_fb_msg_new.fromid,toid,"
                "share_fb_msg_new.created_time,share_fb_msg_new.like_count "
                "from share_fb_msg_new,user_msg "
                "where share_fb_msg_new.trend_mid = user_msg.msgid and ")
    if page_name == 'all':
        alllist = []
        for page_id in pagelist:
            alllist += fbsql.defind_by_self(
                (join_sql + "(user_msg.fromid = '%s')") % page_id)
    else:
        alllist = fbsql.defind_by_self(
            join_sql + "user_msg.fromname='" + page_name + "'")

    timedict = {}
    # Messages without a share_count are remembered so their detail query is
    # not repeated for every one of their share rows.
    skipped = set()
    for row in alllist:
        msgid, fromid, toid = row[0], row[1], row[2]
        shared_at, like_count = row[3], row[4]
        if msgid in skipped:
            continue
        stats = timedict.get(msgid)
        if stats is None:
            detail = fbsql.defind_by_self(
                "select share_count,created_time from user_msg "
                "where msgid='%s'" % msgid)[0]
            if not detail[0]:
                skipped.add(msgid)
                continue
            stats = {
                'share_count': detail[0],
                'created_time': detail[1],
                'line_time': detail[1] + datetime.timedelta(hours=11),
                'like_num': 0,
                'like_max': 0,
                'all_num': 0,
                'like_sum': 0,
            }
            timedict[msgid] = stats
        # Only early shares that are not self-shares are counted.
        if shared_at < stats['line_time'] and fromid != toid:
            stats['all_num'] += 1
            if like_count and like_count > 0:
                stats['like_num'] += 1
                stats['like_sum'] += like_count
                if like_count > stats['like_max']:
                    stats['like_max'] = like_count

    if strchoose == 'num':
        drawlikenum(timedict, page_name, strgetdata)
    elif strchoose == 'sum':
        drawlikesum(timedict, page_name, strgetdata)
    else:
        drawlikedefind(timedict, page_name, strgetdata)
def share_time(page_name, hoursdefind):
    """Plot the average number of shares per message for each hour after posting.

    Every share row is bucketed by the whole number of hours between the
    message's ``created_time`` and the share's ``created_time``; each bucket
    total is divided by the number of distinct messages seen.

    Args:
        page_name: ``user_msg.fromname`` to analyse, or ``'all'`` to walk
            every id in the module-level ``pagelist``.
        hoursdefind: number of hourly buckets (``0 .. hoursdefind - 1``).

    NOTE(review): the strict ``0 <`` bound excludes shares made during the
    first hour, so bucket 0 always stays 0 -- confirm this is intended.
    NOTE(review): SQL is built by string interpolation; ``page_name`` must be
    trusted.
    """
    # Namespaced import: a star import inside a function is a SyntaxWarning
    # on Python 2 and a SyntaxError on Python 3.
    import matplotlib.pyplot as plt

    fbsql = fb_mysql()
    join_sql = ("select user_msg.msgid,share_fb_msg_new.created_time "
                "from share_fb_msg_new,user_msg "
                "where share_fb_msg_new.trend_mid = user_msg.msgid and ")
    if page_name == 'all':
        alllist = []
        for page_id in pagelist:
            alllist += fbsql.defind_by_self(
                (join_sql + "(user_msg.fromid = '%s')") % page_id)
    else:
        alllist = fbsql.defind_by_self(
            join_sql + "user_msg.fromname='" + page_name + "'")

    time_count = {}
    for hour in range(hoursdefind):
        time_count[hour] = 0

    created_times = {}
    for row in alllist:
        msgid, shared_at = row[0], row[1]
        first_seen = msgid not in created_times
        if first_seen:
            created_times[msgid] = fbsql.defind_by_self(
                "select created_time from user_msg "
                "where msgid='%s'" % msgid)[0][0]
        elapsed = shared_at - created_times[msgid]
        temphours = elapsed.days * 24 + elapsed.seconds // 3600
        if 0 < temphours < hoursdefind:
            if first_seen:
                # Trace output kept from the original: printed only for the
                # first share row of each message.
                print(shared_at)
                print(created_times[msgid])
                print(temphours)
            time_count[temphours] += 1

    msgnum = len(created_times)
    dictxy = {}
    for hour in time_count:
        print(str(hour) + ':' + str(time_count[hour]))
        # True division: Python 2's integer "/" truncated every average, and
        # an empty result set used to raise ZeroDivisionError.
        dictxy[hour] = time_count[hour] / float(msgnum) if msgnum else 0.0

    plt.xlabel('each_hours')
    plt.ylabel('avg_share')
    plt.title(page_name + 'avg share num of each time')
    plt.plot(list(dictxy.keys()), list(dictxy.values()), 'ro')
    plt.show()
def post_time(page_name, choose='page'):
    """Draw a histogram of posting times in half-hour slots of the day.

    Args:
        page_name: ``user_msg.fromname`` to analyse, or ``'all'`` to walk
            every id in the module-level ``pagelist``.
        choose: ``'page'`` uses ``user_msg.created_time`` (the page's own
            posts); any other value uses ``share_fb_msg_new.created_time``
            (shares) and shifts every slot by 32 half-hours (16 h) modulo
            one day.

    NOTE(review): the reason for the 16-hour shift is not visible here;
    presumably a timezone adjustment -- confirm.
    NOTE(review): SQL is built by string interpolation; ``page_name`` must be
    trusted.
    """
    import datetime
    # Namespaced import: a star import inside a function is a SyntaxWarning
    # on Python 2 and a SyntaxError on Python 3.
    import matplotlib.pyplot as plt

    fbsql = fb_mysql()
    # Midnight reference; subtracting it leaves the time of day in .seconds.
    basictime = datetime.datetime.strptime('00:00:00', '%H:%M:%S')

    if choose == 'page':
        by_id_sql = "select created_time from user_msg where fromid = '%s'"
        by_name_sql = "select created_time from user_msg where fromname='"
        slot_shift = 0
    else:
        share_sql = ("select share_fb_msg_new.created_time "
                     "from share_fb_msg_new,user_msg "
                     "where share_fb_msg_new.trend_mid = user_msg.msgid and ")
        by_id_sql = share_sql + "user_msg.fromid = '%s'"
        by_name_sql = share_sql + "user_msg.fromname='"
        slot_shift = 32

    if page_name == 'all':
        alllist = []
        for page_id in pagelist:
            alllist += fbsql.defind_by_self(by_id_sql % page_id)
    else:
        alllist = fbsql.defind_by_self(by_name_sql + page_name + "'")

    time_count = {}
    for slot in range(48):
        time_count[slot * 0.5] = 0
    for row in alllist:
        if row[0]:
            slot = ((row[0] - basictime).seconds // 1800 + slot_shift) % 48
            time_count[slot * 0.5] += 1

    xx = [slot * 0.5 for slot in range(48)]
    yy = [time_count[x] for x in xx]
    plt.bar(xx, yy, width=0.5, align='edge', color='green')
    plt.plot(xx, yy, 'r--')
    plt.xlabel('each 0.5h')
    if choose == 'page':
        plt.ylabel('page post count')
        plt.title(page_name + ' page post count per 0.5h')
    else:
        plt.ylabel('user post count')
        plt.title(page_name + ' user post count per 0.5h')
    plt.show()
def deep_fan_num(page_name, limit_num):
    """Count liked shares among each message's first ``limit_num`` shares.

    Builds ``{msgid: {'share_count': ..., 'like_num': ...}}`` where
    ``like_num`` is the number of the first ``limit_num`` share rows (ordered
    by ``created_time``) whose ``like_count`` is positive, then passes it to
    ``drawlikenum_num``.

    Args:
        page_name: ``user_msg.fromname`` to analyse, or ``'all'`` to walk
            every id in the module-level ``pagelist``.
        limit_num: value interpolated into the SQL ``limit`` clause.

    NOTE(review): SQL is built by string interpolation; both arguments must
    be trusted.
    """
    fbsql = fb_mysql()
    join_sql = ("select share_fb_msg_new.trend_mid, user_msg.share_count "
                "from user_msg,share_fb_msg_new "
                "where share_fb_msg_new.trend_mid = user_msg.msgid and ")
    if page_name == 'all':
        msglist = []
        for page_id in pagelist:
            msglist += fbsql.defind_by_self(
                (join_sql + "(user_msg.fromid = '%s') "
                            "group by(share_fb_msg_new.trend_mid)") % page_id)
    else:
        msglist = fbsql.defind_by_self(
            (join_sql + "user_msg.fromname ='%s' "
                        "group by(share_fb_msg_new.trend_mid)") % (page_name))

    sharedict = {}
    for row in msglist:
        msgid = row[0]
        like_rows = fbsql.defind_by_self(
            "select like_count from share_fb_msg_new where trend_mid='%s' "
            "order by(created_time) limit %s" % (msgid, limit_num))
        # Each result is a row, so the value is like_row[0]. Comparing the
        # row itself with 0 (as before) is always true on Python 2, which
        # made like_num simply the number of rows.
        liked = sum(1 for like_row in like_rows
                    if like_row[0] and like_row[0] > 0)
        sharedict[msgid] = {'share_count': row[1], 'like_num': liked}

    drawlikenum_num(sharedict, page_name, limit_num)
def change_draw_fan(page_name, hotfannum, hours):
    """Plot, day by day, the fraction of sharers that count as hot fans.

    For each of 40 consecutive days starting 2015-12-15, all shares of
    messages created before that day are counted per ``toid``; the plotted
    value is the fraction of ``toid`` values with at least ``hotfannum``
    shares.

    Args:
        page_name: ``user_msg.fromname`` to analyse.
        hotfannum: minimum share count for a ``toid`` to count as a hot fan.
        hours: unused; kept so existing callers keep working. It previously
            only drove per-message queries whose results were never read.

    NOTE(review): the plot title is hard-coded and does not use
    ``page_name`` -- confirm whether that is intended.
    NOTE(review): SQL is built by string interpolation; ``page_name`` must be
    trusted.
    """
    import datetime
    # Namespaced import: a star import inside a function is a SyntaxWarning
    # on Python 2 and a SyntaxError on Python 3.
    import matplotlib.pyplot as plt

    fbsql = fb_mysql()
    begin_time = datetime.datetime(2015, 12, 15)
    day_count = 40
    join_sql = ("select user_msg.msgid,toid from share_fb_msg_new,user_msg "
                "where share_fb_msg_new.trend_mid = user_msg.msgid and ")

    hotfan = []
    for k in range(day_count):
        day = begin_time + datetime.timedelta(days=k)
        timenow_time = day.strftime("%Y-%m-%d %H:%M:%S")
        print(timenow_time)
        alllist = fbsql.defind_by_self(
            join_sql + "user_msg.fromname='" + page_name
            + "' and user_msg.created_time<'" + timenow_time + "'")

        dicttoid = {}
        for row in alllist:
            dicttoid[row[1]] = dicttoid.get(row[1], 0) + 1

        hot_total = sum(1 for shares in dicttoid.values()
                        if shares >= hotfannum)
        # True division: Python 2's integer "/" turned every ratio into 0 or
        # 1, and a day without any shares raised ZeroDivisionError.
        ratio = hot_total / float(len(dicttoid)) if dicttoid else 0.0
        hotfan.append(ratio)
        print(ratio)

    plt.xlabel('begin at 2015-12-15 each 1 day')
    plt.ylabel('hot_fan_num')
    plt.title('Tsai Ing-wen hotfan percent')
    plt.plot(list(range(day_count)), hotfan)
    plt.show()
def fandefind(page_name):
    """Build a histogram of shares-per-user and pass it to ``drawline``.

    Shares are counted per ``toid``; the histogram then maps each share
    count to the number of ``toid`` values that have exactly that count.
    ``drawline`` receives the histogram, ``page_name`` and the number of
    distinct ``toid`` values.

    Args:
        page_name: ``user_msg.fromname`` to analyse, or ``'all'`` to walk
            every id in the module-level ``pagelist``.
    """
    fbsql = fb_mysql()
    join_sql = ("select user_msg.msgid,toid from share_fb_msg_new,user_msg "
                "where share_fb_msg_new.trend_mid = user_msg.msgid and ")
    if page_name == 'all':
        rows = []
        for page_id in pagelist:
            rows += fbsql.defind_by_self(
                (join_sql + "(user_msg.fromid = '%s')") % page_id)
    else:
        rows = fbsql.defind_by_self(
            join_sql + "user_msg.fromname='" + page_name + "'")

    # toid -> number of share rows.
    shares_per_user = {}
    for row in rows:
        shares_per_user[row[1]] = shares_per_user.get(row[1], 0) + 1

    # share count -> number of users with that many shares.
    users_per_count = {}
    for shares in shares_per_user.values():
        users_per_count[shares] = users_per_count.get(shares, 0) + 1

    drawline(users_per_count, page_name, len(shares_per_user))
def __init__(self, fb, fid, act):
    """Initialise the thread for one ``fid``.

    Args:
        fb: client object kept as ``self.fb``.
        fid: identifier kept as ``self.fid``.
        act: accepted but not stored.

    NOTE(review): ``act`` is never used here -- confirm whether it should be
    kept on the instance.
    """
    threading.Thread.__init__(self)
    self.fid, self.fb = fid, fb
    # Every instance creates its own fb_mysql helper.
    self.fbsql = fb_mysql()
def __init__(self, queue, fb, lockname):
    """Initialise the thread with a work queue, a client and a lock.

    Args:
        queue: queue object kept as ``self.queue``.
        fb: client object kept as ``self.fb``.
        lockname: lock object kept as ``self.lock``.
    """
    threading.Thread.__init__(self)
    # Every instance creates its own fb_mysql helper.
    self.fbsql = fb_mysql()
    self.fb, self.queue = fb, queue
    self.lock = lockname
# Script fragment: logs in, then repeatedly loads recent messages and starts
# turning each row into a dict. The loop body appears to continue beyond
# this excerpt (``threads`` is created but not used in the visible part).
from trend_topic import get_share_post
from trend_topic import getmsgsend
import datetime
import time
from save_post import fb_post_comment
from save_post import fb_post_sharelike
from _fb_commensql import fb_mysql
import threading
from fb_friend_thread import fb_friend
from trend_topic import get_user_msg
# NOTE(review): ``facebook_graph`` is used below but is not among the imports
# visible here -- confirm it is imported elsewhere.
while(1):
    uid = '100008105866583'
    fb = facebook_graph()
    # NOTE(review): account credentials are hard-coded in the source; they
    # should come from configuration or the environment.
    fb.login("*****@*****.**","blueapple")
    fb.getaccess_token()
    fbsql = fb_mysql()
    act = fb.access_token
    cookies = fb.cookies
    fb.uid = uid
    usr_id = '326683984410'
    #(dictmsg,usr_name) = get_user_msg(fb,fbsql,usr_id)
    # Cut-off timestamp: 15 days before now.
    gettime = datetime.datetime.now()+datetime.timedelta(days=-15)
    # NOTE(review): "%y" yields a two-digit year, unlike the "%Y" used by the
    # other queries in this file -- confirm get_line_msg expects that.
    msglist = fbsql.get_line_msg(gettime.strftime("%y-%m-%d %H:%M:%S"))
    threads = []
    for i in msglist:
        print i
        # Copy the first four columns of the row into a fresh dict.
        msgtemp = dict()
        msgtemp['msgid'] = i[0]
        msgtemp['fromid'] = i[1]
        msgtemp['from_name'] = i[2]
        msgtemp['name'] = i[3]
def likesum_nothotfan(page_name, hotnum, timeline, percent='', justgetdata=None):
    """Relate likes on early shares by non-hot-fans to final ``share_count``.

    Shares are counted per ``toid`` and handed to ``percentfan`` to obtain
    the hot-fan collection. For each message with a truthy ``share_count``,
    share rows created before ``created_time + timeline hours`` whose
    ``fromid`` differs from ``toid`` are counted (``all_num``); positive
    ``like_count`` values are summed into ``all_like_sum`` and, when the
    ``toid`` is not a hot fan, into ``like_sum``. Messages with
    ``all_num > 0`` and ``like_sum > 0`` form the data set, of which a random
    25% is held out as test data.

    Args:
        page_name: ``user_msg.fromname`` to analyse, or ``'all'`` to walk
            every id in the module-level ``pagelist``.
        hotnum: threshold forwarded to ``percentfan``.
        timeline: width of the early window in hours.
        percent: forwarded to ``percentfan`` and shown in the plot title.
        justgetdata: when truthy, return the per-message statistics dict
            instead of drawing.

    Returns:
        The statistics dict when ``justgetdata`` is truthy, otherwise
        whatever ``draw_corre`` returns.

    NOTE(review): the title reports ``timeline - 8`` hours; presumably an
    8-hour timezone offset -- confirm.
    NOTE(review): SQL is built by string interpolation; ``page_name`` must be
    trusted.
    """
    import datetime
    import numpy as np
    from draw_corrcoef import draw_corre

    fbsql = fb_mysql()
    join_sql = ("select user_msg.msgid,share_fb_msg_new.fromid,toid,"
                "share_fb_msg_new.created_time,share_fb_msg_new.like_count "
                "from share_fb_msg_new,user_msg "
                "where share_fb_msg_new.trend_mid = user_msg.msgid and ")
    if page_name == 'all':
        alllist = []
        # A mapping from the start, so the type does not depend on whether
        # pagelist is empty (it used to begin life as a list).
        hotfan = {}
        for page_id in pagelist:
            eachlist = fbsql.defind_by_self(
                (join_sql + "(user_msg.fromid = '%s')") % page_id)
            alllist += eachlist
            # Later pages override earlier ones, as in the original merge.
            hotfan.update(percentfan(
                hotnum, _likesum_shares_per_toid(eachlist), percent))
    else:
        alllist = fbsql.defind_by_self(
            join_sql + "user_msg.fromname='" + page_name + "'")
        hotfan = percentfan(
            hotnum, _likesum_shares_per_toid(alllist), percent)

    timedict = {}
    # Messages without a share_count are remembered so their detail query is
    # not repeated for every one of their share rows.
    skipped = set()
    for row in alllist:
        msgid, fromid, toid = row[0], row[1], row[2]
        shared_at, like_count = row[3], row[4]
        if msgid in skipped:
            continue
        stats = timedict.get(msgid)
        if stats is None:
            detail = fbsql.defind_by_self(
                "select share_count,created_time from user_msg "
                "where msgid='%s'" % msgid)[0]
            if not detail[0]:
                skipped.add(msgid)
                continue
            stats = {
                'share_count': detail[0],
                'created_time': detail[1],
                'line_time': detail[1] + datetime.timedelta(hours=timeline),
                'all_num': 0,
                'like_sum': 0,
                'all_like_sum': 0,
            }
            timedict[msgid] = stats
        # Only early shares that are not self-shares are counted.
        if shared_at < stats['line_time'] and fromid != toid:
            stats['all_num'] += 1
            if like_count and like_count > 0:
                if toid not in hotfan:
                    stats['like_sum'] += like_count
                stats['all_like_sum'] += like_count

    if justgetdata:
        return timedict

    dictx = []
    dicty = []
    for msgid in timedict:
        stats = timedict[msgid]
        print(msgid + ':' + str(stats['like_sum']))
        if stats['all_num'] > 0 and stats['like_sum'] > 0:
            dictx.append(stats['like_sum'])
            dicty.append(stats['share_count'])

    # Random 25% hold-out, chosen by index without replacement.
    held_out = set(np.random.choice(len(dictx), int(len(dictx) * 0.25),
                                    replace=False))
    testdatax, testdatay = [], []
    dicttrainx, dicttrainy = [], []
    for position, (x_value, y_value) in enumerate(zip(dictx, dicty)):
        if position in held_out:
            testdatax.append(x_value)
            testdatay.append(y_value)
        else:
            dicttrainx.append(x_value)
            dicttrainy.append(y_value)

    print('训练数据:' + str(len(dicttrainx)))
    print('测试数据:' + str(len(testdatax)))

    xlabel = 'like_sum_notfan/all_num ' + str(timeline) + 'h'
    ylabel = 'share_count'
    title = (page_name + ' like_sum_notfan:share_count time:'
             + str(timeline - 8) + 'h fan_defind:' + str(hotnum) + percent + 'up')
    return draw_corre(xdata=np.log10(dicttrainx),
                      ydata=np.log10(dicttrainy),
                      xlabelstr=xlabel,
                      ylabelstr=ylabel,
                      titlestr=title,
                      testdatax=np.log10(testdatax),
                      testdatay=np.log10(testdatay))


def _likesum_shares_per_toid(rows):
    """Return ``{toid: number of rows}`` for share rows with toid at index 2."""
    counts = {}
    for row in rows:
        counts[row[2]] = counts.get(row[2], 0) + 1
    return counts
def twolikesum_nothotfan(page_name, hotnum, hotnum2, timeline):
    """Plot likes on early shares by very active sharers against ``share_count``.

    Shares are counted per ``toid``. A ``toid`` with at least ``hotnum``
    shares is a hot fan; one with at least ``hotnum2`` shares is in the
    second group. For each message with a truthy ``share_count``, share rows
    created before ``created_time + timeline hours`` whose ``fromid`` differs
    from ``toid`` are counted; positive ``like_count`` values are summed into
    ``like_sum`` (sharer is not a hot fan) and ``like_sum2`` (sharer is in
    the second group). The plot shows ``log10(like_sum2)`` against
    ``log10(share_count)``.

    Args:
        page_name: ``user_msg.fromname`` to analyse, or ``'all'`` to walk
            every id in the module-level ``pagelist``.
        hotnum: minimum share count for the hot-fan group.
        hotnum2: minimum share count for the second group.
        timeline: width of the early window in hours.

    NOTE(review): the printed label says "> 80" regardless of ``hotnum2``.
    NOTE(review): points are keyed by ``like_sum2``, so messages with equal
    sums overwrite each other, and a sum of 0 becomes ``-inf`` under
    ``log10`` -- confirm this is intended.
    NOTE(review): SQL is built by string interpolation; ``page_name`` must be
    trusted.
    """
    import datetime
    import numpy as np
    # Namespaced import: a star import inside a function is a SyntaxWarning
    # on Python 2 and a SyntaxError on Python 3.
    import matplotlib.pyplot as plt

    fbsql = fb_mysql()
    join_sql = ("select user_msg.msgid,share_fb_msg_new.fromid,toid,"
                "share_fb_msg_new.created_time,share_fb_msg_new.like_count "
                "from share_fb_msg_new,user_msg "
                "where share_fb_msg_new.trend_mid = user_msg.msgid and ")
    if page_name == 'all':
        alllist = []
        for page_id in pagelist:
            alllist += fbsql.defind_by_self(
                (join_sql + "(user_msg.fromid = '%s')") % page_id)
    else:
        alllist = fbsql.defind_by_self(
            join_sql + "user_msg.fromname='" + page_name + "'")

    dicttoid = {}
    for row in alllist:
        dicttoid[row[2]] = dicttoid.get(row[2], 0) + 1

    hotfan = set(toid for toid in dicttoid if dicttoid[toid] >= hotnum)
    # Kept as a dict with None values so the printed form is unchanged.
    hotfan2 = dict.fromkeys(
        toid for toid in dicttoid if dicttoid[toid] >= hotnum2)
    print('hotfan > 80:' + str(hotfan2))

    timedict = {}
    # Messages without a share_count are remembered so their detail query is
    # not repeated for every one of their share rows.
    skipped = set()
    for row in alllist:
        msgid, fromid, toid = row[0], row[1], row[2]
        shared_at, like_count = row[3], row[4]
        if msgid in skipped:
            continue
        stats = timedict.get(msgid)
        if stats is None:
            detail = fbsql.defind_by_self(
                "select share_count,created_time from user_msg "
                "where msgid='%s'" % msgid)[0]
            if not detail[0]:
                skipped.add(msgid)
                continue
            stats = {
                'share_count': detail[0],
                'created_time': detail[1],
                'line_time': detail[1] + datetime.timedelta(hours=timeline),
                'all_num': 0,
                'like_sum': 0,
                'like_sum2': 0,
            }
            timedict[msgid] = stats
        # Only early shares that are not self-shares are counted.
        if shared_at < stats['line_time'] and fromid != toid:
            stats['all_num'] += 1
            if like_count and like_count > 0:
                if toid not in hotfan:
                    stats['like_sum'] += like_count
                if toid in hotfan2:
                    stats['like_sum2'] += like_count

    dictxy2 = {}
    for msgid in timedict:
        stats = timedict[msgid]
        print(msgid + ':' + str(stats['like_sum']))
        if stats['all_num'] > 0:
            dictxy2[stats['like_sum2']] = stats['share_count']

    plt.xlabel('like_sum_notfan/all_num ' + str(timeline) + 'h')
    plt.ylabel('share_count')
    print(dictxy2)
    plt.title(page_name + ' like_sum_notfan:share_count time:' + str(timeline)
              + 'h fan_defind:' + str(hotnum) + '-' + str(hotnum2) + 'up')
    plt.plot(np.log10(list(dictxy2.keys())),
             np.log10(list(dictxy2.values())), '*')
    plt.show()