def allshare(page_name, hours, justgetdata=None):
    """Relate the number of early shares of each message to its share count.

    For every message of the selected page(s) the function loads
    ``share_count`` and ``created_time`` from ``user_msg`` and the ``toid``
    rows of ``share_fb_msg_new`` created less than ``hours`` hours after the
    message.  When that window yields no rows it is retried once with
    ``hours + 3``.

    Args:
        page_name: value matched against ``user_msg.fromname``; the special
            value ``'all'`` iterates over the module-level ``pagelist`` and
            matches ``user_msg.fromid`` instead.
        hours: width of the early-share window, in hours.
        justgetdata: when truthy, skip the plotting and return the raw data.

    Returns:
        ``(onehoursdict, msgdict)`` when ``justgetdata`` is truthy, where
        ``onehoursdict`` maps msgid -> early share rows and ``msgdict`` maps
        msgid -> ``(share_count, created_time)`` row.  Otherwise whatever
        ``draw_corre`` returns for the log10-transformed train/test split.
    """
    import datetime

    import numpy as np
    from draw_corrcoef import draw_corre

    fbsql = fb_mysql()

    # NOTE(review): every statement below is assembled by string
    # interpolation, so a quote inside page_name (or an id) breaks or alters
    # the SQL.  Switch to bound parameters if defind_by_self supports them.
    if page_name == 'all':
        alllist = []
        for page_id in pagelist:
            alllist.extend(fbsql.defind_by_self(
                "select user_msg.msgid from share_fb_msg_new,user_msg "
                "where share_fb_msg_new.trend_mid = user_msg.msgid "
                "and (user_msg.fromid = '%s')" % page_id))
    else:
        alllist = fbsql.defind_by_self(
            "select user_msg.msgid from share_fb_msg_new,user_msg "
            "where share_fb_msg_new.trend_mid = user_msg.msgid "
            "and user_msg.fromname='%s'" % page_name)

    def _shares_before(msgid, created, window_hours):
        # Share rows of `msgid` created before `created + window_hours`.
        deadline = created + datetime.timedelta(hours=window_hours)
        return fbsql.defind_by_self(
            "select toid from share_fb_msg_new "
            "where trend_mid='%s' and created_time<'%s'"
            % (msgid, deadline.strftime("%Y-%m-%d %H:%M:%S")))

    msgdict = {}
    onehoursdict = {}
    for row in alllist:
        msgid = row[0]
        if msgid in msgdict:
            # One row per share comes back; each message is loaded once.
            continue
        msgdict[msgid] = fbsql.defind_by_self(
            "select share_count,created_time from user_msg "
            "where msgid='%s'" % msgid)[0]
        created = msgdict[msgid][1]
        early = _shares_before(msgid, created, hours)
        if not early:
            # Nothing inside the requested window: widen it by 3 hours once.
            early = _shares_before(msgid, created, hours + 3)
        onehoursdict[msgid] = early

    if justgetdata:
        return (onehoursdict, msgdict)

    # Keep only messages with at least one early share and a non-zero
    # share_count; both values go through log10 below.
    dictx = []
    dicty = []
    for msgid in msgdict:
        early = onehoursdict[msgid]
        share_count = msgdict[msgid][0]
        if early and share_count:
            dictx.append(len(early))
            dicty.append(share_count)
    allcount = len(dictx)

    # Hold out a random 25% of the samples as test data.
    held_out = set(np.random.choice(
        len(dictx), int(len(dictx) * 0.25), replace=False).tolist())
    testdatax = []
    testdatay = []
    dicttrainx = []
    dicttrainy = []
    for idx, (x, y) in enumerate(zip(dictx, dicty)):
        if idx in held_out:
            testdatax.append(x)
            testdatay.append(y)
        else:
            dicttrainx.append(x)
            dicttrainy.append(y)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('总数据:' + str(allcount))
    print('训练数据:' + str(len(dicttrainx)))
    print('测试数据:' + str(len(testdatax)))

    xlabel = str(hours) + '_hours get_count'
    ylabel = 'share_count'
    title = page_name + ' ' + str(hours) + '_hour:all_time_true_share_count'
    return draw_corre(xdata=np.log10(dicttrainx),
                      ydata=np.log10(dicttrainy),
                      xlabelstr=xlabel,
                      ylabelstr=ylabel,
                      titlestr=title,
                      testdatax=np.log10(testdatax),
                      testdatay=np.log10(testdatay))
def drawhotfun(hotfan, onehoursdict, msgdict, page_name, hotfannum, hours,
               percent, hotnot):
    """Plot share_count against the fraction of early shares by hot fans.

    For each message with at least one early share row, the x value is the
    fraction of those rows whose first column is a member of ``hotfan`` (or,
    when ``hotnot`` is truthy, the fraction that is *not*).  Messages whose
    fraction is zero or whose share count is falsy are dropped because both
    axes are log10-transformed.

    The fraction is always computed per share row with true division.  The
    previous code used ``/`` on two ints (floor division on Python 2, giving
    only 0 or 1) and switched between counting rows and counting distinct
    fans depending on which collection was shorter.

    Args:
        hotfan: container of hot-fan ids (a dict contributes its keys).
        onehoursdict: msgid -> sequence of 1-column share rows ``(toid,)``.
        msgdict: msgid -> row whose first column is the share count.
        page_name: used in the plot title.
        hotfannum: used in the plot title.
        hours: used in the plot title as ``hours - 8``.
        percent: string appended after ``hotfannum`` in the title.
        hotnot: truthy to use the non-hot-fan fraction instead.

    Returns:
        Whatever ``draw_corre`` returns.
    """
    import numpy as np
    from draw_corrcoef import draw_corre

    # One set gives O(1) membership whatever container type was passed in.
    hot_ids = set(hotfan)

    dictx = []
    dicty = []
    for msgid in onehoursdict:
        rows = onehoursdict[msgid]
        if not rows:
            continue
        matched = sum(1 for row in rows if row[0] in hot_ids)
        if hotnot:
            matched = len(rows) - matched
        # float() forces true division on Python 2 as well.
        ratio = float(matched) / len(rows)
        share_count = msgdict[msgid][0]
        if share_count and ratio > 0:
            dictx.append(ratio)
            dicty.append(share_count)

    # Hold out a random 25% of the samples as test data.
    held_out = set(np.random.choice(
        len(dictx), int(len(dictx) * 0.25), replace=False).tolist())
    testdatax = []
    testdatay = []
    dicttrainx = []
    dicttrainy = []
    for idx, (x, y) in enumerate(zip(dictx, dicty)):
        if idx in held_out:
            testdatax.append(x)
            testdatay.append(y)
        else:
            dicttrainx.append(x)
            dicttrainy.append(y)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('训练数据:' + str(len(dicttrainx)))
    print('测试数据:' + str(len(testdatax)))

    xlabel = 'hot_fan_p'
    ylabel = 'share_count'
    # NOTE(review): the "- 8" looks like a fixed hour offset applied by the
    # callers' convention — confirm before changing.
    title = (page_name + ' fan_difind:share_' + str(hotfannum) + percent
             + 'up time:' + str(hours - 8) + 'h')
    return draw_corre(xdata=np.log10(dicttrainx),
                      ydata=np.log10(dicttrainy),
                      xlabelstr=xlabel,
                      ylabelstr=ylabel,
                      titlestr=title,
                      testdatax=np.log10(testdatax),
                      testdatay=np.log10(testdatay))
def drawtest(dataMat, labelMat, ws, page_name, hotnum, timeline, testx, testy,
             percent='', xlabelinput='', titleinput='', notshow=None):
    """Plot labels against the fitted linear combination of two features.

    Each sample ``row`` is projected to
    ``ws[0] * row[0] + ws[1] * row[1] + ws[2]`` and the projections of the
    training and test samples are handed to ``draw_corre`` together with
    their labels.

    Args:
        dataMat: indexable training samples, each with at least two features.
        labelMat: indexable training labels, aligned with ``dataMat``.
        ws: the two weights followed by the constant term.
        page_name: used in the plot title.
        hotnum: used in the plot title.
        timeline: used in the x label and title as ``timeline - 8``.
        testx: indexable test samples.
        testy: indexable test labels, aligned with ``testx``.
        percent: string appended after ``hotnum`` in the title.
        xlabelinput: names of the two features for the x label.  Missing
            names fall back to ``'x0'`` / ``'x1'``; previously the default
            ``''`` raised IndexError.
        titleinput: free text inserted into the title.
        notshow: passed through to ``draw_corre``.

    Returns:
        Whatever ``draw_corre`` returns.
    """
    from draw_corrcoef import draw_corre

    def _project(samples, labels):
        # Apply the linear model to every sample; labels are read by the
        # same index so a length mismatch still surfaces as an IndexError.
        xs = []
        ys = []
        for idx in range(len(samples)):
            features = samples[idx]
            xs.append(ws[0] * features[0] + ws[1] * features[1] + ws[2])
            ys.append(labels[idx])
        return xs, ys

    dictx, dicty = _project(dataMat, labelMat)
    testdatax, testdatay = _project(testx, testy)

    given = () if xlabelinput is None else xlabelinput
    names = [given[k] if len(given) > k else 'x%d' % k for k in range(2)]

    # NOTE(review): the "- 8" looks like a fixed hour offset applied by the
    # callers' convention — confirm before changing.
    shown_hours = str(timeline - 8)
    xlabel = (str(ws[0]) + '*' + names[0] + '+' + str(ws[1]) + '*' + names[1]
              + '+' + str(ws[2]) + ' ' + shown_hours + 'h')
    ylabel = 'share_count'
    title = (page_name + ' ' + titleinput + ' time:' + shown_hours
             + 'h fan_defind:' + str(hotnum) + percent + 'up')
    return draw_corre(xdata=dictx,
                      ydata=dicty,
                      xlabelstr=xlabel,
                      ylabelstr=ylabel,
                      titlestr=title,
                      testdatax=testdatax,
                      testdatay=testdatay,
                      notshow=notshow)
def likesum_nothotfan(page_name, hotnum, timeline, percent='',
                      justgetdata=None):
    """Relate likes on early shares by non-hot fans to the final share count.

    Share rows ``(msgid, fromid, toid, created_time, like_count)`` are loaded
    for the selected page(s).  ``percentfan(hotnum, counts, percent)`` is
    called with the per-``toid`` row counts to obtain the hot fans.  For each
    message with a truthy ``share_count`` the rows created before
    ``created_time + timeline`` hours, and whose ``fromid`` differs from
    ``toid``, are accumulated into:

    * ``all_num``      - number of such rows,
    * ``all_like_sum`` - sum of their positive ``like_count`` values,
    * ``like_sum``     - the same sum restricted to ``toid`` not in hot fans.

    Args:
        page_name: value matched against ``user_msg.fromname``; ``'all'``
            iterates over the module-level ``pagelist`` (matching
            ``user_msg.fromid``) and merges the hot fans of every page.
        hotnum: forwarded to ``percentfan``; also shown in the title.
        timeline: width of the early window, in hours.
        percent: forwarded to ``percentfan``; also shown in the title.
        justgetdata: when truthy, return the per-message dict and skip
            plotting.

    Returns:
        The msgid -> stats dict when ``justgetdata`` is truthy, otherwise
        whatever ``draw_corre`` returns for the log10-transformed split.
    """
    import datetime

    import numpy as np
    from draw_corrcoef import draw_corre

    fbsql = fb_mysql()

    # NOTE(review): every statement below is assembled by string
    # interpolation, so a quote inside page_name (or an id) breaks or alters
    # the SQL.  Switch to bound parameters if defind_by_self supports them.
    select_shares = (
        "select user_msg.msgid,share_fb_msg_new.fromid,toid,"
        "share_fb_msg_new.created_time,share_fb_msg_new.like_count "
        "from share_fb_msg_new,user_msg "
        "where share_fb_msg_new.trend_mid = user_msg.msgid and ")

    def _count_receivers(rows):
        # Number of share rows per toid (third column).
        counts = {}
        for row in rows:
            counts[row[2]] = counts.get(row[2], 0) + 1
        return counts

    if page_name == 'all':
        alllist = []
        hotfan = {}
        for page_id in pagelist:
            eachlist = fbsql.defind_by_self(
                (select_shares + "(user_msg.fromid = '%s')") % page_id)
            alllist.extend(eachlist)
            # Hot fans are determined per page and then merged; update()
            # also accepts non-string keys, unlike dict(a, **b) on Python 3.
            hotfan.update(
                percentfan(hotnum, _count_receivers(eachlist), percent))
    else:
        alllist = fbsql.defind_by_self(
            (select_shares + "user_msg.fromname='%s'") % page_name)
        hotfan = percentfan(hotnum, _count_receivers(alllist), percent)

    timedict = {}
    # Messages with a falsy share_count are remembered so that they are not
    # looked up again for each of their share rows.
    skipped = set()
    for msgid, fromid, toid, shared_at, like_count in alllist:
        entry = timedict.get(msgid)
        if entry is None:
            if msgid in skipped:
                continue
            detail = fbsql.defind_by_self(
                "select share_count,created_time from user_msg "
                "where msgid='%s'" % msgid)[0]
            if not detail[0]:
                skipped.add(msgid)
                continue
            entry = {
                'share_count': detail[0],
                'created_time': detail[1],
                'line_time': detail[1] + datetime.timedelta(hours=timeline),
                'all_num': 0,
                'like_sum': 0,
                'all_like_sum': 0,
            }
            timedict[msgid] = entry

        # Only shares inside the window that are not self-shares count.
        if shared_at < entry['line_time'] and fromid != toid:
            entry['all_num'] += 1
            # The truthiness test keeps a NULL like_count from being
            # compared with 0 (a TypeError on Python 3).
            if like_count and like_count > 0:
                if toid not in hotfan:
                    entry['like_sum'] += like_count
                entry['all_like_sum'] += like_count

    if justgetdata:
        return timedict

    dictx = []
    dicty = []
    for msgid in timedict:
        stats = timedict[msgid]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(msgid + ':' + str(stats['like_sum']))
        if stats['all_num'] > 0 and stats['like_sum'] > 0:
            dictx.append(stats['like_sum'])
            dicty.append(stats['share_count'])

    # Hold out a random 25% of the samples as test data.
    held_out = set(np.random.choice(
        len(dictx), int(len(dictx) * 0.25), replace=False).tolist())
    testdatax = []
    testdatay = []
    dicttrainx = []
    dicttrainy = []
    for idx, (x, y) in enumerate(zip(dictx, dicty)):
        if idx in held_out:
            testdatax.append(x)
            testdatay.append(y)
        else:
            dicttrainx.append(x)
            dicttrainy.append(y)

    print('训练数据:' + str(len(dicttrainx)))
    print('测试数据:' + str(len(testdatax)))

    xlabel = 'like_sum_notfan/all_num ' + str(timeline) + 'h'
    ylabel = 'share_count'
    # NOTE(review): the title shows timeline - 8 while the x label shows
    # timeline unchanged — confirm which one is intended.
    title = (page_name + ' like_sum_notfan:share_count time:'
             + str(timeline - 8) + 'h fan_defind:' + str(hotnum) + percent
             + 'up')
    return draw_corre(xdata=np.log10(dicttrainx),
                      ydata=np.log10(dicttrainy),
                      xlabelstr=xlabel,
                      ylabelstr=ylabel,
                      titlestr=title,
                      testdatax=np.log10(testdatax),
                      testdatay=np.log10(testdatay))