Ejemplo n.º 1
0
def allshare(page_name,hours,justgetdata=None):
    """Plot early share rows against the final share_count of each message.

    Message ids are selected from ``user_msg`` joined with
    ``share_fb_msg_new``: once per id in the module-level ``pagelist`` when
    ``page_name == 'all'``, otherwise for rows whose ``fromname`` equals
    ``page_name``.  For every distinct message id the function reads
    ``share_count`` and ``created_time`` and collects the ``toid`` rows of
    ``share_fb_msg_new`` created before ``created_time + hours``.  When that
    query returns nothing, it is repeated once with the window widened by
    three hours.

    Messages with at least one collected row and a truthy ``share_count``
    form the data set (x = number of rows, y = share_count).  A random
    quarter (rounded down) is held out as test data and both parts are
    handed, log10-transformed, to ``draw_corre``.

    Args:
        page_name: ``'all'`` or a value matched against ``user_msg.fromname``.
        hours: width of the early window, in hours after ``created_time``.
        justgetdata: when truthy, skip the plotting step.

    Returns:
        ``(onehoursdict, msgdict)`` when ``justgetdata`` is truthy, otherwise
        whatever ``draw_corre`` returns.

    NOTE(review): the SQL is assembled by string interpolation, so
    ``page_name`` must come from a trusted source.
    """
    import datetime
    import numpy as np
    from draw_corrcoef import draw_corre

    db = fb_mysql()
    join_sql = ('select user_msg.msgid from share_fb_msg_new,user_msg '
                'where share_fb_msg_new.trend_mid = user_msg.msgid and ')

    if page_name != 'all':
        alllist = db.defind_by_self(join_sql + "user_msg.fromname='%s'" % page_name)
    else:
        alllist = []
        for page_id in pagelist:
            alllist += db.defind_by_self(join_sql + "(user_msg.fromid = '%s')" % page_id)

    def rows_before(msgid, cutoff):
        # All share rows of `msgid` created strictly before `cutoff`.
        stamp = cutoff.strftime("%Y-%m-%d %H:%M:%S")
        return db.defind_by_self(
            "select toid from share_fb_msg_new where trend_mid='%s' and created_time<'%s'"
            % (msgid, stamp))

    msgdict = {}
    onehoursdict = {}
    for row in alllist:
        msgid = row[0]
        if msgid in msgdict:
            continue
        msgdict[msgid] = db.defind_by_self(
            "select share_count,created_time from user_msg where msgid='%s'" % msgid)[0]
        created = msgdict[msgid][1]
        early_rows = rows_before(msgid, created + datetime.timedelta(hours=hours))
        if not early_rows:
            # Nothing inside the requested window: retry once, 3 hours wider.
            early_rows = rows_before(msgid, created + datetime.timedelta(hours=hours+3))
        onehoursdict[msgid] = early_rows

    dictx = []
    dicty = []
    for msgid in msgdict:
        early_rows = onehoursdict[msgid]
        total_shares = msgdict[msgid][0]
        if early_rows and total_shares:
            dictx.append(len(early_rows))
            dicty.append(total_shares)
    allcount = len(dictx)

    if justgetdata:
        return (onehoursdict,msgdict)

    # Hold out a random quarter of the sample indices as test data.
    held_out = set(np.random.choice(len(dictx),int(len(dictx)*0.25),replace=False))
    testdatax = []
    testdatay = []
    dicttrainx = []
    dicttrainy = []
    for idx, (x, y) in enumerate(zip(dictx, dicty)):
        if idx in held_out:
            testdatax.append(x)
            testdatay.append(y)
        else:
            dicttrainx.append(x)
            dicttrainy.append(y)

    print('总数据:'+str(allcount))
    print('训练数据:'+str(len(dicttrainx)))
    print('测试数据:'+str(len(testdatax)))

    return draw_corre(
        xdata=np.log10(dicttrainx),
        ydata=np.log10(dicttrainy),
        xlabelstr=str(hours)+'_hours get_count',
        ylabelstr='share_count',
        titlestr=page_name+' '+str(hours)+'_hour:all_time_true_share_count',
        testdatax=np.log10(testdatax),
        testdatay=np.log10(testdatay))
Ejemplo n.º 2
0
def drawhotfun(hotfan,onehoursdict,msgdict,page_name,hotfannum,hours,percent,hotnot):
    """Plot the hot-fan fraction of each message's rows against share_count.

    For every message in ``onehoursdict`` with a non-empty row list, the
    function counts the rows whose first element is contained in ``hotfan``
    and turns that into a fraction of all rows of the message.  With
    ``hotnot`` truthy the complementary fraction (rows NOT in ``hotfan``) is
    used instead.  Messages with a positive fraction and a truthy
    ``msgdict[msgid][0]`` form the data set; a random quarter (rounded down)
    is held out as test data and both parts are passed, log10-transformed,
    to ``draw_corre``.

    Args:
        hotfan: container of ids (iterating it must yield the ids, e.g. a
            dict keyed by id or a list of ids).
        onehoursdict: mapping msgid -> sequence of rows; ``row[0]`` is the
            id compared against ``hotfan``.
        msgdict: mapping msgid -> row whose element 0 is the share count.
        page_name, hotfannum, percent: only used to build the plot title.
        hours: only used in the title, as ``hours-8``
            (presumably a UTC+8 offset correction -- TODO confirm).
        hotnot: truthy to plot the non-hot-fan fraction.

    Returns:
        Whatever ``draw_corre`` returns.
    """
    import numpy as np
    from draw_corrcoef import draw_corre

    # Build the lookup once so every membership test is O(1).
    hot_ids = set(hotfan)

    dictx = []
    dicty = []
    for msgid, rows in onehoursdict.items():
        if not rows:
            continue
        total = len(rows)
        # Count per row, so the numerator and the denominator (len(rows))
        # are measured in the same unit even when an id occurs repeatedly.
        hits = sum(1 for row in rows if row[0] in hot_ids)
        counted = total - hits if hotnot else hits
        # float() forces true division; plain int/int floors under Python 2
        # and would collapse the fraction to 0 or 1.
        x = float(counted) / total
        y = msgdict[msgid][0]
        if y and x > 0:
            dictx.append(x)
            dicty.append(y)

    # Hold out a random quarter of the sample indices as test data.
    held_out = set(np.random.choice(len(dictx),int(len(dictx)*0.25),replace=False))
    testdatax = []
    testdatay = []
    dicttrainx = []
    dicttrainy = []
    for idx, (x, y) in enumerate(zip(dictx, dicty)):
        if idx in held_out:
            testdatax.append(x)
            testdatay.append(y)
        else:
            dicttrainx.append(x)
            dicttrainy.append(y)

    print('训练数据:'+str(len(dicttrainx)))
    print('测试数据:'+str(len(testdatax)))

    xlabel = 'hot_fan_p'
    ylabel = 'share_count'
    title = page_name+' fan_difind:share_'+str(hotfannum)+percent+'up time:'+str(hours-8)+'h'
    return draw_corre(xdata=np.log10(dicttrainx), ydata=np.log10(dicttrainy), xlabelstr=xlabel, ylabelstr=ylabel, titlestr=title, testdatax=np.log10(testdatax), testdatay=np.log10(testdatay))
Ejemplo n.º 3
0
def drawtest(dataMat,labelMat,ws,page_name,hotnum,timeline,testx,testy,percent='',xlabelinput='',titleinput='',notshow = None):
    """Plot a two-feature linear combination against the labels.

    Every sample ``s`` of ``dataMat`` and ``testx`` is mapped to
    ``ws[0]*s[0] + ws[1]*s[1] + ws[2]``; the mapped training values with
    ``labelMat`` and the mapped test values with ``testy`` are handed to
    ``draw_corre`` unchanged (no log transform is applied here).

    Args:
        dataMat: indexable training samples, each with at least two features.
        labelMat: labels, indexed in parallel with ``dataMat``.
        ws: three coefficients (two weights followed by the constant term).
        page_name, hotnum, percent, titleinput: only used in the plot title.
        timeline: only used in the labels, as ``timeline-8``
            (presumably a UTC+8 offset correction -- TODO confirm).
        testx, testy: test samples and their labels.
        xlabelinput: names of the two features for the x-axis label.  Missing
            entries fall back to ``'x0'`` / ``'x1'`` so the default empty
            value no longer raises IndexError.
        notshow: forwarded to ``draw_corre`` as ``notshow``.

    Returns:
        Whatever ``draw_corre`` returns.
    """
    from draw_corrcoef import draw_corre

    def project(sample):
        # Linear model output for a single two-feature sample.
        return ws[0]*sample[0] + ws[1]*sample[1] + ws[2]

    dictx = [project(dataMat[i]) for i in range(len(dataMat))]
    dicty = [labelMat[i] for i in range(len(dataMat))]
    testdatax = [project(testx[i]) for i in range(len(testx))]
    testdatay = [testy[i] for i in range(len(testx))]

    # Tolerate a missing or short xlabelinput instead of crashing on indexing.
    name0 = xlabelinput[0] if len(xlabelinput) > 0 else 'x0'
    name1 = xlabelinput[1] if len(xlabelinput) > 1 else 'x1'

    xlabel = str(ws[0])+'*'+name0+'+'+str(ws[1])+'*'+name1+'+'+str(ws[2])+' '+str(timeline-8)+'h'
    ylabel = 'share_count'
    title = page_name+' '+titleinput+' time:'+str(timeline-8)+'h fan_defind:'+str(hotnum)+percent+'up'
    return draw_corre(xdata=dictx,ydata=dicty,xlabelstr=xlabel,ylabelstr=ylabel,titlestr=title,testdatax=testdatax,testdatay = testdatay,notshow=notshow)
Ejemplo n.º 4
0
def likesum_nothotfan(page_name,hotnum,timeline,percent = '',justgetdata=None):
    """Plot likes on early shares by non-hot-fans against share_count.

    Share rows ``(msgid, fromid, toid, created_time, like_count)`` are
    selected from ``share_fb_msg_new`` joined with ``user_msg``: once per id
    in the module-level ``pagelist`` when ``page_name == 'all'``, otherwise
    for rows whose ``fromname`` equals ``page_name``.  The per-``toid`` row
    counts are passed to ``percentfan(hotnum, counts, percent)`` to obtain
    the hot-fan container (per page in ``'all'`` mode, merged afterwards).

    For each message with a truthy ``share_count`` the function accumulates,
    over rows created before ``created_time + timeline`` hours and with
    ``fromid != toid``:

    * ``all_num``      -- number of such rows,
    * ``all_like_sum`` -- sum of their positive ``like_count`` values,
    * ``like_sum``     -- the same sum restricted to ``toid`` not in hotfan.

    Messages with ``all_num > 0`` and ``like_sum > 0`` form the data set
    (x = like_sum, y = share_count).  A random quarter (rounded down) is held
    out as test data and both parts go, log10-transformed, to ``draw_corre``.

    Args:
        page_name: ``'all'`` or a value matched against ``user_msg.fromname``.
        hotnum, percent: forwarded to ``percentfan``; also shown in the title.
        timeline: window width in hours after the message's ``created_time``.
        justgetdata: when truthy, return the accumulated per-message dict.

    Returns:
        ``timedict`` (msgid -> stats dict) when ``justgetdata`` is truthy,
        otherwise whatever ``draw_corre`` returns.

    NOTE(review): the SQL is assembled by string interpolation, so
    ``page_name`` must come from a trusted source.
    """
    import numpy as np
    import datetime
    from draw_corrcoef import draw_corre

    fbsql = fb_mysql()
    share_sql = ('select user_msg.msgid,share_fb_msg_new.fromid,toid,'
                 'share_fb_msg_new.created_time,share_fb_msg_new.like_count '
                 'from share_fb_msg_new,user_msg '
                 'where share_fb_msg_new.trend_mid = user_msg.msgid and ')

    def hot_fans_of(rows):
        # Count rows per toid and let percentfan pick the hot fans.
        counts = {}
        for row in rows:
            counts[row[2]] = counts.get(row[2], 0) + 1
        return percentfan(hotnum, counts, percent)

    if page_name == 'all':
        alllist = []
        hotfan = {}
        for page_id in pagelist:
            eachlist = fbsql.defind_by_self(share_sql + "(user_msg.fromid = '%s')" % page_id)
            alllist += eachlist
            # update() merges any mapping, whatever the key type.
            hotfan.update(hot_fans_of(eachlist))
    else:
        alllist = fbsql.defind_by_self(share_sql + "user_msg.fromname='" + page_name + "'")
        hotfan = hot_fans_of(alllist)

    window = datetime.timedelta(hours=timeline)
    timedict = dict()
    # Messages with a falsy share_count are left out of timedict; remember
    # them so their detail query is not repeated for every share row.
    skipped = set()
    for row in alllist:
        msgid = row[0]
        if msgid in skipped:
            continue
        stats = timedict.get(msgid)
        if stats is None:
            detail = fbsql.defind_by_self(
                "select share_count,created_time from user_msg where msgid='%s'" % msgid)[0]
            if not detail[0]:
                skipped.add(msgid)
                continue
            stats = {
                'share_count': detail[0],
                'created_time': detail[1],
                'line_time': detail[1] + window,
                'all_num': 0,
                'like_sum': 0,
                'all_like_sum': 0,
            }
            timedict[msgid] = stats
        # Only rows inside the window whose fromid differs from toid count.
        if row[3] < stats['line_time'] and row[1] != row[2]:
            stats['all_num'] += 1
            if row[4] > 0:
                if row[2] not in hotfan:
                    stats['like_sum'] += row[4]
                stats['all_like_sum'] += row[4]

    if justgetdata:
        return timedict

    dictx = []
    dicty = []
    for msgid in timedict.keys():
        stats = timedict[msgid]
        print(msgid+':'+str(stats['like_sum']))
        if stats['all_num'] > 0 and stats['like_sum'] > 0:
            dictx.append(stats['like_sum'])
            dicty.append(stats['share_count'])

    # Hold out a random quarter of the sample indices as test data.
    held_out = set(np.random.choice(len(dictx),int(len(dictx)*0.25),replace=False))
    testdatax = []
    testdatay = []
    dicttrainx = []
    dicttrainy = []
    for idx, (x, y) in enumerate(zip(dictx, dicty)):
        if idx in held_out:
            testdatax.append(x)
            testdatay.append(y)
        else:
            dicttrainx.append(x)
            dicttrainy.append(y)

    print('训练数据:'+str(len(dicttrainx)))
    print('测试数据:'+str(len(testdatax)))

    # NOTE(review): the label mentions "/all_num" and uses `timeline` while
    # the title uses `timeline-8`; x itself is the plain like_sum.  Strings
    # are kept as they were -- confirm which form is intended.
    xlabel='like_sum_notfan/all_num '+str(timeline)+'h'
    ylabel='share_count'
    title=page_name+' like_sum_notfan:share_count time:'+str(timeline-8)+'h fan_defind:'+str(hotnum)+percent+'up'
    return draw_corre(xdata=np.log10(dicttrainx), ydata=np.log10(dicttrainy), xlabelstr=xlabel, ylabelstr=ylabel, titlestr=title, testdatax=np.log10(testdatax), testdatay=np.log10(testdatay))