Beispiel #1
0
def standRegres_like(page_name, hotnum, timeline, percent='', notshow=None):
    """Least-squares fit of log10(share_count) against two like-count features.

    Each usable entry returned by ``likesum_nothotfan`` becomes one row
    ``[log10(all_like_sum), log10(like_sum), 1.0]`` (the trailing 1.0 is the
    intercept column) with label ``log10(share_count)``.  The rows are split
    by ``devide_data`` into training and test parts, the weights are solved
    from the normal equations ``(X^T X)^-1 X^T y`` on the training part, and
    everything is handed to ``drawtest``.

    Args:
        page_name, hotnum, timeline, percent: forwarded unchanged to
            ``likesum_nothotfan`` (and, except for the data call's keyword
            names, to ``drawtest``).
        notshow: forwarded to ``drawtest`` as ``notshow``.

    Returns:
        Whatever ``drawtest`` returns, or ``None`` when there is no training
        data or ``X^T X`` is singular (a message is printed in that case).
    """
    from fan import likesum_nothotfan
    # Explicit names instead of ``from numpy import *``: a star import inside
    # a function is a SyntaxError on Python 3 and silently shadows builtins.
    # ``asmatrix`` is the function the old ``mat`` alias pointed to.
    from numpy import asmatrix, linalg, log10

    timedict = likesum_nothotfan(page_name=page_name, hotnum=hotnum,
                                 timeline=timeline, percent=percent,
                                 justgetdata=True)

    # Build the design matrix rows and the labels.
    dataMat = []
    labelMat = []
    lendatanum = 0
    for key in timedict:
        entry = timedict[key]
        if not entry['all_like_sum'] > 0:
            continue
        lendatanum += 1

        share_count = entry['share_count']
        if not share_count > 0:
            # log10 of a non-positive count is -inf/nan; a single such label
            # would turn every fitted weight into nan, so skip the row.
            continue

        like_sum = entry['like_sum']
        if like_sum <= 0:
            # Clamp so that log10 stays finite (log10(1) == 0).
            like_sum = 1
        dataMat.append([log10(entry['all_like_sum']), log10(like_sum), 1.0])
        labelMat.append(log10(share_count))

    # Single-argument parenthesised prints behave the same as a Python 2
    # print statement and as the Python 3 print function.
    print('总条数:'+str(lendatanum))
    print('总数据:'+str(len(dataMat)))

    (dataMat, labelMat, testx, testy) = devide_data(dataMat, labelMat)
    print('训练数据:'+str(len(dataMat)))
    print('测试数据:'+str(len(testx)))

    if len(dataMat) == 0:
        # Nothing to fit; the matrix algebra below would fail on empty input.
        print('没有解')
        return

    xMat = asmatrix(dataMat)
    yMat = asmatrix(labelMat).T
    xTx = xMat.T*xMat
    if linalg.det(xTx) == 0.0:
        # Singular normal equations: no unique least-squares solution.
        print('没有解')
        print(xTx)
        return
    ws = xTx.I * (xMat.T*yMat)
    return drawtest(dataMat, labelMat, ws.getA1(), page_name, hotnum, timeline,
                    testx, testy, percent=percent,
                    xlabelinput=['like_hourall', 'like_nothotfan'],
                    titleinput='like_all and like_not_hotfan:share_count',
                    notshow=notshow)
Beispiel #2
0
def standRegres(page_name, hotnum, timeline, percent='', notshow=None):
    """Least-squares fit of log10(share count) on hot-fan ratio and like sum.

    For every key present in both the ``sharefan`` and ``likesum_nothotfan``
    results, one row ``[log10(x), log10(like_sum), 1.0]`` is built, where
    ``x`` is the fraction of that key's sharer records that match an entry
    of ``hotfan`` and the trailing 1.0 is the intercept column.  The label is
    ``log10(msgdict[key][0])``.  Rows are split by ``devide_data``, weights
    are solved from the normal equations ``(X^T X)^-1 X^T y`` on the training
    part, and the result is handed to ``drawtest``.

    Args:
        page_name, hotnum, timeline, percent: forwarded to ``sharefan`` /
            ``likesum_nothotfan`` and to ``drawtest``.
        notshow: forwarded to ``drawtest`` as ``notshow``.

    Returns:
        Whatever ``drawtest`` returns, or ``None`` when there is no training
        data or ``X^T X`` is singular (a message is printed in that case).
    """
    from fan import sharefan
    from fan import likesum_nothotfan
    # Explicit names instead of ``from numpy import *``: a star import inside
    # a function is a SyntaxError on Python 3 and silently shadows builtins.
    # ``asmatrix`` is the function the old ``mat`` alias pointed to.
    from numpy import asmatrix, linalg, log10

    (hotfan, onehoursdict, msgdict) = sharefan(page_name=page_name,
                                               hotfannum=hotnum,
                                               hours=timeline,
                                               percent=percent,
                                               justgetdata=True)
    timedict = likesum_nothotfan(page_name=page_name, hotnum=hotnum,
                                 timeline=timeline, percent=percent,
                                 justgetdata=True)

    # Build the design matrix rows and the labels.
    dataMat = []
    labelMat = []
    lendatanum = 0
    for key in onehoursdict:
        if key not in timedict:
            continue
        sharers = onehoursdict[key]
        msg = msgdict[key]
        if not sharers:
            # No sharer records for this key; report it when its count is
            # large enough to be suspicious, then skip it.
            if msg[0] > 50:
                print(str(msg[0])+msg[2]+str(msg[1])+' '+key)
            continue
        lendatanum += 1
        if not msg[0]:
            # A zero share count has no finite log10 label.
            continue

        # Sharer records are compared as 1-tuples, exactly as stored.
        hot_hits = sum(1 for fan_id in hotfan if (fan_id,) in sharers)
        # float() forces true division: on Python 2 int/int floors, which
        # made this ratio 0 or 1 and the feature below always log10(1) == 0,
        # leaving X^T X singular.
        x = float(hot_hits)/len(sharers)
        if x <= 0:
            # NOTE(review): a zero ratio is mapped to 1, i.e. the same
            # feature value (0) as "every sharer is a hot fan" - confirm
            # this is the intended encoding.
            x = 1

        # Clamp on a local copy instead of writing back into timedict.
        like_sum = timedict[key]['like_sum']
        if like_sum <= 0:
            like_sum = 1

        dataMat.append([log10(x), log10(like_sum), 1.0])
        labelMat.append(log10(msg[0]))

    # Solve for the weights.
    # Single-argument parenthesised prints behave the same as a Python 2
    # print statement and as the Python 3 print function.
    print('总条数:'+str(lendatanum))
    print('总数据:'+str(len(dataMat)))

    (dataMat, labelMat, testx, testy) = devide_data(dataMat, labelMat)
    print('训练数据:'+str(len(dataMat)))
    print('测试数据:'+str(len(testx)))

    if len(dataMat) == 0:
        # Nothing to fit; the matrix algebra below would fail on empty input.
        print('没有解')
        return

    xMat = asmatrix(dataMat)
    yMat = asmatrix(labelMat).T
    xTx = xMat.T*xMat
    if linalg.det(xTx) == 0.0:
        # Singular normal equations: no unique least-squares solution.
        print('没有解')
        return
    ws = xTx.I * (xMat.T*yMat)
    return drawtest(dataMat, labelMat, ws.getA1(), page_name, hotnum, timeline,
                    testx, testy, percent=percent,
                    xlabelinput=['sharehotfan', 'like_not_fan'],
                    titleinput='like_sum_notfan and sharehotfan:share_count',
                    notshow=notshow)