def standRegres_like(page_name,hotnum,timeline,percent = '',notshow = None):
    """Fit a log-log linear regression of share count on two like counts.

    Each usable entry returned by ``likesum_nothotfan`` becomes one sample:

    * features: ``[log10(all_like_sum), log10(like_sum), 1.0]`` (the trailing
      1.0 is the intercept term; a non-positive ``like_sum`` is treated as 1)
    * label: ``log10(share_count)``

    The samples are split by ``devide_data`` and the weights are solved from
    the normal equations ``ws = (X^T X)^-1 X^T y`` on the training part.

    Args:
        page_name, hotnum, timeline, percent: forwarded unchanged to
            ``likesum_nothotfan`` and ``drawtest``.
        notshow: forwarded unchanged to ``drawtest``.

    Returns:
        Whatever ``drawtest`` returns, or ``None`` when no weights can be
        solved (empty training set or singular ``X^T X``).
    """
    from fan import likesum_nothotfan
    # Explicit names instead of ``from numpy import *``: a wildcard import
    # inside a function is a SyntaxError on Python 3. ``asmatrix`` is what the
    # old ``mat`` alias pointed to and still exists in current NumPy.
    from numpy import asmatrix, linalg, log10

    # load data
    dataMat = []
    labelMat = []
    timedict = likesum_nothotfan(page_name=page_name,hotnum=hotnum,timeline=timeline,percent=percent,justgetdata=True)
    lendatanum = 0
    for key in timedict:
        entry = timedict[key]
        if entry['all_like_sum'] <= 0:
            continue
        lendatanum += 1
        if entry['share_count'] <= 0:
            # log10 is undefined here; a -inf label would turn every weight
            # into nan, so the sample is dropped.
            continue
        like_sum = entry['like_sum']
        if like_sum <= 0:
            like_sum = 1  # log10(1) == 0 stands in for "no likes"
        dataMat.append([log10(entry['all_like_sum']),log10(like_sum),1.0])
        labelMat.append(log10(entry['share_count']))

    print('总条数:'+str(lendatanum))
    print('总数据:'+str(len(dataMat)))
    (dataMat,labelMat,testx,testy) = devide_data(dataMat,labelMat)
    print('训练数据:'+str(len(dataMat)))
    print('测试数据:'+str(len(testx)))

    if len(dataMat) == 0:
        # Nothing to fit; report it the same way as the singular case.
        print('没有解')
        return

    # solve the normal equations
    xMat = asmatrix(dataMat)
    yMat = asmatrix(labelMat).T
    xTx = xMat.T*xMat
    if linalg.det(xTx) == 0.0:
        print('没有解')
        print(xTx)
        return
    ws = xTx.I * (xMat.T*yMat)
    return drawtest(dataMat,labelMat,ws.getA1(),page_name,hotnum,timeline,testx,testy,percent=percent,xlabelinput=['like_hourall','like_nothotfan'],titleinput='like_all and like_not_hotfan:share_count',notshow = notshow)
def standRegres(page_name,hotnum,timeline,percent = '',notshow = None):
    """Fit a log-log linear regression of share count on hot-fan ratio and likes.

    For every key present in both the ``sharefan`` and ``likesum_nothotfan``
    results, one sample is built:

    * ``x``: fraction of the entries in ``onehoursdict[key]`` that match a
      member of ``hotfan`` (a fraction of 0 is replaced by 1, i.e. log10 = 0)
    * features: ``[log10(x), log10(like_sum), 1.0]`` (the trailing 1.0 is the
      intercept term; a non-positive ``like_sum`` is treated as 1)
    * label: ``log10(msgdict[key][0])``; keys whose value is falsy are skipped

    The samples are split by ``devide_data`` and the weights are solved from
    the normal equations ``ws = (X^T X)^-1 X^T y`` on the training part.

    Args:
        page_name, hotnum, timeline, percent: forwarded unchanged to
            ``sharefan``, ``likesum_nothotfan`` and ``drawtest``.
        notshow: forwarded unchanged to ``drawtest``.

    Returns:
        Whatever ``drawtest`` returns, or ``None`` when no weights can be
        solved (empty training set or singular ``X^T X``).
    """
    from fan import sharefan
    from fan import likesum_nothotfan
    # Explicit names instead of ``from numpy import *``: a wildcard import
    # inside a function is a SyntaxError on Python 3. ``asmatrix`` is what the
    # old ``mat`` alias pointed to and still exists in current NumPy.
    from numpy import asmatrix, linalg, log10

    # load data
    dataMat = []
    labelMat = []
    (hotfan,onehoursdict,msgdict) = sharefan(page_name=page_name,hotfannum=hotnum,hours=timeline,percent=percent,justgetdata=True)
    timedict = likesum_nothotfan(page_name=page_name,hotnum=hotnum,timeline=timeline,percent=percent,justgetdata=True)
    lendatanum = 0
    for i in onehoursdict:
        if i not in timedict:
            continue
        sharers = onehoursdict[i]
        if not sharers:
            # No entries for this key: report it when the count in
            # msgdict[i][0] is above 50, then skip it (the ratio below would
            # divide by zero).
            if msgdict[i][0]>50:
                print(str(msgdict[i][0])+msgdict[i][2]+str(msgdict[i][1])+' '+i)
            continue
        lendatanum += 1

        hot_sharers = 0
        for j in hotfan:
            if (j,) in sharers:
                hot_sharers += 1
        # True division: with Python 2 integer division the ratio collapsed to
        # 0 or 1, so log10(x) was always 0 and X^T X became singular.
        x = float(hot_sharers)/len(sharers)

        y = msgdict[i][0]
        if not y:
            continue
        if x <= 0:
            x = 1
        like_sum = timedict[i]['like_sum']
        if like_sum <= 0:
            like_sum = 1  # log10(1) == 0 stands in for "no likes"
        dataMat.append([log10(x),log10(like_sum),1.0])
        labelMat.append(log10(y))

    # compute w (the regression weights)
    print('总条数:'+str(lendatanum))
    print('总数据:'+str(len(dataMat)))
    (dataMat,labelMat,testx,testy) = devide_data(dataMat,labelMat)
    print('训练数据:'+str(len(dataMat)))
    print('测试数据:'+str(len(testx)))

    if len(dataMat) == 0:
        # Nothing to fit; report it the same way as the singular case.
        print('没有解')
        return

    xMat = asmatrix(dataMat)
    yMat = asmatrix(labelMat).T
    xTx = xMat.T*xMat
    if linalg.det(xTx) == 0.0:
        print('没有解')
        return
    ws = xTx.I * (xMat.T*yMat)
    return drawtest(dataMat,labelMat,ws.getA1(),page_name,hotnum,timeline,testx,testy,percent=percent,xlabelinput=['sharehotfan','like_not_fan'],titleinput='like_sum_notfan and sharehotfan:share_count',notshow = notshow)