def standRegres(page_name,hotnum,timeline,percent = '',notshow = None):
    """Fit a log-log linear regression of share count and hand it to ``drawtest``.

    For every message id present in both the share data (``sharefan``) and
    the like data (``likesum_nothotfan``) one sample is built:

    * feature 0: ``log10`` of (number of ``hotfan`` entries found among the
      message's share records) / (number of share records), clamped to 1
      when the ratio is not positive;
    * feature 1: ``log10`` of the message's ``like_sum``, clamped to 1 when
      it is not positive;
    * feature 2: the constant ``1.0`` (intercept term);
    * label: ``log10`` of ``msgdict[id][0]``.

    The samples are split by ``devide_data`` and the weights are obtained
    from the normal equations ``(X^T X)^-1 X^T y``.

    Args:
        page_name: forwarded unchanged to ``sharefan`` / ``likesum_nothotfan``
            and ``drawtest``.
        hotnum: forwarded as ``hotfannum`` / ``hotnum``.
        timeline: forwarded as ``hours`` / ``timeline``.
        percent: forwarded unchanged to the helpers and ``drawtest``.
        notshow: forwarded unchanged to ``drawtest``.

    Returns:
        Whatever ``drawtest`` returns, or ``None`` when there is no training
        data or ``X^T X`` is singular (a message is printed in that case).
    """
    from fan import sharefan
    from fan import likesum_nothotfan
    # Explicit names instead of ``from numpy import *``: a wildcard import
    # inside a function is a SyntaxError on Python 3 and shadows builtins.
    from numpy import linalg, log10, mat

    # load data
    dataMat = []
    labelMat = []
    (hotfan,onehoursdict,msgdict) = sharefan(page_name=page_name,hotfannum=hotnum,hours=timeline,percent=percent,justgetdata=True)
    timedict = likesum_nothotfan(page_name=page_name,hotnum=hotnum,timeline=timeline,percent=percent,justgetdata=True)

    lendatanum = 0
    for i in onehoursdict:
        if i not in timedict:
            continue
        shares = onehoursdict[i]
        if not shares:
            # No share records: report it when msgdict[i][0] exceeds 50,
            # then skip the message (it would otherwise divide by zero).
            if msgdict[i][0] > 50:
                print(str(msgdict[i][0])+msgdict[i][2]+str(msgdict[i][1])+' '+i)
            continue
        lendatanum += 1

        # Share records are matched as 1-tuples ``(j,)``.
        tempx = 0
        for j in hotfan:
            if (j,) in shares:
                tempx += 1
        # float() forces true division; plain ``/`` on two ints truncates to
        # 0 or 1 under Python 2 unless the module enables true division.
        x = float(tempx) / len(shares)

        y = msgdict[i][0]
        if not y:
            continue
        if x <= 0:
            x = 1
        # Clamp a local copy rather than writing back into the helper's data.
        like_sum = timedict[i]['like_sum']
        if like_sum <= 0:
            like_sum = 1
        dataMat.append([log10(x),log10(like_sum),1.0])
        labelMat.append(log10(y))

    # compute the weights w
    print('总条数:'+str(lendatanum))      # total messages considered
    print('总数据:'+str(len(dataMat)))    # total samples built
    testx = []
    testy = []
    (dataMat,labelMat,testx,testy) = devide_data(dataMat,labelMat)
    print('训练数据:'+str(len(dataMat)))  # training samples
    print('测试数据:'+str(len(testx)))    # test samples

    if len(dataMat) == 0:
        # Nothing to fit; report it the same way as a singular system.
        print('没有解')
        return

    xMat = mat(dataMat)
    yMat = mat(labelMat).T
    xTx = xMat.T*xMat
    if linalg.det(xTx) == 0.0:
        # Singular normal matrix: no unique least-squares solution.
        print('没有解')
        return
    ws = xTx.I * (xMat.T*yMat)
    return drawtest(dataMat,labelMat,ws.getA1(),page_name,hotnum,timeline,testx,testy,percent=percent,xlabelinput=['sharehotfan','like_not_fan'],titleinput='like_sum_notfan and sharehotfan:share_count',notshow = notshow)
def standRegres_share(page_name,hotnum,timeline,percent = '',notshow = None):
    """Fit a log-log share-count regression and query it interactively.

    NOTE: a later definition with the same name in this file replaces this
    one at import time, so this variant is only reachable if that changes.

    One sample is built for each message id whose share records and
    ``msgdict`` entry are non-empty and whose ``msgdict[id][0]`` is truthy:

    * feature 0: ``log10`` of the fraction of share records whose first
      field is in ``hotfan`` (clamped to 1 when the fraction is 0);
    * feature 1: ``log10`` of the number of share records;
    * feature 2: the constant ``1.0`` (intercept term);
    * label: ``log10`` of ``msgdict[id][0]``.

    After solving the normal equations the function prompts for message
    ids in a loop and prints the predicted and actual value for each;
    entering ``q`` ends the loop.

    Args:
        page_name: forwarded unchanged to ``sharefan``.
        hotnum: forwarded as ``hotfannum``.
        timeline: forwarded as ``hours``.
        percent: forwarded unchanged to ``sharefan``.
        notshow: accepted for signature parity; not used here.

    Returns:
        ``None``.
    """
    from fan import sharefan
    # Explicit names instead of ``from numpy import *``: a wildcard import
    # inside a function is a SyntaxError on Python 3 and shadows builtins.
    from numpy import linalg, log10, mat

    try:
        read_line = raw_input   # Python 2
    except NameError:
        read_line = input       # Python 3

    # load data
    dataMat = []
    labelMat = []
    (hotfan,onehoursdict,msgdict) = sharefan(page_name=page_name,hotfannum=hotnum,hours=timeline,percent=percent,justgetdata=True)

    id2num = dict()  # message id -> row index in dataMat
    for i in onehoursdict:
        shares = onehoursdict[i]
        if not (shares and msgdict[i]):
            continue
        tempx = sum(1 for j in shares if j[0] in hotfan)
        # float() forces true division; plain ``/`` on two ints truncates to
        # 0 or 1 under Python 2 unless the module enables true division.
        x = float(tempx) / len(shares)
        y = msgdict[i][0]
        if not y:
            continue
        if x <= 0:
            x = 1
        # Record the index at append time. Counting every visited message
        # (including ones skipped above) let the index drift away from the
        # actual row position in dataMat.
        id2num[i] = len(dataMat)
        dataMat.append([log10(x),log10(len(shares)),1.0])
        labelMat.append(log10(y))

    if not dataMat:
        # Nothing to fit; report it the same way as a singular system.
        print('没有解')
        return

    xMat = mat(dataMat)
    yMat = mat(labelMat).T
    xTx = xMat.T*xMat
    if linalg.det(xTx) == 0.0:
        # Singular normal matrix: no unique least-squares solution.
        print('没有解')
        return
    ws = xTx.I * (xMat.T*yMat)
    ws = ws.getA1()
    print(ws)

    while True:
        test_id = read_line('please input id:')
        if test_id == 'q':
            break
        if test_id not in id2num:
            # Unknown ids used to raise KeyError and abort the session.
            print('unknown id: ' + test_id)
            continue
        row = id2num[test_id]
        print(xMat[row])
        tempx = dataMat[row]
        print(tempx)
        # The model is fitted in log10 space, so undo the log for display.
        testresult = 10**(ws[0]*tempx[0] + ws[1]*tempx[1] + ws[2])
        print('预测结果:' + str(testresult))            # predicted value
        print('真实结果:' + str(msgdict[test_id][0]))   # actual value
def standRegres_share(page_name,hotnum,timeline,percent = '',notshow = None):
    """Fit a log-log share-count regression and hand it to ``drawtest``.

    One sample is built for each message id whose share records and
    ``msgdict`` entry are non-empty and whose ``msgdict[id][0]`` is truthy:

    * feature 0: ``log10`` of the fraction of share records whose first
      field is in ``hotfan`` (clamped to 1 when the fraction is 0);
    * feature 1: ``log10`` of the number of share records;
    * feature 2: the constant ``1.0`` (intercept term);
    * label: ``log10`` of ``msgdict[id][0]``.

    The samples are split by ``devide_data`` and the weights are obtained
    from the normal equations ``(X^T X)^-1 X^T y``.

    Args:
        page_name: forwarded unchanged to ``sharefan`` and ``drawtest``.
        hotnum: forwarded as ``hotfannum`` and to ``drawtest``.
        timeline: forwarded as ``hours`` and to ``drawtest``.
        percent: forwarded unchanged to ``sharefan`` and ``drawtest``.
        notshow: forwarded unchanged to ``drawtest``.

    Returns:
        Whatever ``drawtest`` returns, or ``None`` when there is no training
        data or ``X^T X`` is singular (a message is printed in that case).
    """
    from fan import sharefan
    # Explicit names instead of ``from numpy import *``: a wildcard import
    # inside a function is a SyntaxError on Python 3 and shadows builtins.
    from numpy import linalg, log10, mat

    # load data
    dataMat = []
    labelMat = []
    (hotfan,onehoursdict,msgdict) = sharefan(page_name=page_name,hotfannum=hotnum,hours=timeline,percent=percent,justgetdata=True)

    lendatanum = 0
    for i in onehoursdict:
        shares = onehoursdict[i]
        if not (shares and msgdict[i]):
            continue
        lendatanum += 1
        tempx = sum(1 for j in shares if j[0] in hotfan)
        # float() forces true division; plain ``/`` on two ints truncates to
        # 0 or 1 under Python 2 unless the module enables true division.
        x = float(tempx) / len(shares)
        y = msgdict[i][0]
        if not y:
            continue
        if x <= 0:
            x = 1
        dataMat.append([log10(x),log10(len(shares)),1.0])
        labelMat.append(log10(y))

    print('总条数:'+str(lendatanum))      # total messages considered
    print('总数据:'+str(len(dataMat)))    # total samples built
    testx = []
    testy = []
    (dataMat,labelMat,testx,testy) = devide_data(dataMat,labelMat)
    print('训练数据:'+str(len(dataMat)))  # training samples
    print('测试数据:'+str(len(testx)))    # test samples

    if len(dataMat) == 0:
        # Nothing to fit; report it the same way as a singular system.
        print('没有解')
        return

    xMat = mat(dataMat)
    yMat = mat(labelMat).T
    xTx = xMat.T*xMat
    if linalg.det(xTx) == 0.0:
        # Singular normal matrix: no unique least-squares solution.
        print('没有解')
        return
    ws = xTx.I * (xMat.T*yMat)
    # NOTE(review): column 0 is the hot-fan ratio and column 1 the total
    # share count, but the labels below are listed in the opposite order.
    # Confirm against drawtest before changing.
    return drawtest(dataMat,labelMat,ws.getA1(),page_name,hotnum,timeline,testx,testy,percent=percent,xlabelinput=['share_hourall','share_hotfan_per'],titleinput='shareall and sharehotfan:share_count',notshow = notshow)