import svdRec
from numpy import *

Data = svdRec.loadExData()
U, Sigma, VT = linalg.svd(Data)
print(Sigma)
Sig3 = mat([[Sigma[0], 0, 0], [0, Sigma[1], 0], [0, 0, Sigma[2]]])
print(U[:, :3] * Sig3 * VT[:3, :])

myMat = mat(svdRec.loadExData())
print(svdRec.ecludSim(myMat[:, 0], myMat[:, 4]))
print(svdRec.ecludSim(myMat[:, 0], myMat[:, 1]))
print(svdRec.pearsSim(myMat[:, 0], myMat[:, 4]))
print(svdRec.pearsSim(myMat[:, 0], myMat[:, 1]))
print(svdRec.cosSim(myMat[:, 0], myMat[:, 4]))
print(svdRec.cosSim(myMat[:, 0], myMat[:, 1]))

myMat = mat(svdRec.loadExData1())
print(myMat)
print(svdRec.recommend(myMat, 2))
print(svdRec.recommend(myMat, 2, simMeas=svdRec.ecludSim))
print(svdRec.recommend(myMat, 2, simMeas=svdRec.pearsSim))

from numpy import linalg as la
U, Sigma, VT = la.svd(mat(svdRec.loadExData2()))
print(Sigma)
Sig2 = Sigma ** 2
print(sum(Sig2))            # total "energy" of the squared singular values
print(sum(Sig2) * 0.9)      # 90% of the total energy
print(sum(Sig2[:2]))        # energy kept by the first two singular values
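# The last three prints above check by hand whether the first two singular
# values of loadExData2() keep at least 90% of the total squared-singular-value
# energy. A minimal numpy-only sketch of the same idea as a reusable helper;
# numRank90 is a hypothetical name and is not part of svdRec:
import numpy as np

def numRank90(sigma, threshold=0.9):
    """Smallest k such that sigma[:k] keeps `threshold` of the total energy."""
    energy = sigma ** 2
    cumulative = np.cumsum(energy) / np.sum(energy)
    return int(np.searchsorted(cumulative, threshold) + 1)

# Example with an arbitrary spectrum where the first two values dominate:
print(numRank90(np.array([10.0, 8.0, 0.5, 0.1])))  # -> 2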
from numpy import *
import svdRec

def test2():
    myMat = mat(svdRec.loadExData())
    print(myMat)
    print(svdRec.ecludSim(myMat[:, 0], myMat[:, 4]))
    print(svdRec.cosSim(mat([[4], [2]]), mat([[1], [2]])))
    print(svdRec.pearsSim(myMat[:, 0], myMat[:, 4]))
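# A quick hand check of the cosSim call above: the raw cosine of (4, 2) and
# (1, 2) is (4*1 + 2*2) / (sqrt(20) * sqrt(5)) = 8 / 10 = 0.8. A numpy-only
# verification of that arithmetic (svdRec.cosSim itself may rescale the result,
# e.g. into a 0..1 range; its internals are not shown here):
import numpy as np

a = np.array([4.0, 2.0])
b = np.array([1.0, 2.0])
print(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))  # -> 0.8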
from numpy import *
import svdRec

myMat = mat(svdRec.loadExData())
test1 = svdRec.ecludSim(myMat[:, 0], myMat[:, 4])
test2 = svdRec.ecludSim(myMat[:, 0], myMat[:, 0])
print(test1, test2)
test1 = svdRec.cosSim(myMat[:, 0], myMat[:, 4])
test2 = svdRec.cosSim(myMat[:, 0], myMat[:, 0])
print(test1, test2)
test1 = svdRec.pearsSim(myMat[:, 0], myMat[:, 4])
test2 = svdRec.pearsSim(myMat[:, 0], myMat[:, 0])
print(test1, test2)
result = svdRec.recommend(myMat, 2)
print(result)
result = svdRec.recommend(myMat, 2, simMeas=svdRec.ecludSim)  # keyword is simMeas
print(result)
result = svdRec.recommend(myMat, 2, simMeas=svdRec.pearsSim)
print(result)
# SVD demo
from numpy import *

U, Sigma, VT = linalg.svd([[1, 1], [7, 7]])
# print(U, Sigma, VT)

import svdRec

Data = svdRec.loadExData()
U, Sigma, VT = linalg.svd(Data)
print(Sigma)          # svd returns only the diagonal elements, to save space
print(Sigma.shape)
Sig3 = mat([[Sigma[0], 0, 0], [0, Sigma[1], 0], [0, 0, Sigma[2]]])
rec_image = U[:, :3] * Sig3 * VT[:3, :]   # rank-3 reconstruction of Data
print(rec_image)

myMat = mat(svdRec.loadExData())
dis = svdRec.ecludSim(myMat[:, 0], myMat[:, 4])
print(dis)
dis = svdRec.cosSim(myMat[:, 0], myMat[:, 4])
print(dis)
dis = svdRec.pearsSim(myMat[:, 0], myMat[:, 4])
print(dis)
print(svdRec.recommend(myMat, 2))
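# Typing the truncated Sigma matrix out element by element, as above, is easy
# to get wrong as the rank grows. A small sketch of the same rank-3
# reconstruction built with numpy.diag instead; this is only an alternative
# phrasing, not something svdRec itself provides:
import numpy as np
import svdRec

Data = svdRec.loadExData()
U, Sigma, VT = np.linalg.svd(Data)
k = 3
SigK = np.mat(np.diag(Sigma[:k]))        # k x k diagonal matrix of singular values
print(U[:, :k] * SigK * VT[:k, :])       # rank-k approximation of Data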
print("VT=",VT) Sig3=mat([[Sigma[0],0,0],[0,Sigma[1],0],[0,0,Sigma[2]]]) Sig5=mat([[Sigma[0],0,0,0,0],[0,Sigma[1],0,0,0],[0,0,Sigma[2],0,0],[0,0,0,Sigma[3],0],[0,0,0,0,Sigma[4]]]) DataWithSig3=U[:,:3]*Sig3*VT[:3,:] #DataWithSig5=U[:,:]*Sig5*VT[:,:] print("Data Sigma3 ReConstruct:\n",U[:,:3]*Sig3*VT[:3,:]) #print("Data Sigma5 ReConstruct:\n",DataWithSig5) myMat=mat(svdRec.loadExData()) print(svdRec.ecludSim(myMat[:,0],myMat[:,4])) print(svdRec.ecludSim(myMat[:,0],myMat[:,0])) print(svdRec.cosSim(myMat[:,0],myMat[:,4])) print(svdRec.cosSim(myMat[:,0],myMat[:,0])) print(svdRec.pearsSim(myMat[:,0],myMat[:,4])) print(svdRec.pearsSim(myMat[:,0],myMat[:,0])) myMat=mat([[4,4,0,2,2],[4,0,0,3,3],[4,0,0,1,1],[1,1,1,2,0], [2,2,2,0,0],[1,1,1,0,0],[5,5,5,0,0]]) print(myMat) recommendResult=svdRec.recommend(myMat,2) recommendResultWithEclud=svdRec.recommend(myMat,2,simMeas=svdRec.ecludSim) recommendResultWithPears=svdRec.recommend(myMat,2,simMeas=svdRec.pearsSim) print(recommendResult) print(recommendResultWithEclud) print(recommendResultWithPears) U,Sigma,VT=linalg.svd(mat(svdRec.loadExData2()))
import numpy as np
import svdRec
from importlib import reload

# Try SVD on a slightly larger data set.
data = svdRec.loadExData()
U, Sigma, VT = np.linalg.svd(data)
print(Sigma)
# The first three singular values are clearly larger than the last two -- the
# gap is orders of magnitude. So the original data set can be approximated as
# Data(m*n) = U(m*3) * Sigma(3*3) * V^T(3*n)?
Sig3 = np.mat([[Sigma[0], 0, 0], [0, Sigma[1], 0], [0, 0, Sigma[2]]])
print(U[:, :3] * Sig3 * VT[:3, :])   # rank-3 reconstruction -- use this from here on?

reload(svdRec)
myMat = np.mat(svdRec.loadExData())
# Similarity between column vectors, with three different measures:
print(svdRec.eculidSim(myMat[:, 0], myMat[:, 4]))   # Euclidean distance
print(svdRec.eculidSim(myMat[:, 0], myMat[:, 0]))
print(svdRec.cosSim(myMat[:, 0], myMat[:, 4]))      # cosine similarity
print(svdRec.cosSim(myMat[:, 0], myMat[:, 0]))
print(svdRec.pearsSim(myMat[:, 0], myMat[:, 4]))    # Pearson correlation
print(svdRec.pearsSim(myMat[:, 0], myMat[:, 0]))
'''
On using column vectors versus row vectors:
if m >> n (many more samples than features), compute similarity on column
vectors, because it reduces the amount of computation; if m << n (fewer
samples than features), compute similarity on row vectors, for the same reason.
'''

reload(svdRec)
myMat = np.mat(svdRec.loadExData())
myMat[0, 1] = myMat[0, 0] = myMat[1, 0] = myMat[2, 0] = 4
myMat[3, 3] = 2
print(myMat)
# Recommendations for user 2's unrated items: each tuple is (item ID, estimated rating).
print(svdRec.recommend(myMat, 2))
# Swap in the other similarity measures:
print(svdRec.recommend(myMat, 2, simMeas=svdRec.eculidSim))
print(svdRec.recommend(myMat, 2, simMeas=svdRec.pearsSim))
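# The comments above suggest that the rating matrix can be approximated by its
# first three singular values. A common use of that approximation is to compare
# items in the reduced 3-dimensional space instead of in the full matrix. A
# minimal numpy-only sketch of that idea; this is not the svdRec implementation
# (whose internals are not shown here), and cosSimReduced is a hypothetical helper:
import numpy as np
import svdRec

dataMat = np.mat(svdRec.loadExData())
U, Sigma, VT = np.linalg.svd(dataMat)
k = 3
SigK = np.mat(np.diag(Sigma[:k]))
# Project the items (columns of dataMat) into the k-dimensional space: one row per item.
xformedItems = dataMat.T * U[:, :k] * SigK.I

def cosSimReduced(a, b):
    """Cosine similarity between two row vectors of the reduced item matrix."""
    num = float(a * b.T)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return 0.5 + 0.5 * num / denom   # rescaled into the 0..1 range

# Similarity of item 0 and item 4 in the reduced space:
print(cosSimReduced(xformedItems[0, :], xformedItems[4, :]))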