vectors = read_record(sc, record_file)
    itemIndex = read_index(sc, itemIndex_file, True)
    userIndex = read_index(sc, userIndex_file, False)
    user_id = read_userID_file(sc, userID_file)


    f = open('output.txt','w')

    for u in user_id:
        uid = userIndex[u]
    
        user = getUserVector(uid, vectors)
    
        otherUser = vectors.filter(lambda (k,v): k != uid)
        neighbors = knn2(sc, user, otherUser, K) 
        #neighbors = knn1(sc, user, otherUser, K) 

        rlt = getRecommend(neighbors) 
        
        tmp = ""
        for r in rlt:
            if r[0] in itemIndex:
                tmp = tmp + itemIndex[r[0]] + ' '

        f.write("%s: %s\n" % (u, tmp))

    f.close()


Exemple #2
0
# coding: UTF-8


import matplotlib
import matplotlib.pyplot as plt
import numpy as np

import knn
import mnist_test

# Demo script: exercise the knn module on its built-in data set, then on the
# dating data file.
# Call knn.classify0 and knn.knn2 with the query point [0,0] and 3 as the last
# argument (presumably the neighbor count k -- confirm against knn).
group, labels = knn.createDataSet()
bb = knn.classify0([0,0], group, labels, 3)
print bb
# NOTE(review): cc is never read in the visible part of this script -- confirm
# whether the knn2 result was meant to be printed or compared with bb.
cc = knn.knn2([0,0], group, labels, 3)

# Visualize the data
# NOTE: `labels` is rebound here, replacing the labels loaded from createDataSet().
dataSet, labels = knn.file2matrix('data/datingTestSet2.txt')
fig = plt.figure()
ax = fig.add_subplot(111)
# Scatter column 0 against column 1; marker size (s) and color (c) are both
# 15.0 * labels (assumes labels supports scalar multiplication, e.g. a numpy
# array -- TODO confirm).
ax.scatter(dataSet[:,0], dataSet[:,1], s=15.0*labels, c=15.0*labels)
plt.show()


# Measure the misclassification rate
# Re-import knn first (presumably so edits made to the module during an
# interactive session are picked up).
reload(knn)
# testRatio is the fraction of the data used as the test set; k is the number of neighbors
# NOTE(review): this path starts with '../data/' while the file2matrix call
# above uses 'data/' -- confirm which one matches the working directory.
knn.knnTest('../data/datingTestSet2.txt',testRatio=0.2, k=3)


# Test handwritten digit recognition