Example #1
0
# Randomly partition the raw data: each datum goes to train or test with
# equal probability (one randrange(2) draw per datum, same RNG stream).
for datum in training_data:
    bucket = train_data if random.randrange(2) == 0 else test_data
    bucket.append(datum)

# Compute the global mean rating for a fallback.
# float() keeps the division true (not floor) under Python 2 as well.
num_train = len(train_data)
mean_rating = float(sum(datum['rating'] for datum in train_data)) / num_train

# Build an (num_users x 1) feature matrix holding each user's age,
# in the same order as user_list.
num_users = len(user_list)
mat = np.zeros((num_users, 1))
for row, record in enumerate(user_list):
    mat[row, 0] = record['age']

# Cluster users by age via k-means++; only the cluster assignments are used.
_, resp = kmeans_plus.kmeans_plus(mat, NUM_CLUSTERS)

# Record, per user, their cluster index plus running rating total/count,
# and collect the user ids belonging to each cluster.
users = {}
clusters = [set() for _ in range(NUM_CLUSTERS)]
for idx in range(num_users):
    uid = user_list[idx]['user']
    clusters[resp[idx]].add(uid)
    users[uid] = {'index': resp[idx], 'total': 0., 'count': 0}

'''users = {}
clusters = []
for i in range(5):
    clusters.append(set())
for i in range(num_users):
    cluster = 0
Example #2
0
    else:
        book_pref = mean_rating
    mat[user_keys[rating['user']]][book_keys[rating['isbn']]] = rating['rating'] - float(user['total']) / user['count'] + mean_rating - book_pref
    mat[user_keys[rating['user']]][book_keys[rating['isbn']]] = rating['rating'] - float(user['total']) / user['count']'''

    mat[user_keys[rating['user']]][book_keys[rating['isbn']]] = rating['rating'] - float(user['total']) / user['count']

# Project the ratings matrix down to NUM_COMPONENTS latent dimensions.
# `pca` itself is kept bound — the commented-out plotting code below reads
# its explained_variance_ratio_.
pca = PCA(n_components = NUM_COMPONENTS)
reduced_mat = pca.fit_transform(mat)

'''vars = pca.explained_variance_ratio_
x = [w for w in range(NUM_COMPONENTS)]
plt.plot(x,vars,'ro')
plt.savefig('pca_variances.png')'''

# Re-cluster users in the PCA-reduced rating space; keep both the
# centroids (mu) and the per-user assignments (resp).
mu, resp = kmeans_plus.kmeans_plus(reduced_mat, NUM_CLUSTERS)

# Group user ids by their new (PCA-space) cluster assignment.
cluster_ids = [set() for _ in range(NUM_CLUSTERS)]
for idx in range(num_users):
    cluster_ids[resp[idx]].add(inv_user_keys[idx])
# Make predictions for each test query.

# Map every ISBN in the full book list to its positional index
# (later occurrences of a duplicate ISBN win, as in a plain loop).
long_book_keys = {book['isbn']: pos for pos, book in enumerate(book_list)}

# Accumulator presumably filled and sorted further down the file —
# its usage is past this chunk; confirm downstream before relying on it.
training_sorted = []