Ejemplo n.º 1
0
    def test_pa3(self):
        """Check user-based KNN predictions for a fixed set of (user, item) pairs.

        Loads ``testdata/ratings.csv`` with titles from
        ``testdata/movie-titles.csv``, builds a ``UserModel`` with
        ``normalize=True`` and, for each pair, formats the prediction as
        ``user,item,score,title`` and compares it with the expected line.
        """
        pairs = [(1024, 77), (1024, 268), (1024, 462), (1024, 393),
                 (1024, 36955), (2048, 77), (2048, 36955), (2048, 788)]
        expected_lines = [
            "1024,77,4.3848,Memento (2000)",
            "1024,268,2.8646,Batman (1989)",
            "1024,462,3.1082,Erin Brockovich (2000)",
            "1024,393,3.8722,Kill Bill: Vol. 2 (2004)",
            "1024,36955,2.3524,True Lies (1994)",
            "2048,77,4.8493,Memento (2000)",
            "2048,36955,3.9698,True Lies (1994)",
            "2048,788,3.8509,Mrs. Doubtfire (1993)",
        ]

        data = DataIO(verbose=False)
        data.load('testdata/ratings.csv',
                  items_file='testdata/movie-titles.csv')
        model = UserModel(verbose=False, normalize=True)
        model.build(data)

        for (u, i), expected in zip(pairs, expected_lines):
            # The model is addressed through the indices returned by
            # new_user_idx / new_item_idx, not the raw ids listed above.
            score = user_based_knn(model,
                                   30, [data.new_user_idx(u)],
                                   [data.new_item_idx(i)],
                                   cosine,
                                   promote_users=True,
                                   normalize='centered')
            actual = '%d,%d,%.4f,%s' % (u, i, score, data.title(i))
            # assertEqual reports both strings on failure, whereas
            # assertTrue(a == b) only says "False is not true".
            self.assertEqual(expected, actual)
Ejemplo n.º 2
0
 def test_unnormalized(self):
    """Check the top-3 recommendations for user 3712 using ``pearson``.

    Scores every item index in ``range(self.data.num_items())`` for the
    user, keeps the top 3 via ``top_ns`` and compares the ``item,score``
    pairs (scores at 3 decimals) with the expected list.
    """
    u = 3712
    expected = [(641, 5.000), (603, 4.856), (105, 4.739)]
    R = user_based_knn(self.model, 5, [self.data.new_user_idx(u)],
                       range(self.data.num_items()),
                       pearson, promote_users=False)
    recs = top_ns([R], 3, keep_order=True)
    # Recommendations carry model item indices; old_item_idx converts them
    # before they are compared with the ids in `expected`.
    actual_line = ','.join('%d,%.3f' % (self.data.old_item_idx(a), b)
                           for (a, b) in recs[0])
    expected_line = ','.join('%d,%.3f' % pair for pair in expected)
    # assertEqual shows both strings on failure, unlike assertTrue(a == b).
    self.assertEqual(actual_line, expected_line)
Ejemplo n.º 3
0
 def test_unnormalized(self):
     """Check the top-3 recommendations for user 3712 using ``pearson``.

     Scores every item index in ``range(self.data.num_items())`` for the
     user, keeps the top 3 via ``top_ns`` and compares the ``item,score``
     pairs (scores at 3 decimals) with the expected list.
     """
     u = 3712
     expected = [(641, 5.000), (603, 4.856), (105, 4.739)]
     R = user_based_knn(self.model,
                        5, [self.data.new_user_idx(u)],
                        range(self.data.num_items()),
                        pearson,
                        promote_users=False)
     recs = top_ns([R], 3, keep_order=True)
     # Recommendations carry model item indices; old_item_idx converts them
     # before they are compared with the ids in `expected`.
     actual_line = ','.join('%d,%.3f' % (self.data.old_item_idx(a), b)
                            for (a, b) in recs[0])
     expected_line = ','.join('%d,%.3f' % pair for pair in expected)
     # assertEqual shows both strings on failure, unlike assertTrue(a == b).
     self.assertEqual(actual_line, expected_line)
Ejemplo n.º 4
0
    def test_pa3(self):
        """Check user-based KNN predictions for a fixed set of (user, item) pairs.

        Loads ``testdata/ratings.csv`` with titles from
        ``testdata/movie-titles.csv``, builds a ``UserModel`` with
        ``normalize=True`` and, for each pair, formats the prediction as
        ``user,item,score,title`` and compares it with the expected line.
        """
        pairs = [(1024, 77), (1024, 268), (1024, 462), (1024, 393),
                 (1024, 36955), (2048, 77), (2048, 36955), (2048, 788)]
        expected_lines = [
            "1024,77,4.3848,Memento (2000)",
            "1024,268,2.8646,Batman (1989)",
            "1024,462,3.1082,Erin Brockovich (2000)",
            "1024,393,3.8722,Kill Bill: Vol. 2 (2004)",
            "1024,36955,2.3524,True Lies (1994)",
            "2048,77,4.8493,Memento (2000)",
            "2048,36955,3.9698,True Lies (1994)",
            "2048,788,3.8509,Mrs. Doubtfire (1993)",
        ]

        data = DataIO(verbose=False)
        data.load('testdata/ratings.csv', items_file='testdata/movie-titles.csv')
        model = UserModel(verbose=False, normalize=True)
        model.build(data)

        for (u, i), expected in zip(pairs, expected_lines):
            # The model is addressed through the indices returned by
            # new_user_idx / new_item_idx, not the raw ids listed above.
            score = user_based_knn(model, 30, [data.new_user_idx(u)],
                                   [data.new_item_idx(i)], cosine,
                                   promote_users=True, normalize='centered')
            actual = '%d,%d,%.4f,%s' % (u, i, score, data.title(i))
            # assertEqual reports both strings on failure, whereas
            # assertTrue(a == b) only says "False is not true".
            self.assertEqual(expected, actual)
Ejemplo n.º 5
0
# Second argument passed to user_based_knn below (presumably the
# neighbourhood size -- confirm against user_based_knn).
NN = 5
# Number of recommendations requested from top_ns.
n = 3
part_1_file = "part_1.csv"
part_2_file = "part_2.csv"

# part 1: pearson-based scores on a model built with normalize=False

data = DataIO()
data.load(ratings_file)
model = UserModel(normalize=False)
model.build(data)

given_users = data.translate_users(given_users)
given_items = range(data.num_items())

R = user_based_knn(model, NN, given_users, given_items, pearson, promote_users=False)
recs = top_ns(R, n, keep_order=True)

# The context manager closes the file even if formatting a row raises.
with open(part_1_file, "w") as file:
    file.write("\n".join(["%d %.3f" % (data.old_item_idx(i), s) for u in recs for (i, s) in u]))

# part 2: same model and inputs, with exclude_seen=False and normalize=True

R = user_based_knn(
    model, NN, given_users, given_items, pearson, promote_users=False, exclude_seen=False, normalize=True
)
recs = top_ns(R, n, keep_order=True)

# The original left this handle open; `with` guarantees it is flushed and closed.
with open(part_2_file, "w") as file:
    file.write("\n".join(["%d %.3f" % (data.old_item_idx(i), s) for u in recs for (i, s) in u]))
Ejemplo n.º 6
0
		(5399,14),
		(5399,187),
		(5399,602),
		(5399,629),
		(3613,329),
		(3613,604),
		(3613,134),
		(3613,1637),
		(3613,278),
		(1873,786),
		(1873,2502),
		(1873,550),
		(1873,1894),
		(1873,1422),
		(4914,268),
		(4914,36658),
		(4914,786),
		(4914,161),
		(4914,854)]

# Write one "user,item,score,title" line per (user, item) pair in `inputs`.
# The context manager closes the file even if a prediction or the string
# formatting raises part-way through.
with open(answer_file, 'w') as file:
    file.write('\n'.join(
        ['%d,%d,%.4f,%s' % (
            u,
            i,
            # Single-user, single-item query; [0] takes the first entry of the result.
            user_based_knn(model, NN, [data.new_user_idx(u)],
                           [data.new_item_idx(i)], cosine,
                           promote_users=True, normalize='centered')[0],
            data.title(i))
         for (u, i) in inputs]))
Ejemplo n.º 7
0
ratings_file = '../data/ratings.csv'
items_file = '../data/movie-titles.csv'
# Second argument passed to user_based_knn below (presumably the
# neighbourhood size -- confirm against user_based_knn).
NN = 30
answer_file = 'part_1.csv'

# part 1

data = DataIO()
data.load(ratings_file, items_file=items_file)
model = UserModel(normalize=True)
model.build(data)

# (user id, item id) pairs to score.
inputs = [(4169, 161), (4169, 36955), (4169, 453), (4169, 857), (4169, 238),
          (5399, 1891), (5399, 14), (5399, 187), (5399, 602), (5399, 629),
          (3613, 329), (3613, 604), (3613, 134), (3613, 1637), (3613, 278),
          (1873, 786), (1873, 2502), (1873, 550), (1873, 1894), (1873, 1422),
          (4914, 268), (4914, 36658), (4914, 786), (4914, 161), (4914, 854)]

# Write one "user,item,score,title" line per pair. The context manager closes
# the file even if a prediction or the string formatting raises.
with open(answer_file, 'w') as file:
    file.write('\n'.join([
        '%d,%d,%.4f,%s' %
        (u, i,
         # Single-user, single-item query; [0] takes the first entry of the result.
         user_based_knn(model,
                        NN, [data.new_user_idx(u)], [data.new_item_idx(i)],
                        cosine,
                        promote_users=True,
                        normalize='centered')[0], data.title(i))
        for (u, i) in inputs
    ]))
Ejemplo n.º 8
0
part_1_file = 'part_1.csv'
part_2_file = 'part_2.csv'

# part 1: pearson-based scores on a model built with normalize=False

data = DataIO()
data.load(ratings_file)
model = UserModel(normalize=False)
model.build(data)

given_users = data.translate_users(given_users)
given_items = range(data.num_items())

R = user_based_knn(model,
                   NN,
                   given_users,
                   given_items,
                   pearson,
                   promote_users=False)
recs = top_ns(R, n, keep_order=True)

# Write one "item score" line per recommendation. The context manager closes
# the file even if formatting a row raises.
with open(part_1_file, 'w') as file:
    file.write('\n'.join(
        ['%d %.3f' % (data.old_item_idx(i), s) for u in recs for (i, s) in u]))

# part 2

R = user_based_knn(model,
                   NN,
                   given_users,
                   given_items,
Ejemplo n.º 9
0
 def test_user_knn(self):
     """Compare ``user_based_knn`` output with a fixed 2x2 expected matrix.

     Calls ``user_based_knn(self.model, 30, [0, 2], [2, 3], cosine)`` and
     checks that its ``stringify_matrix`` form equals that of ``expected``.
     """
     expected = np.matrix([[2.47335263, 2.72],
                           [3.20666667, 5.34]])
     actual = user_based_knn(self.model, 30, [0, 2], [2, 3], cosine)
     # assertEqual reports both operands on failure, unlike assertTrue(a == b).
     self.assertEqual(stringify_matrix(actual), stringify_matrix(expected))
Ejemplo n.º 10
0
 def test_user_knn(self):
     """Compare ``user_based_knn`` output with a fixed 2x2 expected matrix.

     Calls ``user_based_knn(self.model, 30, [0, 2], [2, 3], cosine)`` and
     checks that its ``stringify_matrix`` form equals that of ``expected``.
     """
     expected = np.matrix([[2.47335263, 2.72], [3.20666667, 5.34]])
     actual = user_based_knn(self.model, 30, [0, 2], [2, 3], cosine)
     # assertEqual reports both operands on failure, unlike assertTrue(a == b).
     self.assertEqual(stringify_matrix(actual), stringify_matrix(expected))