Exemple #1
0
    def test_pa3(self):
        """Check user-based kNN predictions against known answer lines.

        Loads ratings and movie titles from ``testdata/``, builds a
        ``UserModel`` with ``normalize=True`` and, for every (user, item)
        pair, formats ``user,item,score,title`` (score with four decimals)
        and compares it with the expected line.
        """
        # Each expected line sits next to the (user, item) pair it belongs
        # to, so a pair and its answer cannot drift out of step.
        expected = [
            ((1024, 77), "1024,77,4.3848,Memento (2000)"),
            ((1024, 268), "1024,268,2.8646,Batman (1989)"),
            ((1024, 462), "1024,462,3.1082,Erin Brockovich (2000)"),
            ((1024, 393), "1024,393,3.8722,Kill Bill: Vol. 2 (2004)"),
            ((1024, 36955), "1024,36955,2.3524,True Lies (1994)"),
            ((2048, 77), "2048,77,4.8493,Memento (2000)"),
            ((2048, 36955), "2048,36955,3.9698,True Lies (1994)"),
            ((2048, 788), "2048,788,3.8509,Mrs. Doubtfire (1993)"),
        ]

        data = DataIO(verbose=False)
        data.load('testdata/ratings.csv',
                  items_file='testdata/movie-titles.csv')
        model = UserModel(verbose=False, normalize=True)
        model.build(data)

        for (u, i), line in expected:
            # NOTE(review): the prediction is handed straight to '%.4f';
            # presumably user_based_knn yields a single value when given one
            # user and one item -- confirm.
            score = user_based_knn(model,
                                   30, [data.new_user_idx(u)],
                                   [data.new_item_idx(i)],
                                   cosine,
                                   promote_users=True,
                                   normalize='centered')
            # assertEqual shows both strings when they differ, whereas
            # assertTrue(a == b) only reports "False is not true".
            self.assertEqual(line,
                             '%d,%d,%.4f,%s' % (u, i, score, data.title(i)))
Exemple #2
0
		(5399,14),
		(5399,187),
		(5399,602),
		(5399,629),
		(3613,329),
		(3613,604),
		(3613,134),
		(3613,1637),
		(3613,278),
		(1873,786),
		(1873,2502),
		(1873,550),
		(1873,1894),
		(1873,1422),
		(4914,268),
		(4914,36658),
		(4914,786),
		(4914,161),
		(4914,854)]

# Build every answer line before touching the output file, so a failing
# prediction does not leave a truncated answer file behind.
answer_lines = [
    '%d,%d,%.4f,%s' % (
        u,
        i,
        user_based_knn(model, NN, [data.new_user_idx(u)],
                       [data.new_item_idx(i)], cosine,
                       promote_users=True, normalize='centered')[0],
        data.title(i))
    for (u, i) in inputs
]

# The context manager closes the file even if the write raises.
# Lines are joined with '\n' and no trailing newline is written.
with open(answer_file, 'w') as out:
    out.write('\n'.join(answer_lines))
Exemple #3
0
# Input data, neighbourhood size passed to user_based_knn, and output path.
ratings_file = '../data/ratings.csv'
items_file = '../data/movie-titles.csv'
NN = 30
answer_file = 'part_1.csv'

# part 1

# Load the ratings together with the item titles and build the user model.
data = DataIO()
data.load(ratings_file, items_file=items_file)
model = UserModel(normalize=True)
model.build(data)

# (user id, item id) pairs for which a prediction line is written.
inputs = [(4169, 161), (4169, 36955), (4169, 453), (4169, 857), (4169, 238),
          (5399, 1891), (5399, 14), (5399, 187), (5399, 602), (5399, 629),
          (3613, 329), (3613, 604), (3613, 134), (3613, 1637), (3613, 278),
          (1873, 786), (1873, 2502), (1873, 550), (1873, 1894), (1873, 1422),
          (4914, 268), (4914, 36658), (4914, 786), (4914, 161), (4914, 854)]

# Build every "user,item,score,title" line before opening the output file,
# so a failing prediction does not leave a truncated answer file behind.
answer_lines = [
    '%d,%d,%.4f,%s' %
    (u, i,
     user_based_knn(model,
                    NN, [data.new_user_idx(u)], [data.new_item_idx(i)],
                    cosine,
                    promote_users=True,
                    normalize='centered')[0], data.title(i))
    for (u, i) in inputs
]

# The context manager closes the file even if the write raises.
# Lines are joined with '\n' and no trailing newline is written.
with open(answer_file, 'w') as out:
    out.write('\n'.join(answer_lines))
Exemple #4
0
    def test_pa3(self):
        """Compare user-based kNN output with the expected answer lines.

        The test data in ``testdata/`` is loaded, a ``UserModel`` is built
        with ``normalize=True``, and for each (user, item) pair the line
        ``user,item,score,title`` (score to four decimals) must equal the
        expected string.
        """
        # Pair -> expected line, kept together so they cannot get misaligned.
        cases = [
            ((1024, 77), "1024,77,4.3848,Memento (2000)"),
            ((1024, 268), "1024,268,2.8646,Batman (1989)"),
            ((1024, 462), "1024,462,3.1082,Erin Brockovich (2000)"),
            ((1024, 393), "1024,393,3.8722,Kill Bill: Vol. 2 (2004)"),
            ((1024, 36955), "1024,36955,2.3524,True Lies (1994)"),
            ((2048, 77), "2048,77,4.8493,Memento (2000)"),
            ((2048, 36955), "2048,36955,3.9698,True Lies (1994)"),
            ((2048, 788), "2048,788,3.8509,Mrs. Doubtfire (1993)"),
        ]

        data = DataIO(verbose=False)
        data.load('testdata/ratings.csv', items_file='testdata/movie-titles.csv')
        model = UserModel(verbose=False, normalize=True)
        model.build(data)

        for (u, i), want in cases:
            # NOTE(review): the result is formatted with '%.4f' directly;
            # presumably a single value comes back for one user and one
            # item -- confirm.
            prediction = user_based_knn(model, 30,
                                        [data.new_user_idx(u)],
                                        [data.new_item_idx(i)],
                                        cosine,
                                        promote_users=True,
                                        normalize='centered')
            got = '%d,%d,%.4f,%s' % (u, i, prediction, data.title(i))
            # assertEqual prints both values on mismatch; assertTrue(a == b)
            # would only say that False is not true.
            self.assertEqual(want, got)