def test_pa3(self):
    """Check user-based k-NN predictions against known answer rows.

    Loads the fixture ratings and titles from ``testdata/``, builds a
    ``UserModel`` with ``normalize=True`` and, for every (user, item)
    pair, formats a "user,item,score,title" row and compares it with the
    expected string.
    """
    pairs = [
        (1024, 77), (1024, 268), (1024, 462), (1024, 393), (1024, 36955),
        (2048, 77), (2048, 36955), (2048, 788),
    ]
    expected_rows = [
        "1024,77,4.3848,Memento (2000)",
        "1024,268,2.8646,Batman (1989)",
        "1024,462,3.1082,Erin Brockovich (2000)",
        "1024,393,3.8722,Kill Bill: Vol. 2 (2004)",
        "1024,36955,2.3524,True Lies (1994)",
        "2048,77,4.8493,Memento (2000)",
        "2048,36955,3.9698,True Lies (1994)",
        "2048,788,3.8509,Mrs. Doubtfire (1993)",
    ]

    data = DataIO(verbose=False)
    data.load('testdata/ratings.csv', items_file='testdata/movie-titles.csv')
    model = UserModel(verbose=False, normalize=True)
    model.build(data)

    for (u, i), expected in zip(pairs, expected_rows):
        # NOTE(review): the prediction is handed to '%.4f' as returned.
        # If user_based_knn returns a one-element sequence rather than a
        # scalar, it may need indexing with [0] first - confirm.
        prediction = user_based_knn(
            model, 30,
            [data.new_user_idx(u)], [data.new_item_idx(i)],
            cosine, promote_users=True, normalize='centered')
        actual = '%d,%d,%.4f,%s' % (u, i, prediction, data.title(i))
        # assertEqual shows both strings on failure, unlike
        # assertTrue(a == b) which only reports "False is not true".
        self.assertEqual(expected, actual)
(5399,14), (5399,187), (5399,602), (5399,629), (3613,329), (3613,604), (3613,134), (3613,1637), (3613,278), (1873,786), (1873,2502), (1873,550), (1873,1894), (1873,1422), (4914,268), (4914,36658), (4914,786), (4914,161), (4914,854)] file = open(answer_file,'w') file.write('\n'.join( ['%d,%d,%.4f,%s' % ( u, i, user_based_knn(model, NN, [data.new_user_idx(u)], [data.new_item_idx(i)], cosine, promote_users = True, normalize = 'centered')[0], data.title(i)) for (u,i) in inputs])) file.close()
# Part 1: compute user-based k-NN predictions for a fixed list of
# (user, item) pairs and write them to `answer_file`, one
# "user,item,score,title" row per pair.
ratings_file = '../data/ratings.csv'
items_file = '../data/movie-titles.csv'
NN = 30  # second argument to user_based_knn (presumably the neighbour count)
answer_file = 'part_1.csv'

# part 1
data = DataIO()
data.load(ratings_file, items_file=items_file)
model = UserModel(normalize=True)
model.build(data)

inputs = [
    (4169, 161), (4169, 36955), (4169, 453), (4169, 857), (4169, 238),
    (5399, 1891), (5399, 14), (5399, 187), (5399, 602), (5399, 629),
    (3613, 329), (3613, 604), (3613, 134), (3613, 1637), (3613, 278),
    (1873, 786), (1873, 2502), (1873, 550), (1873, 1894), (1873, 1422),
    (4914, 268), (4914, 36658), (4914, 786), (4914, 161), (4914, 854),
]

# Build every row before touching the output file: if a prediction raises,
# an existing answer file is not truncated and no handle is left open.
answer_lines = [
    '%d,%d,%.4f,%s' % (
        u, i,
        user_based_knn(model, NN,
                       [data.new_user_idx(u)], [data.new_item_idx(i)],
                       cosine, promote_users=True, normalize='centered')[0],
        data.title(i))
    for (u, i) in inputs
]

# The context manager closes the file even if the write fails.
with open(answer_file, 'w') as out_file:
    out_file.write('\n'.join(answer_lines))
def test_pa3(self):
    """Verify formatted k-NN prediction rows for the ``testdata`` fixtures.

    Each case pairs a (user, item) tuple with the expected
    "user,item,score,title" string. The row is rebuilt from
    ``user_based_knn`` and ``data.title`` and must match exactly.
    """
    cases = [
        ((1024, 77), "1024,77,4.3848,Memento (2000)"),
        ((1024, 268), "1024,268,2.8646,Batman (1989)"),
        ((1024, 462), "1024,462,3.1082,Erin Brockovich (2000)"),
        ((1024, 393), "1024,393,3.8722,Kill Bill: Vol. 2 (2004)"),
        ((1024, 36955), "1024,36955,2.3524,True Lies (1994)"),
        ((2048, 77), "2048,77,4.8493,Memento (2000)"),
        ((2048, 36955), "2048,36955,3.9698,True Lies (1994)"),
        ((2048, 788), "2048,788,3.8509,Mrs. Doubtfire (1993)"),
    ]

    data = DataIO(verbose=False)
    data.load('testdata/ratings.csv', items_file='testdata/movie-titles.csv')
    model = UserModel(verbose=False, normalize=True)
    model.build(data)

    for (user_id, item_id), expected in cases:
        # NOTE(review): the score goes straight into '%.4f'. If
        # user_based_knn yields a one-element sequence instead of a
        # scalar, indexing with [0] may be needed - confirm.
        score = user_based_knn(
            model, 30,
            [data.new_user_idx(user_id)], [data.new_item_idx(item_id)],
            cosine, promote_users=True, normalize='centered')
        row = '%d,%d,%.4f,%s' % (user_id, item_id, score, data.title(item_id))
        # assertEqual prints expected and actual rows when they differ;
        # assertTrue(expected == row) would only say "False is not true".
        self.assertEqual(expected, row)