def __init__(self, matrix, similar = 0):
    """
    Store the rating matrix and pick the user-similarity backend.

    Args:
        matrix: the user-item matrix holding the scores given by users,
                an instance of recsys.utils.sparse_matrix.DictMatrix.
                The original note says computation is fast when the
                matrix indexes i and j are continuous from 0.
        similar: UserCF.USER_SIMILAR or UserCF.HERLOCKER_USER_SIMILAR.
                 Any other value leaves self._user_similar unset.
    """
    self._matrix = matrix
    self._avg = {}
    # The backend modules are imported lazily so that only the selected
    # similarity implementation gets loaded.
    if similar == UserCF.USER_SIMILAR:
        from recsys.similarity.user_similar import UserSimilar as backend
    elif similar == UserCF.HERLOCKER_USER_SIMILAR:
        from recsys.similarity.herlocker_user_similar import HerlockerUserSimilar as backend
    else:
        return
    self._user_similar = backend(matrix)
def setUp(self):
    """
    Build the fixture: a 10x10 DictMatrix in which users 1 and 2 give
    items 1..4 the same scores, wrapped in a HerlockerUserSimilar.
    """
    m1, m2 = ([1, 1, 2, 3], [1, 1, 2, 3])
    store = bsddb.btopen(None, cachesize = 268435456)
    matrix = DictMatrix(10, 10, store)
    # Users and items are numbered from 1, hence the enumerate() start.
    for user, scores in enumerate((m1, m2), 1):
        for item, score in enumerate(scores, 1):
            matrix[user, item] = score
    self.similar = HerlockerUserSimilar(matrix)
class HerlockerUserSimilarTestCase(unittest.TestCase):
    """Unit tests for HerlockerUserSimilar on a small two-user matrix."""

    def setUp(self):
        """
        Build the fixture: a 10x10 DictMatrix in which users 1 and 2 give
        items 1..4 the same scores, wrapped in a HerlockerUserSimilar.
        """
        # Keep the handle on the test case so tearDown can close it.
        # 268435456 == 256 * 1024 * 1024.
        self._db = bsddb.btopen(None, cachesize = 268435456)
        matrix = DictMatrix(10, 10, self._db)
        m1, m2 = ([1, 1, 2, 3], [1, 1, 2, 3])
        # Users and items are numbered from 1, hence the enumerate() start.
        for user, scores in enumerate((m1, m2), 1):
            for item, score in enumerate(scores, 1):
                matrix[user, item] = score
        self.similar = HerlockerUserSimilar(matrix)

    def tearDown(self):
        """Drop the object under test, then release the database handle."""
        self.similar = None
        self._db.close()
        self._db = None

    def testCompute(self):
        """Similarity of the two identical users must be 0.08 +/- 0.01."""
        # NOTE(review): 0.08 presumably comes from a perfect correlation
        # damped by Herlocker significance weighting (4 co-rated items / 50)
        # - confirm against HerlockerUserSimilar.compute.
        self.assertTrue(abs(self.similar.compute(1, 2) - 0.08) < 0.01)
class UserCF(object):
    """
    Use the user-cf (user-based collaborative filtering) algorithm to
    recommend.

    Attributes:
        _matrix: the user-item matrix holding the scores given by users,
                 an instance of recsys.utils.sparse_matrix.DictMatrix.
        _user_similar: similarity backend (UserSimilar or
                       HerlockerUserSimilar) selected in __init__.
        _avg: cache mapping a user index to that user's average rate.
    """

    # Constants selecting the similarity algorithm.
    USER_SIMILAR = 0  # UserSimilar
    HERLOCKER_USER_SIMILAR = 1  # HerlockerUserSimilar

    # Number of neighbours requested from the similarity backend.
    _NEIGHBOURS = 50

    def __init__(self, matrix, similar=USER_SIMILAR):
        """
        Store the rating matrix and pick the user-similarity backend.

        Args:
            matrix: the user-item matrix holding the scores given by users,
                    an instance of recsys.utils.sparse_matrix.DictMatrix.
            similar: UserCF.USER_SIMILAR or UserCF.HERLOCKER_USER_SIMILAR.
                     Any other value leaves self._user_similar unset.
        """
        self._matrix = matrix
        self._avg = {}
        # The backend modules are imported lazily so that only the selected
        # similarity implementation gets loaded.
        if similar == UserCF.USER_SIMILAR:
            from recsys.similarity.user_similar import UserSimilar
            self._user_similar = UserSimilar(matrix)
        elif similar == UserCF.HERLOCKER_USER_SIMILAR:
            from recsys.similarity.herlocker_user_similar import HerlockerUserSimilar
            self._user_similar = HerlockerUserSimilar(matrix)

    def prediction(self, user, item):
        """
        Get the prediction for a user-item pair by user-cf.

            prediction(a, p) =
                avg_a + sum(sim(a, b) * (r(b, p) - avg_b)) / sum(sim(a, b))

        where b ranges over the neighbours returned by the similarity
        backend's topN().

        Args:
            user: user index
            item: item index

        Returns:
            The stored rate if the user has already rated the item (a
            stored 0 counts as "not rated"); otherwise the predicted
            rate. Falls back to the user's average rate when the
            similarity weights sum to zero.
        """
        rated = self._matrix[user, item]
        if rated != 0:
            return rated

        neighbours = self._user_similar.topN(user, item, self._NEIGHBOURS)
        avg_u = self._userAverage(user)
        count = 0.0
        weights = 0.0
        for entry in neighbours:
            # Each entry is indexed as (neighbour user index, similarity).
            neighbour = entry[0]
            similarity = entry[1]
            deviation = self._matrix[neighbour, item] - self._userAverage(neighbour)
            count += deviation * similarity
            weights += similarity
        if weights == 0:
            # Avoid dividing by zero when there is nothing to weigh.
            return avg_u
        return avg_u + count / weights

    def _userAverage(self, user):
        """
        Compute the average rate of a user, caching it in self._avg.

        Args:
            user: user index

        Returns:
            The average of the user's non-zero rates (see _average).
        """
        if user not in self._avg:
            self._avg[user] = self._average(self._matrix[user, ...])
        return self._avg[user]

    def _average(self, vec):
        """
        Compute the average of the non-zero rates in vec.

        Args:
            vec: dictionary with key-index, value-rate

        Returns:
            total / count over the non-zero rates, or 0 when vec is empty
            or holds only zero rates.
        """
        total = 0.0
        count = 0
        # Walk only the stored indexes, in ascending order, instead of
        # probing every index up to the largest one; this also makes an
        # empty vec fall through to the zero result.
        for index in sorted(vec.keys()):
            rate = vec[index]
            if rate != 0:
                total += rate
                count += 1
        if count == 0:
            return 0
        return total / count