class FeatureWeightedLinearStacking(base_recommender):
    """Hybrid recommender that linearly blends two underlying recommenders.

    A content-based recommender (``self.cb``) and a neighbourhood-based
    collaborative-filtering recommender (``self.cf``) each produce a
    prediction; the two are combined as::

        p = wcb1 * fun1() * p_cb + wcb2 * fun2(user) * p_cb
          + wcf1 * fun1() * p_cf + wcf2 * fun2(user) * p_cf

    where ``fun1`` and ``fun2`` are the feature functions defined below.
    """

    def __init__(self):
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

        # Hard-coded blending weights (presumably fitted offline -- the
        # training code is not visible from here).
        self.wcb1 = Decimal(0.65221204)
        self.wcb2 = Decimal(-0.14638855)
        self.wcf1 = Decimal(-0.0062952)
        self.wcf2 = Decimal(0.09139193)

    def fun1(self):
        """Constant feature function: always ``Decimal(1.0)``."""
        return Decimal(1.0)

    def fun2(self, user_id):
        """Return ``Decimal(1.0)`` if the user has more than 3 ratings, else ``Decimal(0.0)``.

        Issues a count query against ``Rating`` on every call.
        """
        count = Rating.objects.filter(user_id=user_id).count()
        if count > 3.0:
            return Decimal(1.0)
        return Decimal(0.0)

    def recommend_items(self, user_id, num=6):
        """Return the top ``num`` items for ``user_id`` ranked by blended prediction.

        Each underlying recommender is asked for ``num * 5`` candidates.
        Candidates proposed by only one recommender get the missing score
        from the other recommender's ``predict_score``.

        Returns:
            A list of ``(item_id, {'prediction': value})`` tuples, highest
            prediction first, at most ``num`` long.
        """
        cb_recs = self.cb.recommend_items(user_id, num * 5)
        cf_recs = self.cf.recommend_items(user_id, num * 5)

        # item_id -> {'cb': score, 'cf': score}; either key may be absent
        # until the fill-in step below.
        combined_recs = {}
        for rec in cb_recs:
            combined_recs[rec[0]] = {'cb': rec[1]['prediction']}
        for rec in cf_recs:
            combined_recs.setdefault(rec[0], {})['cf'] = rec[1]['prediction']

        fwls_preds = {}
        for key, recs in combined_recs.items():
            if 'cb' not in recs:
                recs['cb'] = self.cb.predict_score(user_id, key)
            if 'cf' not in recs:
                recs['cf'] = self.cf.predict_score(user_id, key)
            pred = self.prediction(recs['cb'], recs['cf'], user_id)
            fwls_preds[key] = {'prediction': pred}

        # Stable descending sort, so ties keep their insertion order.
        ranked = sorted(
            fwls_preds.items(),
            key=lambda item: float(item[1]['prediction']),
            reverse=True,
        )
        return ranked[:num]

    def predict_score(self, user_id, item_id):
        """Return the blended prediction for a single ``(user_id, item_id)`` pair."""
        p_cb = self.cb.predict_score(user_id, item_id)
        p_cf = self.cf.predict_score(user_id, item_id)
        # The original computed this value and discarded it, so callers
        # always received None.
        return self.prediction(p_cb, p_cf, user_id)

    def prediction(self, p_cb, p_cf, user_id):
        """Blend a content-based and a collaborative prediction for ``user_id``.

        ``p_cb`` and ``p_cf`` must support multiplication with ``Decimal``
        (presumably they are ``Decimal`` themselves -- confirm against the
        underlying recommenders).
        """
        # Evaluate each feature function once; fun2 hits the database.
        f1 = self.fun1()
        f2 = self.fun2(user_id)
        return (self.wcb1 * f1 * p_cb
                + self.wcb2 * f2 * p_cb
                + self.wcf1 * f1 * p_cf
                + self.wcf2 * f2 * p_cf)
class CFCoverage(object):
    """Computes how much of the movie catalogue the CF recommender covers.

    Coverage is the number of distinct items that appear in at least one
    user's recommendations divided by the total number of movies.
    """

    def __init__(self):
        # Distinct user ids that have at least one rating.
        self.all_users = Rating.objects.all().values('user_id').distinct()
        self.cf = NeighborhoodBasedRecs()
        # item_id -> number of users who were recommended that item.
        self.items_in_rec = defaultdict(int)
        # Entries of all_users for which a non-empty recommendation came back.
        self.users_with_recs = []

    def calculate_coverage(self):
        """Recommend for every user, dump item counts to JSON, and return coverage.

        Side effects:
            Prints progress to stdout and writes ``self.items_in_rec`` to
            ``cf_coverage.json`` in the current working directory.

        Returns:
            ``items recommended at least once / total movies``, or ``0.0``
            when there are no movies at all.
        """
        print('calculating coverage for all users ({} in total)'.format(
            len(self.all_users)))

        for user in self.all_users:
            user_id = str(user['user_id'])
            recset = self.cf.recommend_items(user_id)
            if recset:
                self.users_with_recs.append(user)
                for rec in recset:
                    self.items_in_rec[rec[0]] += 1
                print('found recs for {}'.format(user_id))

        print('writing cf coverage to file.')
        # Context manager so the file is flushed and closed deterministically.
        with open('cf_coverage.json', 'w') as coverage_file:
            json.dump(self.items_in_rec, coverage_file)

        no_movies = Movie.objects.all().count()
        no_movies_in_rec = len(self.items_in_rec)

        # Guard the empty catalogue; the original also printed the inverse
        # ratio, which raised ZeroDivisionError when nothing was recommended.
        coverage = no_movies_in_rec / no_movies if no_movies else 0.0

        print("{} {} {}".format(no_movies, no_movies_in_rec, float(coverage)))
        return coverage
class FeatureWeightedLinearStacking(base_recommender):
    """Hybrid recommender that linearly blends two underlying recommenders.

    A content-based recommender (``self.cb``) and a neighbourhood-based
    collaborative-filtering recommender (``self.cf``) each produce a
    prediction; the two are combined as::

        p = wcb1 * fun1() * p_cb + wcb2 * fun2(user) * p_cb
          + wcf1 * fun1() * p_cf + wcf2 * fun2(user) * p_cf
          + intercept

    The weights start at hard-coded defaults and can be replaced from a
    pickled parameter file via ``set_save_path``.
    """

    def __init__(self):
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

        # Default blending weights (presumably fitted offline -- the
        # training code is not visible from here).
        self.wcb1 = Decimal(0.65221204)
        self.wcb2 = Decimal(-0.14638855)
        self.wcf1 = Decimal(-0.0062952)
        self.wcf2 = Decimal(0.09139193)
        self.intercept = Decimal(0)

    def fun1(self):
        """Constant feature function: always ``Decimal(1.0)``."""
        return Decimal(1.0)

    def fun2(self, user_id):
        """Return ``Decimal(1.0)`` if the user has more than 3 ratings, else ``Decimal(0.0)``.

        Issues a count query against ``Rating`` on every call.
        """
        count = Rating.objects.filter(user_id=user_id).count()
        if count > 3.0:
            return Decimal(1.0)
        return Decimal(0.0)

    def set_save_path(self, save_path):
        """Load blending weights from ``save_path + 'fwls_parameters.data'``.

        The file must be a pickled mapping with the keys ``'cb1'``,
        ``'cb2'``, ``'cf1'``, ``'cf2'`` and ``'intercept'``. ``save_path``
        is concatenated as-is, so it needs its own trailing separator.
        """
        # NOTE: pickle.load executes arbitrary code from the file; only
        # point this at parameter files produced by trusted code.
        with open(save_path + 'fwls_parameters.data', 'rb') as ub_file:
            parameters = pickle.load(ub_file)

        self.wcb1 = Decimal(parameters['cb1'])
        self.wcb2 = Decimal(parameters['cb2'])
        # The original read 'cb1' here, silently reusing the content-based
        # weight for the collaborative term.
        self.wcf1 = Decimal(parameters['cf1'])
        self.wcf2 = Decimal(parameters['cf2'])
        self.intercept = Decimal(parameters['intercept'])

    def recommend_items_by_ratings(self, user_id, active_user_items, num=6):
        """Return the top ``num`` blended recommendations from explicit ratings.

        Delegates to both recommenders' ``recommend_items_by_ratings``
        (asking for ``num * 5`` candidates each) and merges the results.
        """
        cb_recs = self.cb.recommend_items_by_ratings(
            user_id, active_user_items, num * 5)
        cf_recs = self.cf.recommend_items_by_ratings(
            user_id, active_user_items, num * 5)
        return self.merge_predictions(user_id, cb_recs, cf_recs, num)

    def recommend_items(self, user_id, num=6):
        """Return the top ``num`` blended recommendations for ``user_id``.

        Each underlying recommender is asked for ``num * 5`` candidates.
        """
        cb_recs = self.cb.recommend_items(user_id, num * 5)
        cf_recs = self.cf.recommend_items(user_id, num * 5)
        return self.merge_predictions(user_id, cb_recs, cf_recs, num)

    def merge_predictions(self, user_id, cb_recs, cf_recs, num):
        """Blend two candidate lists and return the ``num`` best items.

        ``cb_recs`` and ``cf_recs`` are iterables of
        ``(item_id, {'prediction': value, ...})`` pairs. Candidates present
        in only one list get the missing score from the other recommender's
        ``predict_score``.

        Returns:
            A list of ``(item_id, {'prediction': value})`` tuples, highest
            prediction first, at most ``num`` long.
        """
        # item_id -> {'cb': score, 'cf': score}; either key may be absent
        # until the fill-in step below.
        combined_recs = {}
        for rec in cb_recs:
            combined_recs[rec[0]] = {'cb': rec[1]['prediction']}
        for rec in cf_recs:
            combined_recs.setdefault(rec[0], {})['cf'] = rec[1]['prediction']

        fwls_preds = {}
        for key, recs in combined_recs.items():
            if 'cb' not in recs:
                recs['cb'] = self.cb.predict_score(user_id, key)
            if 'cf' not in recs:
                recs['cf'] = self.cf.predict_score(user_id, key)
            pred = self.prediction(recs['cb'], recs['cf'], user_id)
            fwls_preds[key] = {'prediction': pred}

        # Stable descending sort, so ties keep their insertion order.
        ranked = sorted(
            fwls_preds.items(),
            key=lambda item: float(item[1]['prediction']),
            reverse=True,
        )
        return ranked[:num]

    def predict_score(self, user_id, item_id):
        """Return the blended prediction for a single ``(user_id, item_id)`` pair."""
        p_cb = self.cb.predict_score(user_id, item_id)
        p_cf = self.cf.predict_score(user_id, item_id)
        # The original computed this value and discarded it, so callers
        # always received None.
        return self.prediction(p_cb, p_cf, user_id)

    def prediction(self, p_cb, p_cf, user_id):
        """Blend a content-based and a collaborative prediction for ``user_id``.

        ``p_cb`` and ``p_cf`` must support multiplication with ``Decimal``
        (presumably they are ``Decimal`` themselves -- confirm against the
        underlying recommenders).
        """
        # Evaluate each feature function once; fun2 hits the database.
        f1 = self.fun1()
        f2 = self.fun2(user_id)
        p = (self.wcb1 * f1 * p_cb
             + self.wcb2 * f2 * p_cb
             + self.wcf1 * f1 * p_cf
             + self.wcf2 * f2 * p_cf)
        return p + self.intercept