def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test):
    """Train every base recommender selected by ``enable_dict``.

    The item-item hybrid and the user-based collaborative filter are always
    trained. SVD, P3alpha, SLIM-BPR and LightFM are trained only when the
    corresponding ``enable*`` attribute (populated by ``self.setEnables``)
    is truthy.

    :param urm: user-rating matrix wrapper; must expose ``getCSR()``
    :param urm_t: matrix passed to the user-based CF ``fit``
        (presumably the transposed URM -- TODO confirm against caller)
    :param icm: first item-content matrix given to the item-item hybrid
    :param icm2: second item-content matrix given to the item-item hybrid
    :param enable_dict: dict of ``enable*`` flags, forwarded to ``setEnables``
    :param urm_test: matrix wrapper whose CSR form is the SLIM validation set
    """
    self.urm = urm
    self.n_users, self.n_items = urm.getCSR().shape
    self.setEnables(enable_dict)

    self.item_item = IIHybridRecommender(urm, icm, icm2)
    self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)

    self.user = CollaborativeFiltering()
    self.user.fit(urm_t, k=100, h=0, mode='user')

    if self.enableSVD:
        self.svd = SVDRecommender(urm, nf=385)

    if self.enableP3A:
        self.p3a = P3alpha(urm.getCSR())
        self.p3a.fit(topK=80, alpha=1, min_rating=0, implicit=True,
                     normalize_similarity=True)

    if self.enableSLIM:
        self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                    recompile_cython=False,
                                    positive_threshold=0,
                                    URM_validation=urm_test.getCSR(),
                                    final_model_sparse_weights=True,
                                    train_with_sparse_weights=False)
        # Close the log as soon as training ends instead of leaking the
        # handle. NOTE(review): assumes the model only writes to logFile
        # while fit() is running -- confirm in SLIM_BPR_Cython.
        with open("SLIM_BPR_Cython.txt", "a") as logFile:
            self.slim.fit(epochs=100, validation_every_n=1, logFile=logFile,
                          batch_size=5, topK=200, sgd_mode="adagrad",
                          learning_rate=0.075)

    if self.enableLFM:
        # LightFM (the progress messages below still say "USER CF")
        print("starting USER CF")
        self.lfm = LightFMRecommender()
        self.lfm.fit(urm, epochs=100)
        print("USER CF finished")
def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test, recalcSLIM=True):
    """Train the base recommenders whose ranked lists are merged later.

    The item-item hybrid is always trained; the user-based CF, RP3beta,
    P3alpha and SLIM-BPR models depend on the ``enable*`` attributes that
    ``self.setEnables`` derives from ``enable_dict``.

    :param urm: user-rating matrix wrapper; must expose ``getCSR()``
    :param urm_t: matrix passed to the user-based CF ``fit``
        (presumably the transposed URM -- TODO confirm against caller)
    :param icm: first item-content matrix given to the item-item hybrid
    :param icm2: second item-content matrix given to the item-item hybrid
    :param enable_dict: dict of ``enable*`` flags, forwarded to ``setEnables``
    :param urm_test: matrix wrapper whose CSR form is the SLIM validation set
    :param recalcSLIM: when true, train SLIM from scratch; otherwise load a
        previously pickled model from ``slim_test.pkl``
    """
    self.urm = urm
    self.setEnables(enable_dict)

    self.item_item = IIHybridRecommender(urm, icm, icm2)
    self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)

    if self.enableUSER:
        self.cbu = CollaborativeFiltering()
        self.cbu.fit(urm_t, k=100, h=0, mode='user')

    if self.enableRP3B:
        self.rp3b = RP3betaRecommender(urm.getCSR())

    if self.enableP3A:
        self.p3a = P3alpha(urm.getCSR())
        self.p3a.fit(topK=80, alpha=1, min_rating=0, implicit=True,
                     normalize_similarity=True)

    if self.enableSLIM:
        if recalcSLIM:
            self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                        recompile_cython=False,
                                        positive_threshold=0,
                                        URM_validation=urm_test.getCSR(),
                                        final_model_sparse_weights=True,
                                        train_with_sparse_weights=False)
            # Close the log as soon as training ends instead of leaking the
            # handle. NOTE(review): assumes the model only writes to logFile
            # while fit() is running -- confirm in SLIM_BPR_Cython.
            with open("SLIM_BPR_Cython.txt", "a") as logFile:
                self.slim.fit(epochs=100, validation_every_n=1,
                              logFile=logFile, batch_size=5, topK=200,
                              sgd_mode="adagrad", learning_rate=0.075)
            self.slim_sim = self.slim.get_similarity()
            # To cache the trained model, pickle self.slim here.
        else:
            # SECURITY: unpickling executes arbitrary code; only load files
            # produced by this project.
            # NOTE(review): self.slim_sim is not set on this path.
            with open('slim_test.pkl', 'rb') as pkl_file:
                self.slim = pickle.load(pkl_file)
def __init__(self, urm, urm_t, icm, icm2, weights_dict):
    """Train the user-based CF and the item-item hybrid sub-models.

    :param urm: user-rating matrix wrapper, stored on the instance
    :param urm_t: matrix passed to the user-based CF ``fit``
        (presumably the transposed URM -- TODO confirm against caller)
    :param icm: first item-content matrix given to the item-item hybrid
    :param icm2: second item-content matrix given to the item-item hybrid
    :param weights_dict: may carry ``item_weight``, ``cbf_weight`` and
        ``cbf2_weight``; each one defaults to 0 when absent
    """
    self.urm = urm

    # Weights of the item-item hybrid, stored as same-named attributes.
    for weight_name in ('item_weight', 'cbf_weight', 'cbf2_weight'):
        setattr(self, weight_name, weights_dict.get(weight_name, 0))

    # User-based collaborative filtering
    print("starting USER CF")
    user_cf = CollaborativeFiltering()
    self.cbu = user_cf
    user_cf.fit(urm_t, k=100, h=8, mode='user')
    print("USER CF finished")

    # Item-item hybrid recommender
    print("starting ITEM-ITEM HYBRID")
    item_hybrid = IIHybridRecommender(urm, icm, icm2)
    self.iih = item_hybrid
    item_hybrid.fit(self.item_weight, self.cbf_weight, self.cbf2_weight)
    print("ITEM-ITEM HYBRID finished")
class ListHybridRecommender():
    """Hybrid recommender that merges the ranked lists of several models.

    Each enabled base recommender produces its own top-N list for a user;
    ``list_weighter`` scores every item by its position in those lists and
    returns the best ``nRec`` items.
    """

    def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test, recalcSLIM=True):
        """Train the base recommenders selected by ``enable_dict``.

        :param urm: user-rating matrix wrapper; must expose ``getCSR()``
        :param urm_t: matrix passed to the user-based CF ``fit``
            (presumably the transposed URM -- TODO confirm against caller)
        :param icm: first item-content matrix for the item-item hybrid
        :param icm2: second item-content matrix for the item-item hybrid
        :param enable_dict: dict of ``enable*`` flags, see ``setEnables``
        :param urm_test: matrix wrapper whose CSR form is the SLIM
            validation set
        :param recalcSLIM: when true, train SLIM from scratch; otherwise
            load a previously pickled model from ``slim_test.pkl``
        """
        self.urm = urm
        self.setEnables(enable_dict)

        self.item_item = IIHybridRecommender(urm, icm, icm2)
        self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)

        if self.enableUSER:
            self.cbu = CollaborativeFiltering()
            self.cbu.fit(urm_t, k=100, h=0, mode='user')

        if self.enableRP3B:
            self.rp3b = RP3betaRecommender(urm.getCSR())
            # The model was previously built but never trained; these are
            # the hyper-parameters UserItemHybridRecommender uses.
            # NOTE(review): confirm they are the intended ones here too.
            self.rp3b.fit(topK=100, alpha=0.7, beta=0.3,
                          normalize_similarity=True, implicit=True)

        if self.enableP3A:
            self.p3a = P3alpha(urm.getCSR())
            self.p3a.fit(topK=80, alpha=1, min_rating=0, implicit=True,
                         normalize_similarity=True)

        if self.enableSLIM:
            if recalcSLIM:
                self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                            recompile_cython=False,
                                            positive_threshold=0,
                                            URM_validation=urm_test.getCSR(),
                                            final_model_sparse_weights=True,
                                            train_with_sparse_weights=False)
                # Close the log once training ends instead of leaking the
                # handle. NOTE(review): assumes the model only writes to
                # logFile while fit() is running -- confirm.
                with open("SLIM_BPR_Cython.txt", "a") as logFile:
                    self.slim.fit(epochs=100, validation_every_n=1,
                                  logFile=logFile, batch_size=5, topK=200,
                                  sgd_mode="adagrad", learning_rate=0.075)
                self.slim_sim = self.slim.get_similarity()
                # To cache the trained model, pickle self.slim here.
            else:
                # SECURITY: unpickling executes arbitrary code; only load
                # files produced by this project.
                # NOTE(review): self.slim_sim is not set on this path, so
                # getSlimRow() cannot be used after loading from disk.
                with open('slim_test.pkl', 'rb') as pkl_file:
                    self.slim = pickle.load(pkl_file)

    def fit(self, weights_dict=None, norm="none", w_method="count"):
        """Store the per-recommender weights and the merging options.

        :param weights_dict: may carry ``item_item_weight``, ``rp3b_weight``,
            ``slim_weight``, ``user_weight`` and ``p3a_weight``; a missing
            key (or ``None`` for the whole dict) means weight 0
        :param norm: normalization name, stored as ``self.norm_method``
        :param w_method: weighting scheme later handed to ``list_weighter``.
            NOTE(review): the default "count" is not one of the schemes
            ``list_weighter`` accepts, so recommending with the default
            raises ValueError -- pass "linear", "parab", "avg" or
            "count_par" explicitly.
        """
        if weights_dict is None:
            weights_dict = {}
        self.norm_method = norm
        self.weights_dict = weights_dict
        self.w_method = w_method
        self.item_item_weight = weights_dict.get('item_item_weight', 0)
        self.rp3b_weight = weights_dict.get('rp3b_weight', 0)
        self.slim_weight = weights_dict.get('slim_weight', 0)
        self.user_weight = weights_dict.get('user_weight', 0)
        self.p3a_weight = weights_dict.get('p3a_weight', 0)

    def s_recommend(self, user, nRec=10):
        """Recommend ``nRec`` items for one user.

        Collects one ranked list per enabled recommender and merges them
        with ``list_weighter`` using the scheme chosen in ``fit``.

        :param user: user identifier understood by the base recommenders
        :param nRec: number of items to return
        :return: integer numpy array of item ids, best first
        """
        # Insertion order is kept on purpose: it decides ties and the
        # running counts of the 'count_par' scheme.
        weighting_dict = {
            'ii': (self.item_item.s_recommend(user, nRec).tolist(),
                   self.item_item_weight),
        }

        # Previously gated on enableSVD, although the list comes from the
        # RP3beta model.
        if self.enableRP3B:
            weighting_dict['rp3b'] = (
                self.rp3b.s_recommend(user, nRec).tolist(), self.rp3b_weight)

        if self.enableP3A:
            weighting_dict['p3a'] = (
                self.p3a.s_recommend(user, nRec), self.p3a_weight)

        if self.enableUSER:
            weighting_dict['user'] = (
                self.cbu.s_recommend(user, nRec).tolist(), self.user_weight)

        if self.enableSLIM:
            weighting_dict['slim'] = (
                self.slim.s_recommend(user, nRec), self.slim_weight)

        return self.list_weighter(weighting_dict, nRec, 0, self.w_method)

    def m_recommend(self, user_ids, nRec=10):
        """Return ``s_recommend(uid, nRec)`` for every id in ``user_ids``."""
        return [self.s_recommend(uid, nRec) for uid in user_ids]

    def list_weighter(self, tupleDict, nRec, extra, weighting='parab'):
        """Merge several ranked lists into one by position-based scoring.

        :param tupleDict: dict whose values are ``(items, weight)`` pairs;
            ``items`` is ordered from best to worst recommendation
        :param nRec: number of items to return
        :param extra: number of additional positions (beyond ``nRec``) to
            read from every list
        :param weighting: scoring scheme applied to position ``i``
            (0-based) of each list:

            - "linear": ``(nRec + extra - i) * weight``
            - "parab": ``(0.1 * i**2 - 1.92 * i + nRec) * weight``
            - "avg": ``(nRec - i) / 3`` (the list weight is not used)
            - "count_par": the "parab" score plus 4 times the number of
              lists, among those processed so far, that contain the item
        :return: integer numpy array with at most ``nRec`` item ids,
            highest score first
        :raises ValueError: if ``weighting`` is not one of the above
        """
        depth = max(nRec + extra, 0)

        # Every candidate starts from a zero score and a zero count.
        result = {}
        count_dict = {}
        for entry in tupleDict.values():
            # Slicing (rather than indexing up to depth) tolerates lists
            # that hold fewer than nRec + extra items.
            for item in entry[0][:depth]:
                result[str(item)] = 0
                count_dict[str(item)] = 0

        # Accumulate a score based on the position inside each list.
        for entry in tupleDict.values():
            top_items = entry[0][:depth]
            weight = entry[1]
            if weighting == 'linear':
                for i, item in enumerate(top_items):
                    result[str(item)] += (nRec + extra - i) * weight
            elif weighting == 'parab':
                for i, item in enumerate(top_items):
                    result[str(item)] += (0.1 * i**2 - 1.92 * i + nRec) * weight
            elif weighting == 'avg':
                for i, item in enumerate(top_items):
                    result[str(item)] += (nRec - i) / 3
            elif weighting == 'count_par':
                for item in top_items:
                    count_dict[str(item)] += 1
                for i, item in enumerate(top_items):
                    key = str(item)
                    result[key] += (0.1 * i ** 2 - 1.92 * i + nRec) * weight \
                        + 4 * count_dict.get(key)
            else:
                raise ValueError('Not a valid weighting logic')

        # Ascending stable sort, then flip: highest score first (the flip
        # also fixes how equal scores are ordered, so keep both steps).
        sorted_results = sorted(result.items(), key=itemgetter(1))
        rec_items = [pair[0] for pair in sorted_results][::-1]
        return np.array(rec_items[0:nRec]).astype(int)

    def setEnables(self, enable_dict):
        """Copy the ``enable*`` flags from ``enable_dict`` onto the instance.

        ``enableRP3B``, ``enableUSER`` and ``enableP3A`` default to False;
        ``enableSVD`` and ``enableSLIM`` default to None when absent.
        """
        self.enableSVD = enable_dict.get('enableSVD')
        self.enableSLIM = enable_dict.get('enableSLIM')
        # Read by __init__ and s_recommend but previously never assigned.
        self.enableRP3B = enable_dict.get('enableRP3B', False)
        self.enableUSER = enable_dict.get('enableUSER', False)
        self.enableP3A = enable_dict.get('enableP3A', False)

    def _filter_seen(self, user_id, ranking):
        """Drop from ``ranking`` the items already in the user's URM row."""
        user_profile = self.urm.getCSR()[user_id]
        seen = user_profile.indices
        # np.isin replaces the deprecated np.in1d; same result for 1-D input.
        unseen_mask = np.isin(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]

    def normalize_row(self, recommended_items, method):
        """Scale a score row by its max or its sum.

        :param recommended_items: object exposing ``max()`` / ``sum()`` and
            supporting division by a scalar
        :param method: "max", "sum" or "none" (row returned untouched)
        :raises ValueError: for any other ``method``
        """
        if method == "none":
            return recommended_items
        if method == 'max':
            norm_factor = recommended_items.max()
        elif method == 'sum':
            norm_factor = recommended_items.sum()
        else:
            raise ValueError('Not a valid normalization method')
        # A zero factor would divide by zero; leave the row unscaled.
        if norm_factor == 0:
            norm_factor = 1
        return recommended_items / norm_factor

    def getSlimRow(self, user):
        """Return the user's URM row multiplied by the SLIM similarity."""
        return self.urm.getCSR().getrow(user) * self.slim_sim

    def remove_duplicates(self, ordered_list):
        """Return ``ordered_list`` without duplicates, keeping first
        occurrences in their original order."""
        seen = set()
        unique = []
        for x in ordered_list:
            if x not in seen:
                seen.add(x)
                unique.append(x)
        return unique
def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test=None):
    """Train the base recommenders selected by ``enable_dict``.

    RP3beta, SLIM-BPR, P3alpha, LightFM and PureSVD are trained only when
    the matching ``enable*`` attribute (set by ``self.setEnables``) is
    truthy; the user-based CF and the item-item hybrid are always trained.

    :param urm: user-rating matrix wrapper; must expose ``getCSR()``
    :param urm_t: matrix passed to the user-based CF ``fit``
        (presumably the transposed URM -- TODO confirm against caller)
    :param icm: first item-content matrix for the item-item hybrid
    :param icm2: second item-content matrix for the item-item hybrid
    :param enable_dict: dict of ``enable*`` flags, forwarded to ``setEnables``
    :param urm_test: matrix wrapper whose CSR form is the SLIM validation
        set; required when SLIM is enabled
    :raises ValueError: if SLIM is enabled and ``urm_test`` is None
    """
    self.urm = urm
    self.setEnables(enable_dict)

    if self.enableRP3B:
        self.rp3b = RP3betaRecommender(urm.getCSR())
        self.rp3b.fit(topK=100, alpha=0.7, beta=0.3,
                      normalize_similarity=True, implicit=True)

    if self.enableSLIM:
        if urm_test is None:
            # Fail with a clear message instead of an AttributeError on
            # None.getCSR().
            raise ValueError("urm_test is required when enableSLIM is set")
        self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                    recompile_cython=False,
                                    positive_threshold=0,
                                    URM_validation=urm_test.getCSR(),
                                    final_model_sparse_weights=True,
                                    train_with_sparse_weights=False)
        # Close the log as soon as training ends instead of leaking the
        # handle. NOTE(review): assumes the model only writes to logFile
        # while fit() is running -- confirm in SLIM_BPR_Cython.
        with open("SLIM_BPR_Cython.txt", "a") as logFile:
            self.slim.fit(epochs=100, validation_every_n=1, logFile=logFile,
                          batch_size=5, topK=200, sgd_mode="adagrad",
                          learning_rate=0.075)
        self.slim_sim = self.slim.get_similarity()

    if self.enableP3A:
        self.p3a = P3alpha(urm.getCSR())
        self.p3a.fit(topK=80, alpha=1, min_rating=0, implicit=True,
                     normalize_similarity=True)

    if self.enableLFM:
        # LightFM (the progress messages below still say "USER CF")
        print("starting USER CF")
        self.lfm = LightFMRecommender()
        self.lfm.fit(urm, epochs=100)
        print("USER CF finished")

    if self.enableSVD:
        self.svd = PureSVDRecommender(urm.getCSR())
        self.svd.fit(num_factors=225)
        print("USER CF finished")

    # User based
    print("starting USER CF")
    self.cbu = CollaborativeFiltering()
    self.cbu.fit(urm_t, k=100, h=0, mode='user')
    print("USER CF finished")

    self.item_item = IIHybridRecommender(urm, icm, icm2)
    self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)
class UserItemHybridRecommender():
    """Hybrid recommender combining user-based, item-based and other models.

    The combination strategy is chosen in ``fit`` through ``method``:
    "item_weight" (merge ranked lists by position), "rating_weight" (weighted
    sum of predicted-score rows), "hybrid" and "switch" (pick one model
    depending on profile length).

    NOTE(review): ``s_recommend`` reads ``self.cbi``, ``self.cbf`` and
    ``self.cbf2``, but ``__init__`` as written here never creates them
    (only ``self.item_item`` is built). Unless they are attached elsewhere,
    every strategy except the short-profile side of "switch" raises
    AttributeError.
    """

    def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test=None):
        """Train the base recommenders selected by ``enable_dict``.

        :param urm: user-rating matrix wrapper; must expose ``getCSR()``
        :param urm_t: matrix passed to the user-based CF ``fit``
            (presumably the transposed URM -- TODO confirm against caller)
        :param icm: first item-content matrix for the item-item hybrid
        :param icm2: second item-content matrix for the item-item hybrid
        :param enable_dict: dict of ``enable*`` flags, see ``setEnables``
        :param urm_test: matrix wrapper whose CSR form is the SLIM
            validation set; required when SLIM is enabled
        :raises ValueError: if SLIM is enabled and ``urm_test`` is None
        """
        self.urm = urm
        self.setEnables(enable_dict)

        if self.enableRP3B:
            self.rp3b = RP3betaRecommender(urm.getCSR())
            self.rp3b.fit(topK=100, alpha=0.7, beta=0.3,
                          normalize_similarity=True, implicit=True)

        if self.enableSLIM:
            if urm_test is None:
                # Fail with a clear message instead of an AttributeError on
                # None.getCSR().
                raise ValueError("urm_test is required when enableSLIM is set")
            self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                        recompile_cython=False,
                                        positive_threshold=0,
                                        URM_validation=urm_test.getCSR(),
                                        final_model_sparse_weights=True,
                                        train_with_sparse_weights=False)
            # Close the log once training ends instead of leaking the
            # handle. NOTE(review): assumes the model only writes to
            # logFile while fit() is running -- confirm.
            with open("SLIM_BPR_Cython.txt", "a") as logFile:
                self.slim.fit(epochs=100, validation_every_n=1,
                              logFile=logFile, batch_size=5, topK=200,
                              sgd_mode="adagrad", learning_rate=0.075)
            self.slim_sim = self.slim.get_similarity()

        if self.enableP3A:
            self.p3a = P3alpha(urm.getCSR())
            self.p3a.fit(topK=80, alpha=1, min_rating=0, implicit=True,
                         normalize_similarity=True)

        if self.enableLFM:
            # LightFM (the progress messages below still say "USER CF")
            print("starting USER CF")
            self.lfm = LightFMRecommender()
            self.lfm.fit(urm, epochs=100)
            print("USER CF finished")

        if self.enableSVD:
            self.svd = PureSVDRecommender(urm.getCSR())
            self.svd.fit(num_factors=225)
            print("USER CF finished")

        # User based
        print("starting USER CF")
        self.cbu = CollaborativeFiltering()
        self.cbu.fit(urm_t, k=100, h=0, mode='user')
        print("USER CF finished")

        self.item_item = IIHybridRecommender(urm, icm, icm2)
        self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)

    def fit(self, weights_dict, method='rating_weight', norm='max'):
        """Store the per-model weights and the combination strategy.

        :param weights_dict: may carry ``svd_weight``, ``user_weight``,
            ``item_weight``, ``cbf_weight``, ``cbf2_weight``, ``rp3b_weight``,
            ``slim_weight``, ``p3a_weight`` and ``lfm_weight``; each
            defaults to 0 when absent
        :param method: strategy name checked by ``s_recommend``
        :param norm: normalization used by the "rating_weight" strategy
        """
        self.svd_weight = weights_dict.get('svd_weight', 0)
        self.user_weight = weights_dict.get('user_weight', 0)
        self.item_weight = weights_dict.get('item_weight', 0)
        self.cbf_weight = weights_dict.get('cbf_weight', 0)
        self.cbf2_weight = weights_dict.get('cbf2_weight', 0)
        self.rp3b_weight = weights_dict.get('rp3b_weight', 0)
        self.slim_weight = weights_dict.get('slim_weight', 0)
        self.p3a_weight = weights_dict.get('p3a_weight', 0)
        self.lfm_weight = weights_dict.get('lfm_weight', 0)
        self.method = method
        self.norm = norm

    def s_recommend(self, user, nRec=10, switchTH="15"):
        """Recommend items for one user with the strategy chosen in ``fit``.

        :param user: user identifier understood by the base recommenders
        :param nRec: number of items to return
        :param switchTH: profile-length threshold of the "switch" strategy;
            may be an int or a numeric string (it is converted with
            ``int``)
        :return: array of recommended item ids
        :raises ValueError: if ``self.method`` is not a known strategy
        """
        if self.method == 'item_weight':
            extra = 1
            depth = nRec + extra
            weighting_dict = {
                'user': (self.cbu.s_recommend(user, depth), self.user_weight),
                'item': (self.cbi.s_recommend(user, depth), self.item_weight),
                'cbf': (self.cbf.s_recommend(user, depth), self.cbf_weight),
            }
            # Optional models; the dict key doubles as the attribute name.
            optional = (
                (self.enableCBF2, 'cbf2', self.cbf2_weight),
                (self.enableLFM, 'lfm', self.lfm_weight),
                (self.enableSVD, 'svd', self.svd_weight),
                (self.enableSLIM, 'slim', self.slim_weight),
                (self.enableP3A, 'p3a', self.p3a_weight),
            )
            for enabled, name, weight in optional:
                if enabled:
                    weighting_dict[name] = (
                        getattr(self, name).s_recommend(user, depth), weight)
            return self.item_weighter(weighting_dict, nRec, extra)

        elif self.method == 'rating_weight':
            norm_method = self.norm
            recommended_items_user = self.normalize_row(
                self.cbu.get_pred_row(user), method=norm_method)
            recommended_items_item = self.normalize_row(
                self.cbi.get_pred_row(user), method=norm_method)
            recommended_items_cbf = self.normalize_row(
                self.cbf.get_pred_row(user), method=norm_method)

            pred_row_sparse = recommended_items_user * self.user_weight \
                + recommended_items_item * self.item_weight \
                + recommended_items_cbf * self.cbf_weight

            if self.enableSLIM:
                recommended_items_slim = self.normalize_row(
                    self.getSlimRow(user), method=norm_method)
                pred_row_sparse = pred_row_sparse \
                    + self.slim_weight * recommended_items_slim

            if self.enableCBF2:
                recommended_items_cbf2 = self.normalize_row(
                    self.cbf2.get_pred_row(user), method=norm_method)
                pred_row_sparse = pred_row_sparse \
                    + self.cbf2_weight * recommended_items_cbf2

            # The P3alpha and RP3beta rows are added without normalization.
            if self.enableP3A:
                row = self.p3a.get_pred_row(user)
                pred_row_sparse = pred_row_sparse + self.p3a_weight * row

            if self.enableRP3B:
                row = self.rp3b.get_pred_row(user)
                pred_row_sparse = pred_row_sparse + self.rp3b_weight * row

            # The LightFM and SVD rows are added only after densifying.
            pred_row = np.array(pred_row_sparse.todense()).squeeze()

            if self.enableLFM:
                recommended_items_lfm = self.normalize_row(
                    self.lfm.get_pred_row(user), method=norm_method)
                pred_row = pred_row + self.lfm_weight * recommended_items_lfm

            if self.enableSVD:
                recommended_items_svd = self.normalize_row(
                    self.svd.get_pred_row(user), method=norm_method)
                pred_row = pred_row + self.svd_weight * recommended_items_svd

            ranking = np.argsort(-pred_row)
            recommended_items = self._filter_seen(user, ranking)
            return recommended_items[0:nRec]

        elif self.method == "hybrid":
            norm_method = 'max'
            extra = 1

            recommended_items_user = self.normalize_row(
                self.cbu.get_pred_row(user), method=norm_method)
            recommended_items_item = self.normalize_row(
                self.cbi.get_pred_row(user), method=norm_method)
            recommended_items_cbf = self.normalize_row(
                self.cbf.get_pred_row(user), method=norm_method)

            recommended_items_cbf2 = None
            if self.enableCBF2:
                recommended_items_cbf2 = self.normalize_row(
                    self.cbf2.get_pred_row(user), method=norm_method)

            recommended_items_rp3b = None
            if self.enableRP3B:
                recommended_items_rp3b = self.normalize_row(
                    self.rp3b.get_pred_row(user), method=norm_method)

            recommended_items_slim = None
            if self.enableSLIM:
                recommended_items_slim = self.normalize_row(
                    self.getSlimRow(user), method=norm_method)

            # NOTE(review): the rows computed above are never used and the
            # weighting dict is empty, so this strategy always returns an
            # empty array. The locals match the parameters of
            # predWeightRatingRows -- confirm whether that call was intended.
            weighting_dict = {}
            return self.item_weighter(weighting_dict, nRec, extra)

        elif self.method == 'switch':
            # int() accepts the historical string default ("15"); comparing
            # a length with a str raises TypeError on Python 3.
            threshold = int(switchTH)
            if len(self.urm.extractTracksFromPlaylist(user)) < threshold:
                # profile shorter than the threshold: user-based CF
                return self.cbu.s_recommend(user, nRec=nRec)
            else:
                # profile at or above the threshold: item-based CF
                return self.cbi.s_recommend(user, nRec=nRec)

        else:
            raise ValueError('Not a valid hybrid method')

    def m_recommend(self, user_ids, nRec=10):
        """Return ``s_recommend(uid, nRec)`` for every id in ``user_ids``."""
        return [self.s_recommend(uid, nRec) for uid in user_ids]

    def item_weighter(self, tupleDict, nRec, extra):
        """Merge ranked lists with a linear position score.

        :param tupleDict: dict whose values are ``(items, weight)`` pairs;
            ``items`` is ordered from best to worst recommendation
        :param nRec: number of items to return
        :param extra: additional positions (beyond ``nRec``) read per list
        :return: integer numpy array with at most ``nRec`` item ids,
            highest score first
        """
        depth = max(nRec + extra, 0)

        # Slicing (rather than indexing up to depth) tolerates lists that
        # hold fewer than nRec + extra items.
        result = {}
        for entry in tupleDict.values():
            for item in entry[0][:depth]:
                result[str(item)] = 0

        # Position i (0-based) of a list is worth (nRec + extra - i) * weight.
        for entry in tupleDict.values():
            weight = entry[1]
            for i, item in enumerate(entry[0][:depth]):
                result[str(item)] += (nRec + extra - i) * weight

        # Ascending stable sort, then flip: highest score first (the flip
        # also fixes how equal scores are ordered, so keep both steps).
        sorted_results = sorted(result.items(), key=itemgetter(1))
        rec_items = [pair[0] for pair in sorted_results][::-1]
        return np.array(rec_items[0:nRec]).astype(int)

    def predWeightRatingRows(self, user, nRec, recommended_items_user,
                             recommended_items_item, recommended_items_cbf,
                             recommended_items_cbf2, recommended_items_rp3b,
                             recommended_items_slim):
        """Rank items by a weighted sum of the given score rows.

        The user, item and CBF rows are always combined; the SLIM row is
        added when SLIM is enabled and the method is not "hybrid"; the CBF2
        and RP3beta rows are added when their flags are set. Items returned
        by ``_filter_seen`` are kept, truncated to ``nRec``.
        """
        pred_row_sparse = recommended_items_user * self.user_weight \
            + recommended_items_item * self.item_weight \
            + recommended_items_cbf * self.cbf_weight

        if self.enableSLIM and self.method != "hybrid":
            pred_row_sparse = pred_row_sparse \
                + self.slim_weight * recommended_items_slim

        if self.enableCBF2:
            pred_row_sparse = pred_row_sparse \
                + self.cbf2_weight * recommended_items_cbf2

        # needs to be before rp3b because rp3b output is dense
        pred_row = np.array(pred_row_sparse.todense()).squeeze()

        if self.enableRP3B:
            pred_row = pred_row + self.rp3b_weight * recommended_items_rp3b

        ranking = np.argsort(-pred_row)
        recommended_items = self._filter_seen(user, ranking)
        return recommended_items[0:nRec]

    def _filter_seen(self, user_id, ranking):
        """Drop from ``ranking`` the items returned by
        ``urm.extractTracksFromPlaylist(user_id)``."""
        seen = self.urm.extractTracksFromPlaylist(user_id)
        # np.isin replaces the deprecated np.in1d; same result for 1-D input.
        unseen_mask = np.isin(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]

    def getSlimRow(self, user):
        """Return the user's URM row multiplied by the SLIM similarity."""
        return self.urm.getCSR().getrow(user) * self.slim_sim

    def setEnables(self, enable_dict):
        """Copy the ``enable*`` flags from ``enable_dict`` onto the instance
        (None when a key is absent)."""
        self.enableSVD = enable_dict.get('enableSVD')
        self.enableRP3B = enable_dict.get('enableRP3B')
        self.enableSLIM = enable_dict.get('enableSLIM')
        self.enableCBF2 = enable_dict.get('enableCBF2')
        self.enableP3A = enable_dict.get('enableP3A')
        self.enableLFM = enable_dict.get('enableLFM')

    def normalize_row(self, recommended_items, method):
        """Normalize a score row.

        :param recommended_items: score row; for "max"/"sum" it must expose
            ``max()`` / ``sum()`` and support division by a scalar
        :param method: "max", "sum", "l1" or "l2" (the last two delegate to
            ``normalize``)
        :raises ValueError: for any other ``method``
        """
        if method == 'l1':
            return normalize(recommended_items, norm='l1')
        if method == 'l2':
            return normalize(recommended_items, norm='l2')
        if method == 'max':
            norm_factor = recommended_items.max()
        elif method == 'sum':
            norm_factor = recommended_items.sum()
        else:
            raise ValueError('Not a valid normalization method')
        # A zero factor would divide by zero; leave the row unscaled.
        if norm_factor == 0:
            norm_factor = 1
        return recommended_items / norm_factor
class UserItemHybridRecommender_v3():
    """Hybrid of a user-based CF model and an item-item hybrid model.

    ``fit`` selects how the two are combined: "weight_norm" sums their
    max-normalized score rows, "switch" picks one of the two models
    depending on the length of the user's profile.
    """

    def __init__(self, urm, urm_t, icm, icm2, weights_dict):
        """Train the user-based CF and the item-item hybrid sub-models.

        :param urm: user-rating matrix wrapper, stored on the instance
        :param urm_t: matrix passed to the user-based CF ``fit``
            (presumably the transposed URM -- TODO confirm against caller)
        :param icm: first item-content matrix for the item-item hybrid
        :param icm2: second item-content matrix for the item-item hybrid
        :param weights_dict: may carry ``item_weight``, ``cbf_weight`` and
            ``cbf2_weight``; each one defaults to 0 when absent
        """
        self.urm = urm

        # Weights of the item-item hybrid
        self.item_weight = weights_dict.get('item_weight', 0)
        self.cbf_weight = weights_dict.get('cbf_weight', 0)
        self.cbf2_weight = weights_dict.get('cbf2_weight', 0)

        # User based
        print("starting USER CF")
        self.cbu = CollaborativeFiltering()
        self.cbu.fit(urm_t, k=100, h=8, mode='user')
        print("USER CF finished")

        # Item-item hybrid recommender
        print("starting ITEM-ITEM HYBRID")
        self.iih = IIHybridRecommender(urm, icm, icm2)
        self.iih.fit(self.item_weight, self.cbf_weight, self.cbf2_weight)
        print("ITEM-ITEM HYBRID finished")

    def fit(self, enable_dict, weights_dict, method='weight_norm'):
        """Store the enable flags, the weights and the strategy name.

        :param enable_dict: dict of ``enable*`` flags, see ``setEnables``
        :param weights_dict: may carry ``user_weight`` and ``svd_weight``
            (default 0)
        :param method: "weight_norm" or "switch"
        """
        self.setEnables(enable_dict)
        self.user_weight = weights_dict.get('user_weight', 0)
        self.svd_weight = weights_dict.get('svd_weight', 0)
        self.method = method

    def s_recommend(self, user, nRec=10, switchTH="15"):
        """Recommend items for one user with the strategy chosen in ``fit``.

        :param user: user identifier understood by the sub-models
        :param nRec: number of items to return
        :param switchTH: profile-length threshold of the "switch" strategy;
            may be an int or a numeric string (it is converted with
            ``int``)
        :raises ValueError: if ``self.method`` is not a known strategy
        """
        if self.method == 'weight_norm':
            norm_method = 'max'
            recommended_items_user = self.normalize_row(
                self.cbu.get_pred_row(user), method=norm_method)
            recommended_items_iiHybrid = self.normalize_row(
                self.iih.get_pred_row(user), method=norm_method)
            return self.predWeightRatingRows(user, nRec,
                                             recommended_items_user,
                                             recommended_items_iiHybrid)

        elif self.method == 'switch':
            # int() accepts the historical string default ("15"); comparing
            # a length with a str raises TypeError on Python 3.
            threshold = int(switchTH)
            if len(self.urm.extractTracksFromPlaylist(user)) < threshold:
                # profile shorter than the threshold: user-based CF
                return self.cbu.s_recommend(user, nRec=nRec)
            else:
                # Otherwise use the item side. This class never creates
                # self.cbi; its item model is the item-item hybrid.
                return self.iih.s_recommend(user, nRec)

        else:
            raise ValueError('Not a valid hybrid method')

    def m_recommend(self, user_ids, nRec=10):
        """Return ``s_recommend(uid, nRec)`` for every id in ``user_ids``."""
        return [self.s_recommend(uid, nRec) for uid in user_ids]

    def predWeightRatingRows(self, user, nRec, recommended_items_user,
                             recommended_items_iiHybrid):
        """Rank items by the weighted sum of the two score rows.

        The user row is scaled by ``self.user_weight`` and the item-item
        hybrid row by ``self.item_weight``; the items kept by
        ``_filter_seen`` are returned, truncated to ``nRec``.
        """
        # An earlier experiment added an extra weight that grew with the
        # number of tracks in the user's profile; it is disabled.
        pred_row_sparse = recommended_items_user * self.user_weight \
            + recommended_items_iiHybrid * self.item_weight

        # Densify before ranking.
        pred_row = np.array(pred_row_sparse.todense()).squeeze()

        ranking = np.argsort(-pred_row)
        recommended_items = self._filter_seen(user, ranking)
        return recommended_items[0:nRec]

    def _filter_seen(self, user_id, ranking):
        """Drop from ``ranking`` the items returned by
        ``urm.extractTracksFromPlaylist(user_id)``."""
        seen = self.urm.extractTracksFromPlaylist(user_id)
        # np.isin replaces the deprecated np.in1d; same result for 1-D input.
        unseen_mask = np.isin(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]

    def setEnables(self, enable_dict):
        """Copy the ``enable*`` flags from ``enable_dict`` onto the instance
        (None when a key is absent)."""
        self.enableSVD = enable_dict.get('enableSVD')
        self.enableSLIM = enable_dict.get('enableSLIM')
        self.enableCBF2 = enable_dict.get('enableCBF2')

    def normalize_row(self, recommended_items, method):
        """Scale a score row by its max or its sum.

        :param recommended_items: object exposing ``max()`` / ``sum()`` and
            supporting division by a scalar
        :param method: "max" or "sum"
        :raises ValueError: for any other ``method``
        """
        if method == 'max':
            norm_factor = recommended_items.max()
        elif method == 'sum':
            norm_factor = recommended_items.sum()
        else:
            raise ValueError('Not a valid normalization method')
        # A zero factor would divide by zero; leave the row unscaled.
        if norm_factor == 0:
            norm_factor = 1
        return recommended_items / norm_factor
class XGBoostRecommender():
    """Collects the recommendations of several base models into a DataFrame.

    The constructor trains the base recommenders; ``buildXGBoostMatrix``
    then gathers their per-user recommendations into ``self.df``.
    """

    def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test):
        """Train every base recommender selected by ``enable_dict``.

        :param urm: user-rating matrix wrapper; must expose ``getCSR()``
        :param urm_t: matrix passed to the user-based CF ``fit``
            (presumably the transposed URM -- TODO confirm against caller)
        :param icm: first item-content matrix for the item-item hybrid
        :param icm2: second item-content matrix for the item-item hybrid
        :param enable_dict: dict of ``enable*`` flags, see ``setEnables``
        :param urm_test: matrix wrapper whose CSR form is the SLIM
            validation set
        """
        self.urm = urm
        self.n_users, self.n_items = urm.getCSR().shape
        self.setEnables(enable_dict)

        self.item_item = IIHybridRecommender(urm, icm, icm2)
        self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)

        self.user = CollaborativeFiltering()
        self.user.fit(urm_t, k=100, h=0, mode='user')

        if self.enableSVD:
            self.svd = SVDRecommender(urm, nf=385)

        if self.enableP3A:
            self.p3a = P3alpha(urm.getCSR())
            self.p3a.fit(topK=80, alpha=1, min_rating=0, implicit=True,
                         normalize_similarity=True)

        if self.enableSLIM:
            self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                        recompile_cython=False,
                                        positive_threshold=0,
                                        URM_validation=urm_test.getCSR(),
                                        final_model_sparse_weights=True,
                                        train_with_sparse_weights=False)
            # Close the log once training ends instead of leaking the
            # handle. NOTE(review): assumes the model only writes to
            # logFile while fit() is running -- confirm.
            with open("SLIM_BPR_Cython.txt", "a") as logFile:
                self.slim.fit(epochs=100, validation_every_n=1,
                              logFile=logFile, batch_size=5, topK=200,
                              sgd_mode="adagrad", learning_rate=0.075)

        if self.enableLFM:
            # LightFM (the progress messages below still say "USER CF")
            print("starting USER CF")
            self.lfm = LightFMRecommender()
            self.lfm.fit(urm, epochs=100)
            print("USER CF finished")

    def buildXGBoostMatrix(self, recommenders, n):
        """Build ``self.df`` with one row per (user, rank) pair.

        Columns: ``user_id`` (the user), ``itit_rec_id`` (item suggested by
        the item-item hybrid at that rank) and ``user_rec_id`` (item
        suggested by the user-based CF at that rank). When the two models
        return lists of different length for a user, both are truncated to
        the shorter one so that the columns stay aligned.

        Previously the user ids and the item ids were written to each
        other's columns, and the columns had different lengths, which made
        the DataFrame construction fail.

        NOTE(review): the SLIM / P3alpha / LightFM / SVD / profile-length
        columns were already excluded from the frame, so they are no longer
        computed here; add them next to the two lists below if needed.

        :param recommenders: not used by this method
        :param n: number of recommendations requested per user and model
        """
        print("building XGBoost Matrix")
        user_id_col = []
        itit_rec_col = []
        user_rec_col = []

        for user in range(self.n_users):
            itit_rec = list(self.item_item.s_recommend(user, n))
            # Was `self.user.g(user, n)`; every other call site uses
            # s_recommend. NOTE(review): confirm `g` was a typo.
            user_rec = list(self.user.s_recommend(user, n))

            rows = min(len(itit_rec), len(user_rec))
            user_id_col.extend([user] * rows)
            itit_rec_col.extend(itit_rec[:rows])
            user_rec_col.extend(user_rec[:rows])

        columns = {"user_id": user_id_col,
                   "itit_rec_id": itit_rec_col,
                   "user_rec_id": user_rec_col}
        self.buildDataFrame(columns)

    def setEnables(self, enable_dict):
        """Copy the ``enable*`` flags from ``enable_dict`` onto the instance
        (None when a key is absent)."""
        self.enableSVD = enable_dict.get('enableSVD')
        self.enableSLIM = enable_dict.get('enableSLIM')
        self.enableCBF2 = enable_dict.get('enableCBF2')
        self.enableP3A = enable_dict.get('enableP3A')
        self.enableLFM = enable_dict.get('enableLFM')

    def buildDataFrame(self, dict):
        """Store ``pd.DataFrame(dict)`` in ``self.df`` and print its summary.

        :param dict: mapping of column name to equally long column values
            (the parameter name shadows the builtin but is kept for
            keyword-argument compatibility)
        """
        print("building dataframe")
        self.df = pd.DataFrame(dict)
        # The summary used to be computed and thrown away; show it.
        print(self.df.describe())
        print("built df")
hr.fit(weights_dict, method='rating_weight', norm='max') if submission: recommended_items = hr.m_recommend(targetList, nRec=10) generate_output(targetList, recommended_items) else: print("Evaluating") cumulative_precision, cumulative_recall, cumulative_MAP = evaluate_algorithm( urm_test, hr) print( "Recommender, performance is: Precision = {:.4f}, Recall = {:.4f}, MAP = {:.5f}" .format(cumulative_precision, cumulative_recall, cumulative_MAP)) elif htype == "ii": hr = IIHybridRecommender(urm, icm, icm2) hr.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1) if submission: recommended_items = hr.m_recommend(targetList, nRec=10) generate_output(targetList, recommended_items) else: cumulative_precision, cumulative_recall, cumulative_MAP = evaluate_algorithm( urm_test, hr) print( "Recommender, performance is: Precision = {:.4f}, Recall = {:.4f}, MAP = {:.5f}" .format(cumulative_precision, cumulative_recall, cumulative_MAP)) elif htype == "icbf": cb = RevisedCBF(icm.getCSR(), urm.getCSR(), sparse_weights=True) cb.fit(topK=50, shrink=10,