def run_SLIM():
    """Train SLIM_BPR on the Movielens10M split and print evaluation results.

    Loads the train/test URMs, fits SLIM_BPR with RMSprop for 2 epochs
    (validating after every epoch, logging to Result_log.txt), then evaluates
    recommendations on the test set at cutoff 5.
    """
    dataReader = Movielens10MReader()
    URM_train = dataReader.get_URM_train()
    URM_test = dataReader.get_URM_test()

    recommender = SLIM_BPR_Cython(URM_train,
                                  recompile_cython=False,
                                  positive_threshold=4,
                                  sparse_weights=True)
    # recommender = MF_BPR_Cython(URM_train, recompile_cython=False, positive_threshold=4)

    # Use a context manager so the log file is closed even if fit() raises;
    # the original opened the handle and never closed it.
    with open("Result_log.txt", "a") as logFile:
        recommender.fit(epochs=2,
                        validate_every_N_epochs=1,
                        URM_test=URM_test,
                        logFile=logFile,
                        batch_size=1,
                        sgd_mode='rmsprop',
                        learning_rate=1e-4)

    results_run = recommender.evaluateRecommendations(URM_test, at=5)
    print(results_run)
class Hybrid101AlphaRecommender(BaseRecommender):
    """Hybrid101AlphaRecommender recommender.

    Position-weighted hybrid of SLIM-BPR, ItemKNN-CF and RP3beta, all
    trained on the plain train URM and merged by Hybrid1XXAlphaRecommender.
    """

    RECOMMENDER_NAME = "Hybrid101AlphaRecommender"

    def __init__(self, data: DataObject):
        super(Hybrid101AlphaRecommender, self).__init__(data.urm_train)
        self.data = data

        # The three base recommenders share the same training URM.
        self.rec1 = SLIM_BPR_Cython(data.urm_train)
        self.rec2 = ItemKNNCFRecommender(data.urm_train)
        self.rec3 = RP3betaRecommender(data.urm_train)

        self.rec1.fit(sgd_mode="adagrad", topK=15000, epochs=250,
                      learning_rate=1e-05, lambda_i=0.01, lambda_j=0.01)
        # self.rec1.fit(topK=435, epochs=115, symmetric=True, sgd_mode='adagrad', lambda_i=0.0010067865845523253,
        #               lambda_j=3.764224446055186e-05, learning_rate=0.00024125117955549121)
        self.rec2.fit(topK=20000, shrink=20000, feature_weighting="TF-IDF")
        self.rec3.fit(topK=10000, alpha=0.55, beta=0.01, implicit=True,
                      normalize_similarity=True)

        self.hybrid_rec = Hybrid1XXAlphaRecommender(
            data,
            recommenders=[self.rec1, self.rec2, self.rec3],
            max_cutoff=20)

    def fit(self):
        # One 20-position weight row per base recommender, in the same
        # order as the `recommenders` list above.
        slim_weights = [69.4, 25.7, 11.7, 9.4, 8.4, 5.4, 6.6, 6., 5.5, 5.6,
                        5., 4.4, 3.3, 5.7, 4.2, 3.7, 4.5, 2.8, 3.8, 3.4]
        item_cf_weights = [77.8, 29.3, 17.4, 9., 8.5, 8.9, 5.9, 5.9, 5.4, 5.1,
                           6., 6.3, 4.4, 4.6, 5.2, 4.9, 3.5, 3.3, 3.5, 4.3]
        rp3_weights = [78.5, 29.2, 15.6, 10.9, 9.4, 6.5, 8.3, 5.7, 6.3, 6.6,
                       4.3, 4.2, 4.3, 4.6, 6.1, 4.7, 5.1, 4.7, 4.9, 5.1]
        self.hybrid_rec.fit(weights=[slim_weights, item_cf_weights, rp3_weights])

    def recommend(self, user_id_array, cutoff=None, remove_seen_flag=True,
                  items_to_compute=None, remove_top_pop_flag=False,
                  remove_CustomItems_flag=False, return_scores=False):
        # All flags beyond user/cutoff are accepted for interface
        # compatibility and delegated implicitly to the hybrid's defaults.
        return self.hybrid_rec.recommend(user_id_array=user_id_array,
                                         cutoff=cutoff)
class Hybrid102AlphaRecommender(BaseRecommender):
    """Hybrid102AlphaRecommender recommender.

    Position-weighted hybrid (via Hybrid1CXAlphaRecommender) restricted to
    one user segment: users in data.urm_train_users_by_type[2][1]. Any other
    user receives an empty recommendation list.
    """

    RECOMMENDER_NAME = "Hybrid102AlphaRecommender"

    def __init__(self, data: DataObject):
        super(Hybrid102AlphaRecommender, self).__init__(data.urm_train)
        self.data = data
        self.rec1 = UserKNNCFRecommender(data.urm_train)
        self.rec1.fit(topK=1000, shrink=4500, similarity="cosine",
                      feature_weighting="TF-IDF")
        self.rec2 = ItemKNNCFRecommender(data.urm_train)
        self.rec2.fit(topK=2000, shrink=800, similarity="cosine",
                      feature_weighting="TF-IDF")
        self.rec3 = SLIM_BPR_Cython(data.urm_train)
        self.rec3.fit(epochs=120, topK=800, lambda_i=0.1, lambda_j=0.1,
                      learning_rate=0.0001)
        # NOTE(review): rec4 is fitted but NOT included in the hybrid's
        # recommenders list below — confirm whether it was meant to be added.
        self.rec4 = RP3betaRecommender(data.urm_train)
        self.rec4.fit(topK=30, alpha=0.21, beta=0.25)
        # Target segment served by this hybrid.
        target_users = data.urm_train_users_by_type[2][1]
        self.target_users = target_users
        self.hybrid_rec = Hybrid1CXAlphaRecommender(
            data,
            recommenders=[self.rec1, self.rec2, self.rec3],
            recommended_users=target_users,
            max_cutoff=30)

    def fit(self):
        # Per-position weights (30 positions per row).
        # NOTE(review): weights1 has 4 rows while the hybrid wraps only 3
        # recommenders — verify against Hybrid1CXAlphaRecommender's contract.
        weights1 = np.array([[0.37625119, 0.43193487, 0.17444842, 0.16197883, 0.18204363,
                              0.17016599, 0.14983434, 0.11938279, 0.09980418, 0.1147748,
                              0.12762677, 0.08689066, 0.09533745, 0.10492991, 0.097475,
                              0.05278562, 0.05244627, 0.0602501, 0.06743845, 0.06145589,
                              0.07008017, 0.07410305, 0.07170746, 0.04231058, 0.04493697,
                              0.02516579, 0.0176046, 0.01360429, 0., 0.],
                             [0.55298149, 0.27456885, 0.2278579, 0.25095311, 0.24721051,
                              0.09937549, 0.09209609, 0.07158969, 0.07174988, 0.08251237,
                              0.09157335, 0.10530935, 0.1106961, 0.12150468, 0.12001527,
                              0.10052318, 0.09536568, 0.10770821, 0.08553278, 0.06198749,
                              0.05708056, 0.05176975, 0.05953521, 0.05567152, 0.06083775,
                              0.02776653, 0.02663699, 0.01181728, 0.01168978, 0.],
                             [0.25041731, 0.15536414, 0.16953122, 0.17164006, 0.11443169,
                              0.11873982, 0.07100542, 0.06452205, 0.06123626, 0.06430055,
                              0.06311274, 0.05618836, 0.05331187, 0.04611177, 0.04239514,
                              0.03824963, 0.04398116, 0.04738213, 0.04862799, 0.03962175,
                              0.04556502, 0.04738956, 0.054498, 0.0626727, 0.04973429,
                              0.03219802, 0.03227312, 0.0307041, 0.03396853, 0.],
                             [1., 0.5538608, 0.44692181, 0.20321725, 0.22012478,
                              0.1873366, 0.14329206, 0.09783222, 0.10765581, 0.10658318,
                              0.12257066, 0.13699397, 0.15743225, 0.12181424, 0.13897041,
                              0.08672218, 0.09188654, 0.05170634, 0.04459521, 0.04785834,
                              0.05248675, 0.06035977, 0.06733202, 0.06760871, 0.07775002,
                              0.0720465, 0.05977294, 0.04260028, 0.00546561, 0.0055422]])
        # Weights are assigned directly instead of calling hybrid_rec.fit().
        self.hybrid_rec.weights = weights1

    def recommend(self, user_id_array, cutoff=None, remove_seen_flag=True,
                  items_to_compute=None, remove_top_pop_flag=False,
                  remove_CustomItems_flag=False, return_scores=False):
        # Only users in the target segment are served; others get [].
        # (Membership test implies user_id_array is a scalar user id here.)
        if user_id_array in self.target_users:
            return self.hybrid_rec.recommend(user_id_array=user_id_array,
                                             cutoff=cutoff)
        else:
            return []
def __init__(self, urm_train, eurm=False):
    """Build and fit the base recommenders for HybridNorm2Recommender.

    Parameters
    ----------
    urm_train : sparse matrix, user-rating matrix used to fit every model.
    eurm : bool, when True also precompute L2-normalized per-user score
        matrices (including two loaded from Data/*.npz caches).
    """
    super(HybridNorm2Recommender, self).__init__(urm_train)
    # Project root: two levels above this file.
    self.data_folder = Path(__file__).parent.parent.absolute()
    self.eurm = eurm
    self.num_users = urm_train.shape[0]
    # Work on a CSR copy so the caller's matrix is untouched.
    urm_train = check_matrix(urm_train.copy(), 'csr')

    recommender_2 = ItemKNNCFRecommender(urm_train)
    recommender_2.fit(topK=5, shrink=500, feature_weighting='BM25',
                      similarity='tversky', normalize=False,
                      tversky_alpha=0.0, tversky_beta=1.0)
    recommender_3 = UserKNNCFRecommender(urm_train)
    recommender_3.fit(shrink=2, topK=600, normalize=True)
    # recommender_3 = UserKNNCFRecommender(urm_train)
    # recommender_3.fit(topK=697, shrink=1000, feature_weighting='TF-IDF', similarity='tversky', normalize=False,
    #                   tversky_alpha=1.0, tversky_beta=1.0)
    recommender_4 = RP3betaRecommender(urm_train)
    recommender_4.fit(topK=16, alpha=0.03374950051351756,
                      beta=0.24087176329409027, normalize_similarity=True)
    recommender_5 = SLIM_BPR_Cython(urm_train)
    recommender_5.fit(lambda_i=0.0926694015, lambda_j=0.001697250,
                      learning_rate=0.002391, epochs=65, topK=200)
    recommender_6 = ALSRecommender(urm_train)
    recommender_6.fit(alpha=5, iterations=40, reg=0.3)

    # Numbering starts at 2; slot 1 is the cached ICM score matrix below.
    self.recommender_2 = recommender_2
    self.recommender_3 = recommender_3
    self.recommender_4 = recommender_4
    self.recommender_5 = recommender_5
    self.recommender_6 = recommender_6

    if self.eurm:
        # Matrices 1 and 7 come from .npz caches; 2-6 are computed for all
        # users from the fitted models (6 uses _compute_item_score, a dense
        # path — presumably ALS has no sparse variant; TODO confirm).
        self.score_matrix_1 = sps.load_npz(self.data_folder / 'Data/icm_sparse.npz')
        self.score_matrix_2 = self.recommender_2._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_3 = self.recommender_3._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_4 = self.recommender_4._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_5 = self.recommender_5._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_6 = self.recommender_6._compute_item_score(np.arange(self.num_users))
        self.score_matrix_7 = sps.load_npz(self.data_folder / 'Data/ucm_sparse.npz')
        # Row-wise L2 normalization puts all score matrices on one scale.
        self.score_matrix_1 = normalize(self.score_matrix_1, norm='l2', axis=1)
        self.score_matrix_2 = normalize(self.score_matrix_2, norm='l2', axis=1)
        self.score_matrix_3 = normalize(self.score_matrix_3, norm='l2', axis=1)
        self.score_matrix_4 = normalize(self.score_matrix_4, norm='l2', axis=1)
        self.score_matrix_5 = normalize(self.score_matrix_5, norm='l2', axis=1)
        self.score_matrix_6 = normalize(self.score_matrix_6, norm='l2', axis=1)
        self.score_matrix_7 = normalize(self.score_matrix_7, norm='l2', axis=1)
def __init__(self, urm_train):
    """Build and fit the six base recommenders for HybridNorm1Recommender.

    Parameters
    ----------
    urm_train : sparse matrix, user-rating matrix used to fit every model.
    """
    super(HybridNorm1Recommender, self).__init__(urm_train)
    self.num_users = urm_train.shape[0]
    # Work on a CSR copy so the caller's matrix is untouched.
    urm_train = check_matrix(urm_train.copy(), 'csr')

    recommender_1 = HybridGen2Recommender(urm_train)
    recommender_1.fit()
    recommender_2 = ItemKNNCFRecommender(urm_train)
    recommender_2.fit(topK=5, shrink=500, feature_weighting='BM25',
                      similarity='tversky', normalize=False,
                      tversky_alpha=0.0, tversky_beta=1.0)
    recommender_3 = UserKNNCFRecommender(urm_train)
    recommender_3.fit(topK=697, shrink=1000, feature_weighting='TF-IDF',
                      similarity='tversky', normalize=False,
                      tversky_alpha=1.0, tversky_beta=1.0)
    recommender_4 = RP3betaRecommender(urm_train)
    recommender_4.fit(topK=16, alpha=0.03374950051351756,
                      beta=0.24087176329409027, normalize_similarity=True)
    # Local name fixed from the original "recommmender_5" typo for
    # consistency with the other recommender_* locals (no behavior change).
    recommender_5 = SLIM_BPR_Cython(urm_train)
    recommender_5.fit(lambda_i=0.0926694015, lambda_j=0.001697250,
                      learning_rate=0.002391, epochs=65, topK=200)
    recommender_6 = ALSRecommender(urm_train)
    recommender_6.fit(alpha=5, iterations=40, reg=0.3)

    self.recommender_1 = recommender_1
    self.recommender_2 = recommender_2
    self.recommender_3 = recommender_3
    self.recommender_4 = recommender_4
    self.recommender_5 = recommender_5
    self.recommender_6 = recommender_6
def __init__(self, URM_train):
    """Set up HybridWarmRecommender: fit an ItemKNN-CF model and a
    SLIM-BPR model on the given URM and keep both for later blending."""
    super(HybridWarmRecommender, self).__init__(URM_train)

    # First base model: item-based collaborative filtering.
    item_cf = ItemKNNCFRecommender(URM_train)
    item_cf.fit(topK=20, shrink=30)

    # Second base model: SLIM trained with the BPR objective.
    slim_bpr = SLIM_BPR_Cython(URM_train)
    slim_bpr.fit(epochs=40, lambda_i=0.01, lambda_j=0.001,
                 learning_rate=1e-4, topK=200)

    # recommender_3 = TopPop(URM_train)
    # recommender_3.fit()

    # Keep a CSR copy of the URM for score/seen-item handling.
    self.URM_train = check_matrix(URM_train.copy(), 'csr')
    self.recommender_1 = item_cf
    self.recommender_2 = slim_bpr
class Hybrid000AlphaRecommender(BaseRecommender):
    """Hybrid000AlphaRecommender recommender.

    Segment-switching hybrid: warm users are served by a collaborative
    SLIM-BPR model, cold users by a content-based UserKNN model on the UCM.
    """

    RECOMMENDER_NAME = "Hybrid000AlphaRecommender"

    def __init__(self, URM_train, UCM, cold_users, warm_users):
        super(Hybrid000AlphaRecommender, self).__init__(URM_train)
        self.warm_recommender = SLIM_BPR_Cython(URM_train)
        self.cold_recommender = UserKNNCBFRecommender(UCM, URM_train)
        self.cold_users = cold_users
        self.warm_users = warm_users

    def fit(self, random_seed=42, epochs=500, topK=14000, shrink=2,
            lambda_i=0.0, lambda_j=0.0):
        """Fit both segment models; epochs/lambdas go to the warm model,
        topK/shrink to the cold one."""
        self.warm_recommender.fit(epochs=epochs, lambda_i=lambda_i,
                                  lambda_j=lambda_j, random_seed=random_seed)
        self.cold_recommender.fit(topK=topK, shrink=shrink,
                                  feature_weighting="BM25")

    def recommend(self, user_id_array, cutoff=None, remove_seen_flag=True,
                  items_to_compute=None, remove_top_pop_flag=False,
                  remove_CustomItems_flag=False, return_scores=False):
        # Route by segment membership, warm first (as in the original).
        # A user in neither segment falls through and yields None.
        routes = ((self.warm_users, self.warm_recommender),
                  (self.cold_users, self.cold_recommender))
        for segment, recommender in routes:
            if user_id_array in segment:
                return recommender.recommend(user_id_array, cutoff=cutoff)
        return None
class Hybrid201AlphaRecommender(BaseRecommender):
    """Hybrid201AlphaRecommender recommender.

    Same three-model hybrid as Hybrid101AlphaRecommender, but the base
    models are trained on the URM stacked with the transposed augmented ICM,
    and each fitted model is cached on disk keyed by the data random seed.
    """

    RECOMMENDER_NAME = "Hybrid201AlphaRecommender"

    def __init__(self, data: DataObject):
        super(Hybrid201AlphaRecommender, self).__init__(data.urm_train)
        self.data = data
        # Train on URM with item-content rows appended (S-SLIM-style input).
        urm = sps.vstack([data.urm_train, data.icm_all_augmented.T]).tocsr()
        self.rec1 = SLIM_BPR_Cython(urm)
        self.rec2 = ItemKNNCFRecommender(urm)
        self.rec3 = RP3betaRecommender(urm)
        self.random_seed = data.random_seed

        self._load_or_fit(
            self.rec1, "stored_recommenders/slim_bpr/",
            f'with_icm_{self.random_seed}_topK=15000_epochs=250_learning_rate=1e-05_lambda_i=0.01_lambda_j=0.01',
            dict(sgd_mode="adagrad", topK=15000, epochs=250,
                 learning_rate=1e-05, lambda_i=0.01, lambda_j=0.01))
        self._load_or_fit(
            self.rec2, "stored_recommenders/item_cf/",
            f'with_icm_{self.random_seed}_topK=20000_shrink=20000_feature_weighting=TF-IDF',
            dict(topK=20000, shrink=20000, feature_weighting="TF-IDF"))
        self._load_or_fit(
            self.rec3, "stored_recommenders/rp3_beta/",
            f'with_icm_{self.random_seed}_topK=10000_alpha=0.55_beta=0.01_implicit=True_normalize_similarity=True',
            dict(topK=10000, alpha=0.55, beta=0.01, implicit=True,
                 normalize_similarity=True))

        self.hybrid_rec = Hybrid1XXAlphaRecommender(
            data,
            recommenders=[self.rec1, self.rec2, self.rec3],
            max_cutoff=20)

    @staticmethod
    def _load_or_fit(recommender, folder, file_name, fit_params):
        """Load a cached model; on any failure fit from scratch and cache it.

        `except Exception` (not a bare except) so Ctrl-C / SystemExit still
        propagate instead of silently triggering a full refit.
        """
        try:
            recommender.load_model(folder, file_name)
        except Exception:
            recommender.fit(**fit_params)
            recommender.save_model(folder, file_name)

    def fit(self):
        # One 20-position weight row per base recommender.
        weights = [[69.4, 25.7, 11.7, 9.4, 8.4, 5.4, 6.6, 6., 5.5, 5.6,
                    5., 4.4, 3.3, 5.7, 4.2, 3.7, 4.5, 2.8, 3.8, 3.4],
                   [77.8, 29.3, 17.4, 9., 8.5, 8.9, 5.9, 5.9, 5.4, 5.1,
                    6., 6.3, 4.4, 4.6, 5.2, 4.9, 3.5, 3.3, 3.5, 4.3],
                   [78.5, 29.2, 15.6, 10.9, 9.4, 6.5, 8.3, 5.7, 6.3, 6.6,
                    4.3, 4.2, 4.3, 4.6, 6.1, 4.7, 5.1, 4.7, 4.9, 5.1]]
        self.hybrid_rec.fit(weights=weights)

    def recommend(self, user_id_array, cutoff=None, remove_seen_flag=True,
                  items_to_compute=None, remove_top_pop_flag=False,
                  remove_CustomItems_flag=False, return_scores=False):
        # Delegate entirely to the weighted hybrid.
        return self.hybrid_rec.recommend(user_id_array=user_id_array,
                                         cutoff=cutoff)
class SSLIM_BPR(BaseItemSimilarityMatrixRecommender):
    """S-SLIM BPR: SLIM-BPR trained on a "virtual" URM built by stacking the
    alpha-weighted URM on top of the (1 - alpha)-weighted transposed ICM."""

    RECOMMENDER_NAME = "S-SLIM_BPR"

    def __init__(self, URM_train, ICM_train, verbose=True,
                 free_mem_threshold=0.5, recompile_cython=False):
        super(SSLIM_BPR, self).__init__(URM_train, verbose=verbose)
        self.ICM_train = ICM_train
        # Attribute keeps the original spelling ("treshold") for
        # compatibility with any external readers of this state.
        self.free_mem_treshold = free_mem_threshold
        self.recompile_cython = recompile_cython
        self.verbose = verbose

    def fit(self, alpha=0.5, epochs=300, positive_threshold_BPR=None,
            train_with_sparse_weights=None, symmetric=True, random_seed=None,
            batch_size=1000, lambda_i=0.0, lambda_j=0.0, learning_rate=1e-4,
            topK=200, sgd_mode='adagrad', gamma=0.995, beta_1=0.9,
            beta_2=0.999, **earlystopping_kwargs):
        """Blend URM and ICM into one matrix, fit SLIM-BPR on it, and expose
        the learned item-item similarity as self.W_sparse.

        `alpha` balances collaborative (URM) versus content (ICM) signal.
        """
        self.alpha = alpha

        # Virtual URM: alpha-scaled interactions stacked over
        # (1 - alpha)-scaled item features (items become "users").
        weighted_urm = self.URM_train * alpha
        weighted_icm = self.ICM_train * (1 - alpha)
        self.virtual_URM = sps.vstack([weighted_urm, weighted_icm.T]).tocsr()

        inner_model = SLIM_BPR_Cython(
            self.virtual_URM,
            verbose=self.verbose,
            free_mem_threshold=self.free_mem_treshold,
            recompile_cython=self.recompile_cython)
        inner_model.fit(epochs=epochs,
                        positive_threshold_BPR=positive_threshold_BPR,
                        train_with_sparse_weights=train_with_sparse_weights,
                        symmetric=symmetric,
                        random_seed=random_seed,
                        batch_size=batch_size,
                        lambda_i=lambda_i,
                        lambda_j=lambda_j,
                        learning_rate=learning_rate,
                        topK=topK,
                        sgd_mode=sgd_mode,
                        gamma=gamma,
                        beta_1=beta_1,
                        beta_2=beta_2,
                        **earlystopping_kwargs)

        # Same name-mangled private attribute as the original
        # (_SSLIM_BPR__slim_bpr).
        self.__slim_bpr = inner_model
        self.W_sparse = inner_model.W_sparse
pyplot.show() #np.random.seed(1234) URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.97) ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.8) evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10], exclude_seen=True) earlystopping_keywargs = { "validation_every_n": 10, "stop_on_validation": True, "evaluator_object": evaluator_validation, "lower_validations_allowed": 5, "validation_metric": "MAP", } recommender = SLIM_BPR_Cython(URM_train, recompile_cython=False) recommender.fit( **{ "topK": 1000, "epochs": 130, "symmetric": False, "sgd_mode": "adagrad", "lambda_i": 1e-05, "lambda_j": 0.01, "learning_rate": 0.0001 }) print(evaluator_validation.evaluateRecommender(recommender))
class SStorinoEnsemble:
    """Ensemble wrapper that (in its current form) uses a single component:
    a user-based SLIM-BPR trained on the transposed URM."""

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        # Default weights list several components, but only USER_BPR is
        # actually used by recommend() below.
        if parameters is None:
            parameters = {
                "USER_CF": 0.8,
                "USER_BPR": 0.7,
                "ITEM_CF": 1,
                "ITEM_BPR": 0.8,
                "CBF": 0.3,
                "IALS": 1.0,
                "CBF_BPR": 1
            }
        self.ensemble_weights = parameters
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()
        self.initialize_components()

    def initialize_components(self):
        # Transposed URM: users play the role of items, so the learned
        # similarity is user-user.
        self.user_bpr_recommender = SLIM_BPR_Cython(self.train.T,
                                                    positive_threshold=0)

    def fit(self):
        # fit() returns the learned similarity matrix, kept as user_bpr_w.
        self.user_bpr_w = self.user_bpr_recommender.fit(
            epochs=6,
            topK=200,
            lambda_i=0.0,
            lambda_j=0.0,
            # lambda_j=0.0005
            gamma=0.9,
            beta_1=0.00099,
            beta_2=0.00099,
            batch_size=20000,
            sgd_mode='adagrad',
            learning_rate=1e-2)

    def recommend(self, user_id, combiner, at=10):
        """Score all items for one user and delegate ranking to `combiner`."""
        user_profile = self.train[user_id, :]  # NOTE(review): unused
        # User-similarity row dotted with the URM gives item scores.
        user_bpr_r = self.user_bpr_w[user_id].dot(self.train).toarray().ravel()
        scores = [
            [user_bpr_r, self.ensemble_weights["USER_BPR"], "USER_BPR"],
        ]
        for r in scores:
            self.filter_seen(user_id, r[0])
        return combiner.combine(scores, at)

    def filter_seen(self, user_id, scores):
        """Mask already-seen items in-place with a large negative score."""
        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])
        user_profile = self.train.indices[start_pos:end_pos]
        scores[user_profile] = -1000000  #-np.inf
        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Stack per-user recommendation rows into one array,
        each row prefixed with the user id."""
        res = np.array([])
        n = 0  # NOTE(review): unused
        for i in user_list:
            recList = self.recommend(i, combiner, at).T
            tuple = np.concatenate(([i], recList))
            if (res.size == 0):
                res = tuple
            else:
                res = np.vstack([res, tuple])
        return res

    def get_component_data(self):
        # Placeholder implementation; prints a stub string and returns None.
        print('сука')
class HybridLinear10Recommneder(BaseItemSimilarityMatrixRecommender):
    """Linear blend of SLIM-BPR and UserKNN-CF item scores.

    Note: the class name carries a historical typo ("Recommneder"); it is
    kept unchanged because it is the public interface.
    """

    RECOMMENDER_NAME = "HybridLinear10Recommender"

    def __init__(self, URM_train, seed: int):
        super(HybridLinear10Recommneder, self).__init__(URM_train)
        self.slimBPR = SLIM_BPR_Cython(URM_train)
        self.userKnnCF = UserKNNCFRecommender(URM_train)
        #self.itemcf = ItemKNNCFRecommender(urm)

    def fit(self, alpha=1):
        """Fit both base models; `alpha` weights SLIM-BPR and
        (1 - alpha) weights UserKNN-CF in the blended score."""
        self.slimBPR.fit(epochs=135, topK=933, symmetric=False,
                         sgd_mode='adagrad', lambda_i=1.054e-05,
                         lambda_j=1.044e-05, learning_rate=0.00029)
        self.userKnnCF.fit(topK=201, shrink=998, similarity='cosine',
                           normalize=True, feature_weighting='TF-IDF')
        self.alpha = alpha
        self.beta = 1 - alpha
        #self.gamma = alpha_gamma_ratio

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        # ATTENTION: as in the original, this only works when
        # user_id_array is a scalar, not an array. TODO
        bpr_scores = self.slimBPR._compute_item_score(
            user_id_array=user_id_array)
        knn_scores = self.userKnnCF._compute_item_score(
            user_id_array=user_id_array)
        # Weighted sum of the two (un-normalized) score vectors.
        blended = self.alpha * bpr_scores + self.beta * knn_scores
        return blended

    def save_model(self, folder_path, file_name=None):
        """Persist an (empty) data dict under the recommender's name."""
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path + file_name))
        data_io = DataIO(folder_path=folder_path)
        data_io.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
# Script chunk: prepare evaluators and fit/load several base recommenders.
# URM_train / URM_test / URM_validation come from earlier in the file.
URM_train = URM_train.tocsr()
URM_test = URM_test.tocsr()
URM_validation = URM_validation.tocsr()

ICM_all = sps.load_npz('myFiles/ICM_all.npz')

evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

itemKNNCF = ItemKNNCFRecommender(URM_train)
itemKNNCF.fit(shrink=24, topK=10)

recommenderCYTHON = SLIM_BPR_Cython(URM_train, recompile_cython=False)
# BUG FIX: sgd_mode was 'sdg' (typo) — SLIM_BPR's optimizers are
# 'sgd'/'adagrad'/'adam'/'rmsprop', so 'sdg' would be rejected at fit time.
recommenderCYTHON.fit(epochs=2000, batch_size=200, sgd_mode='sgd',
                      learning_rate=1e-5, topK=10)

recommenderCB = ItemKNNCBFRecommender(URM_train, ICM_all)
recommenderCB.fit(shrink=24, topK=10)

# ElasticNet model is loaded from disk instead of being refit.
recommenderELASTIC = SLIMElasticNetRecommender(URM_train)
# recommenderELASTIC.fit(topK=10)
# recommenderELASTIC.save_model('model/', file_name='SLIM_ElasticNet')
recommenderELASTIC.load_model('model/', file_name='SLIM_ElasticNet_train')

# recommenderAlphaGRAPH = P3alphaRecommender(URM_train)
# recommenderAlphaGRAPH.fit(topK=10, alpha=0.22, implicit=True, normalize_similarity=True)

recommenderBetaGRAPH = RP3betaRecommender(URM_train)
"normalize_similarity": True
}  # closes a *_best_parameters dict started before this chunk

userKNNCF_best_parameters = {
    "topK": 466,
    "shrink": 9,
    "similarity": "dice",
    "normalize": False
}

# Per-user profile lengths, used to bucket users into 5% popularity groups.
URM_train = sps.csr_matrix(URM_train)
profile_length = np.ediff1d(URM_train.indptr)
block_size = int(len(profile_length) * 0.05)
sorted_users = np.argsort(profile_length)

# Fit the three models being compared per user group.
slim_model = SLIM_BPR_Cython(URM_train, recompile_cython=False)
slim_model.fit(**slim_best_parameters)
rp3_model = RP3betaRecommender(URM_train)
rp3_model.fit(**rp3_best_parameters)
userCF_model = UserKNNCFRecommender(URM_train)
userCF_model.fit(**userKNNCF_best_parameters)

# Accumulators for per-group MAP values (filled later in the file).
MAP_slim_per_group = []
MAP_rp3_per_group = []
MAP_userCF_per_group = []
cutoff = 10

# NOTE(review): the four lines below recompute exactly the same values as
# above (nothing modified URM_train in between) — likely a copy-paste
# duplicate; confirm before removing.
URM_train = sps.csr_matrix(URM_train)
profile_length = np.ediff1d(URM_train.indptr)
block_size = int(len(profile_length) * 0.05)
sorted_users = np.argsort(profile_length)
class Hybrid110Recommender(BaseItemSimilarityMatrixRecommender):
    """Range-switching hybrid: each user is routed by profile length to
    P3alpha (0-50 interactions), RP3beta (51-200) or SLIM-BPR (>200)."""

    RECOMMENDER_NAME = "Hybrid110Recommender"

    def __init__(self, URM_train, seed: int):
        super(Hybrid110Recommender, self).__init__(URM_train)
        # Per-user interaction counts (matrix of shape (n_users, 1)).
        self.number_of_interactions_per_user = (self.URM_train > 0).sum(axis=1)
        self.highRange = SLIM_BPR_Cython(URM_train)
        self.lowRange = P3alphaRecommender(URM_train)
        self.midRange = RP3betaRecommender(URM_train)
        #self.itemcf = ItemKNNCFRecommender(urm)

    def fit(self):
        try:
            self.highRange.load_model("result_experiments/range_200--1/",
                                      "SLIM_BPR_Recommender_best_model")
        except Exception:
            # Was a bare `except:`; narrowed so Ctrl-C / SystemExit are
            # not swallowed. Any load failure triggers a fresh fit.
            self.highRange.fit(topK=100, epochs=70, symmetric=False,
                               sgd_mode='adam', lambda_i=0.01,
                               lambda_j=1e-05, learning_rate=0.0001)
        self.lowRange.fit(topK=685, alpha=0.41303525095465676,
                          normalize_similarity=False)
        self.midRange.fit(topK=979, alpha=0.42056182126095865,
                          beta=0.03446674275249296,
                          normalize_similarity=False)

    def recommend(self, user_id_array, cutoff=None, remove_seen_flag=True,
                  items_to_compute=None, remove_top_pop_flag=False,
                  remove_custom_items_flag=False, return_scores=False):
        # Membership tests imply user_id_array is a scalar user id; a user
        # matching no range falls through and returns None (as before).
        if user_id_array in self._user_with_interactions_within(0, 50):
            return self.lowRange.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self._user_with_interactions_within(51, 200):
            return self.midRange.recommend(user_id_array, cutoff=cutoff)
        elif user_id_array in self._user_with_interactions_over(200):
            return self.highRange.recommend(user_id_array, cutoff=cutoff)

    def _user_with_interactions_within(self, x=0, y=200):
        """User indices with interaction count in [x, y] (inclusive)."""
        # Single vectorized mask; the original filtered one np.where result
        # through another with an O(n^2) list comprehension.
        counts = np.asarray(self.number_of_interactions_per_user).ravel()
        return np.where((counts >= x) & (counts <= y))[0]

    def _user_with_interactions_over(self, x=0):
        """User indices with interaction count strictly greater than x."""
        counts = np.asarray(self.number_of_interactions_per_user).ravel()
        return np.where(counts > x)[0]

    def save_model(self, folder_path, file_name=None):
        """Persist an (empty) data dict under the recommender's name."""
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path + file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
def __init__(self, urm_train, eurm=False):
    """Build and fit the base recommenders for HybridNormOrigRecommender.

    Parameters
    ----------
    urm_train : sparse matrix, user-rating matrix used to fit every model.
    eurm : bool, when True also precompute max-normalized per-user score
        matrices (matrix 1 is cached on disk as uicm_orig_sparse.npz).
    """
    super(HybridNormOrigRecommender, self).__init__(urm_train)
    self.data_folder = Path(__file__).parent.parent.absolute()
    self.eurm = eurm
    self.num_users = urm_train.shape[0]
    data = DataManager()
    # Work on a CSR copy so the caller's matrix is untouched.
    urm_train = check_matrix(urm_train.copy(), 'csr')
    icm_price, icm_asset, icm_sub, icm_all = data.get_icm()
    ucm_age, ucm_region, ucm_all = data.get_ucm()

    recommender_1 = ItemKNNCBFRecommender(urm_train, icm_all)
    recommender_1.fit(shrink=40, topK=20, feature_weighting='BM25')
    # NOTE(review): recommender_7 is fitted but no score_matrix_7 is
    # computed in the eurm branch below — confirm whether that is intended.
    recommender_7 = UserKNNCBFRecommender(urm_train, ucm_all)
    recommender_7.fit(shrink=1777, topK=1998, similarity='tversky',
                      feature_weighting='BM25',
                      tversky_alpha=0.1604953616,
                      tversky_beta=0.9862348646)
    # recommender_1 = HybridGenRecommender(urm_train, eurm=self.eurm)
    # recommender_1.fit()
    # recommender_2 = ItemKNNCFRecommender(urm_train)
    # recommender_2.fit(shrink=30, topK=20)
    recommender_2 = ItemKNNCFRecommender(urm_train)
    recommender_2.fit(topK=5, shrink=500, feature_weighting='BM25',
                      similarity='tversky', normalize=False,
                      tversky_alpha=0.0, tversky_beta=1.0)
    recommender_3 = UserKNNCFRecommender(urm_train)
    recommender_3.fit(shrink=2, topK=600, normalize=True)
    # recommender_3 = UserKNNCFRecommender(urm_train)
    # recommender_3.fit(topK=697, shrink=1000, feature_weighting='TF-IDF', similarity='tversky', normalize=False,
    #                   tversky_alpha=1.0, tversky_beta=1.0)
    recommender_4 = RP3betaRecommender(urm_train)
    recommender_4.fit(topK=16, alpha=0.03374950051351756,
                      beta=0.24087176329409027, normalize_similarity=False)
    recommender_5 = SLIM_BPR_Cython(urm_train)
    recommender_5.fit(lambda_i=0.0926694015, lambda_j=0.001697250,
                      learning_rate=0.002391, epochs=65, topK=200)
    recommender_6 = ALSRecommender(urm_train)
    recommender_6.fit(alpha=5, iterations=40, reg=0.3)

    self.recommender_1 = recommender_1
    self.recommender_2 = recommender_2
    self.recommender_3 = recommender_3
    self.recommender_4 = recommender_4
    self.recommender_5 = recommender_5
    self.recommender_6 = recommender_6
    self.recommender_7 = recommender_7

    if self.eurm:
        if Path(self.data_folder / 'Data/uicm_orig_sparse.npz').is_file():
            print("Previous uicm_sparse found")
            # BUG FIX: the original loaded 'Data/uicm_sparse.npz' here,
            # which is neither the file checked above nor the file saved
            # below; load the same cache file that is checked and written.
            self.score_matrix_1 = sps.load_npz(
                self.data_folder / 'Data/uicm_orig_sparse.npz')
        else:
            print("uicm_sparse not found, create new one...")
            self.score_matrix_1 = self.recommender_1._compute_item_matrix_score(
                np.arange(self.num_users))
            sps.save_npz(self.data_folder / 'Data/uicm_orig_sparse.npz',
                         self.score_matrix_1)
        self.score_matrix_2 = self.recommender_2._compute_item_matrix_score(
            np.arange(self.num_users))
        self.score_matrix_3 = self.recommender_3._compute_item_matrix_score(
            np.arange(self.num_users))
        self.score_matrix_4 = self.recommender_4._compute_item_matrix_score(
            np.arange(self.num_users))
        self.score_matrix_5 = self.recommender_5._compute_item_matrix_score(
            np.arange(self.num_users))
        self.score_matrix_6 = self.recommender_6._compute_item_score(
            np.arange(self.num_users))

        # Row-wise max normalization puts all score matrices on one scale.
        # BUG FIX: score_matrix_1 was assigned normalize(score_matrix_2)
        # (copy-paste error), so matrix 1 was silently replaced by matrix 2.
        self.score_matrix_1 = normalize(self.score_matrix_1, norm='max', axis=1)
        self.score_matrix_2 = normalize(self.score_matrix_2, norm='max', axis=1)
        self.score_matrix_3 = normalize(self.score_matrix_3, norm='max', axis=1)
        self.score_matrix_4 = normalize(self.score_matrix_4, norm='max', axis=1)
        self.score_matrix_5 = normalize(self.score_matrix_5, norm='max', axis=1)
        self.score_matrix_6 = normalize(self.score_matrix_6, norm='max', axis=1)
# Script chunk: for each "anti-overfitting" generation, re-split the data
# with a different random seed and fit a fresh set of base recommenders.
# `anti_overfitting_generation` is defined earlier in the file; the loop
# body (and any appends to ws / base_recommenders) may continue beyond
# this chunk.
ws = []
base_recommenders = []
data = None
description_list = []
for i in range(anti_overfitting_generation):
    data_reader = DataReader()
    # New DataObject per generation, seeded differently each time.
    data = DataObject(data_reader, 1, random_seed=(20 + i))
    # TODO: Edit here
    # Change the recommenders
    rec1 = SLIM_BPR_Cython(data.urm_train)
    rec1.fit(sgd_mode="adagrad", topK=30, epochs=150, learning_rate=1e-05,
             lambda_i=1, lambda_j=0.001)
    # NOTE(review): descriptions are appended on every loop iteration, so
    # they repeat once per generation — confirm that is intended.
    description_list.append(
        f"SLIM_BPR sgd_mode=adagrad, topK=30, epochs=150, learning_rate=1e-05, lambda_i=1, lambda_j=0.001"
    )
    rec2 = ItemKNNCFRecommender(data.urm_train)
    rec2.fit(topK=10, shrink=30, similarity="tanimoto")
    description_list.append(
        f"Item CF topK=10, shrink=30, similarity=tanimoto")
    rec3 = RP3betaRecommender(data.urm_train)
    rec3.fit(topK=20, alpha=0.16, beta=0.24, implicit=True,
             normalize_similarity=True)
class NTeslaEnsemble:
    """Two-stage ensemble: a BMussoliniEnsemble list and a BPR-based list
    (item-BPR + user-BPR) are combined probabilistically per user."""

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        if parameters is None:
            parameters = {
                "USER_CF": 0.8,
                "USER_BPR": 0.7,
                "ITEM_CF": 1,
                "ITEM_BPR": 0.8,
                "CBF": 0.3,
                "IALS": 1.0,
                "CBF_BPR": 1
            }
        # NOTE(review): recommend() reads self.ensemble_weights["ALPHA"],
        # which is absent from these defaults — with default parameters
        # recommend() would raise KeyError; confirm callers always supply it.
        self.ensemble_weights = parameters
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()
        self.initialize_components()

    def initialize_components(self):
        # Sub-ensemble plus two SLIM-BPR models: item-item on the URM and
        # user-user on the transposed URM.
        self.bm_recommender = BMussoliniEnsemble(
            urm_train=self.train, urm_test=self.test, icm=self.icm,
            parameters=self.ensemble_weights)
        self.item_bpr_recommender = SLIM_BPR_Cython(self.train,
                                                    positive_threshold=0)
        self.user_bpr_recommender = SLIM_BPR_Cython(self.train.T,
                                                    positive_threshold=0)

    def fit(self):
        # fit() returns the learned similarity matrices (kept as *_w).
        self.bm_recommender.fit()
        self.item_bpr_w = self.item_bpr_recommender.fit(
            epochs=15, topK=200, batch_size=200, sgd_mode='adagrad',
            learning_rate=1e-2)
        self.user_bpr_w = self.user_bpr_recommender.fit(
            epochs=10, topK=200, batch_size=200, sgd_mode='adagrad',
            learning_rate=1e-2)

    def recommend(self, user_id, combiner, at=10):
        """Combine the BM list and the BPR list probabilistically.

        `combiner` is accepted for interface compatibility; internally a
        linearCombiner is used for each stage and a ProbabilisticCombiner
        for the final merge.
        """
        user_profile = self.train[user_id, :]
        bm_list = self.bm_recommender.recommend(user_id, linearCombiner(),
                                                at=40)
        # Item-BPR: profile x item-item similarity; User-BPR: user-user
        # similarity row x URM. Both L2-normalized before weighting.
        item_bpr_r = preprocessing.normalize(
            user_profile.dot(self.item_bpr_w).toarray(), norm='l2').ravel()
        user_bpr_r = preprocessing.normalize(
            self.user_bpr_w[user_id].dot(self.train).toarray(),
            norm='l2').ravel()
        bpr_scores = [
            [item_bpr_r, self.ensemble_weights["ITEM_BPR"], "ITEM_BPR"],
            [user_bpr_r, self.ensemble_weights["USER_BPR"], "USER_BPR"]
        ]
        for r in bpr_scores:
            self.filter_seen(user_id, r[0])
        bpr_list = (linearCombiner()).combine(bpr_scores, at=40)
        lists = [
            [bm_list],
            [bpr_list]
        ]
        return (ProbabilisticCombiner()).combine(
            scores=lists, alpha=self.ensemble_weights["ALPHA"], at=at)

    def filter_seen(self, user_id, scores):
        """Mask already-seen items in-place with a large negative score."""
        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])
        user_profile = self.train.indices[start_pos:end_pos]
        scores[user_profile] = -1000000  #-np.inf
        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Stack per-user recommendation rows into one array,
        each row prefixed with the user id."""
        res = np.array([])
        n = 0  # NOTE(review): unused
        for i in user_list:
            recList = self.recommend(i, combiner, at)
            tuple = np.concatenate(([i], recList))
            if (res.size == 0):
                res = tuple
            else:
                res = np.vstack([res, tuple])
        return res

    def get_component_data(self):
        # Static component-weight report (ITEM_BPR entry commented out).
        return {
            "ITEM_CF": 1,
            "USER_CF": 1,
            # "ITEM_BPR" : item_bpr ,
            "IALS": 1,
            "CBF": 1,
            "CBF_BPR": 1
        }
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    # Fixed seed so every split (and the stored models keyed on it) is reproducible.
    seed = 1205
    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    # SPLIT TO GET THE sub_rec VALID PARTITION
    URM_train_bis, URM_valid_sub = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    # Only the hybrid-combination search is active; the others are kept for reference.
    collaborative_algorithm_list = [
        #EASE_R_Recommender
        #PipeHybrid001,
        #Random,
        #TopPop,
        #P3alphaRecommender,
        #RP3betaRecommender,
        #ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        #MatrixFactorization_BPR_Cython,
        #MatrixFactorization_FunkSVD_Cython,
        #PureSVDRecommender,
        #NMFRecommender,
        #PureSVDItemRecommender
        #SLIM_BPR_Cython,
        #SLIMElasticNetRecommender
        #IALSRecommender
        #MF_MSE_PyTorch
        #MergedHybrid000
        #LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        #ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_sub = EvaluatorHoldout(URM_valid_sub, cutoff_list=[10])
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    # TODO: setta I GIUSTI EVALUATOR QUI!!!!
    runParameterSearch_Content_partial = partial(runParameterSearch_Content,
                                                 URM_train=URM_train,
                                                 ICM_object=ICM_obj,
                                                 ICM_name='1BookFeatures',
                                                 n_cases = 50,
                                                 n_random_starts = 20,
                                                 evaluator_validation= evaluator_valid_sub,
                                                 evaluator_test = evaluator_valid_hybrid,
                                                 metric_to_optimize = "MAP",
                                                 output_folder_path=output_folder_path,
                                                 parallelizeKNN = False,
                                                 allow_weighting = True,
                                                 #similarity_type_list = ['cosine']
                                                 )
    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    pool.map(runParameterSearch_Content_partial, content_algorithm_list)
    """

    # Fit the pool of pre-tuned base recommenders that the hybrid search combines.
    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 1000, 'alpha': 0.38192761611274967, 'beta': 0.0,
                   'normalize_similarity': False}
    rp3b.fit(**rp3b_params)
    print("Done")
    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 131, 'alpha': 0.33660811631883863, 'normalize_similarity': False}
    p3a.fit(**p3a_params)
    print("Done")
    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 100, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True,
                  'asymmetric_alpha': 0.0}
    icf.fit(**icf_params)
    print("Done")
    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    ucf.fit(**ucf_params)
    print("Done")
    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 205, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True,
                  'feature_weighting': 'BM25'}
    icb.fit(**icb_params)
    print("Done")
    print("SlimBPR training...")
    sbpr = SLIM_BPR_Cython(URM_train, verbose=False)
    sbpr_params = {'topK': 979, 'epochs': 130, 'symmetric': False, 'sgd_mode': 'adam',
                   'lambda_i': 0.004947329669424629, 'lambda_j': 1.1534760845071758e-05,
                   'learning_rate': 0.0001}
    sbpr.fit(**sbpr_params)
    print("Done")
    print("SlimElasticNet training...")
    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 992, 'l1_ratio': 0.004065081925341167, 'alpha': 0.003725005053334143}
    sen.fit(**sen_params)
    print("Done")

    list_recommender = [rp3b, p3a, icf, ucf, icb, sen, sbpr]
    list_already_seen = [rp3b, p3a, icf, ucf, icb]

    # Search every size-3 combination that involves at least one of the newly
    # added recommenders (sen, sbpr); triples made only of the five already
    # explored recommenders are skipped.
    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations(list_already_seen, 3):
            # One output folder per (seed, recommender triple).
            recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(
                seed) + '/' + recommender_names + '/'

            # If directory does not exist, create
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: set the RIGHT EVALUATORS here!!!!
            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=20,
                evaluator_validation_earlystopping=evaluator_valid_hybrid,
                evaluator_validation=evaluator_valid_hybrid,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,
                #similarity_type_list = ["cosine", 'jaccard'],
                parallelizeKNN=False,
                list_rec=rec_perm)

            pool = multiprocessing.Pool(processes=int(
                multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)
slim_param[slim_param_name[7]] = bsList[i] slim_params_list.append(slim_param) #for i in slim_params_list: # print(i) # Store the value loopTimes = 10 slim_MAPs = np.zeros([len(slim_params_list), loopTimes + 1]) for j in range(loopTimes): URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8) for i, slim_param in enumerate(slim_params_list): if slim_MAPs[i][loopTimes] == 1: continue recommender = SLIM_BPR_Cython(URM_train) recommender.fit(**slim_param) evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10]) eval_res = evaluator_validation.evaluateRecommender(recommender) MAP = eval_res[0][10]['MAP'] # MAP = 1 print("The MAP for {} th params is {}".format(i, MAP)) slim_MAPs[i][j] = MAP if MAP < 0.02: slim_MAPs[i][10] = 1 print(slim_MAPs) np.savetxt("./evalRes/slim.csv", slim_MAPs, delimiter=",") # Define the combine ratio lineParam = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
def slim_tuning(f, epochs=300, train_with_sparse_weights=None, symmetric=True,
                verbose=False, random_seed=None, batch_size=1000, lambda_i=0.0,
                lambda_j=0.0, learning_rate=1e-4, topK=200, sgd_mode='adagrad',
                gamma=0.995, beta_1=0.9, beta_2=0.999):
    """Evaluate one SLIM-BPR configuration over k folds and log per-user-type MAP.

    The configuration and the averaged MAP per user type are printed to stdout
    and appended to the open, writable log file `f` (flushed before returning).
    All remaining keyword arguments are forwarded to SLIM_BPR_Cython.fit().
    """
    seed = 13
    map_dict = {}
    description_list = []
    flag = 0
    k_fold = 2
    for fold in range(0, k_fold):
        # NOTE(review): DataObject is built with the same fixed seed on every
        # fold, so all folds appear to see identical data — confirm intended.
        data_reader = DataReader()
        data = DataObject(data_reader, k=1, random_seed=seed)
        rec = SLIM_BPR_Cython(data.urm_train)
        rec.fit(epochs=epochs,
                train_with_sparse_weights=train_with_sparse_weights,
                symmetric=symmetric, verbose=verbose, random_seed=random_seed,
                batch_size=batch_size, lambda_i=lambda_i, lambda_j=lambda_j,
                learning_rate=learning_rate, topK=topK, sgd_mode=sgd_mode,
                gamma=gamma, beta_1=beta_1, beta_2=beta_2)
        # Collect the user-type descriptions once (first fold only).
        if (len(description_list) == 0):
            for _, _, description in data.urm_train_users_by_type:
                description_list.append(description)
        # Initialise the per-type MAP accumulator once.
        if (flag == 0):
            flag = 1
            for d in description_list:
                map_dict[d] = 0
        for n, users, description in data.urm_train_users_by_type:
            # Renamed from `eval`/`map`, which shadowed builtins.
            eval_str, fold_map = MyEvaluator.evaluate_algorithm(
                data.urm_test, users, rec, at=10, remove_top=0)
            print(f"\t {description},\t {eval_str}")
            map_dict[description] += fold_map
    print(
        "epochs={}, train_with_sparse_weights = {},symmetric = {}, batch_size = {},"
        .format(epochs, train_with_sparse_weights, symmetric, batch_size))
    print(
        "lambda_i = {}, lambda_j = {}, learning_rate = {}, topK = {},".format(
            lambda_i, lambda_j, learning_rate, topK))
    # BUGFIX: this line used to print sgd_mode twice and never report gamma.
    print("sgd_mode={}, gamma={}, beta_1={}, beta_2={}".format(
        sgd_mode, gamma, beta_1, beta_2))
    for d in description_list:
        map_dict[d] /= k_fold
        print(d + "\t\t" + "average map: " + str(map_dict[d]))
    f.write(
        "\nepochs={}, train_with_sparse_weights = {},symmetric = {}, batch_size = {},"
        .format(epochs, train_with_sparse_weights, symmetric, batch_size))
    f.write("\nlambda_i = {}, lambda_j = {}, learning_rate = {}, topK = {},".
            format(lambda_i, lambda_j, learning_rate, topK))
    # BUGFIX: same duplicated-sgd_mode defect as the print above.
    f.write("\nsgd_mode={}, gamma={}, beta_1={}, beta_2={}".format(
        sgd_mode, gamma, beta_1, beta_2))
    for d in description_list:
        f.write("\n" + d + "\t\t" + "average map: " + str(map_dict[d]))
    f.write("\n\n")
    f.flush()
class BMussoliniEnsemble: def __init__(self, urm_train, urm_test, icm): self.train = urm_train.tocsr() self.test = urm_test.tocsr() self.icm = icm.tocsr() self.initialize_components() def initialize_components(self): self.cbf_bpr_recommender = SLIM_BPR_Cython(self.icm.T, positive_threshold=0) def fit(self): self.cbf_bpr_w = self.cbf_bpr_recommender.fit(epochs=10, topK=200, batch_size=200, sgd_mode='adagrad', learning_rate=1e-2) def recommend(self, user_id, combiner, at=10): user_profile = self.train[user_id, :] cbf_bpr_r = user_profile.dot(self.cbf_bpr_w).toarray().ravel() scores = [[cbf_bpr_r, "1", "CBF_BPR"]] for r in scores: self.filter_seen(user_id, r[0]) return combiner.combine(scores, at) def filter_seen(self, user_id, scores): start_pos = int(self.train.indptr[user_id]) end_pos = int(self.train.indptr[user_id + 1]) user_profile = self.train.indices[start_pos:end_pos] scores[user_profile] = -1000000 #-np.inf return scores def recommend_batch(self, user_list, combiner, at=10): res = np.array([]) n = 0 for i in user_list: recList = self.recommend(i, combiner, at).T tuple = np.concatenate(([i], recList)) if (res.size == 0): res = tuple else: res = np.vstack([res, tuple]) return res def get_component_data(self): cbf_bpr_rating = 1 * self.train.dot(self.cbf_bpr_w) cbf_bpr = { "min": cbf_bpr_rating.min(), "max": cbf_bpr_rating.max(), "mean": cbf_bpr_rating.mean(), } del cbf_bpr_rating return {"CBF_BPR": cbf_bpr}
class NapoEnsemble:
    """Weighted ensemble of item/user CF, SVD, item/content BPR, IALS and CBF.

    Each component produces a per-item score vector; `combiner` blends them
    using the weights stored in `self.ensemble_weights`.
    """

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        """
        :param urm_train: user x item training URM (converted to CSR)
        :param urm_test: held-out URM (converted to CSR)
        :param icm: item content matrix (converted to CSR)
        :param parameters: optional dict of per-component weights
        """
        if parameters is None:
            parameters = {
                "USER_CF": 0.8,
                "SVD": 0.7,
                "ITEM_CF": 1,
                "ITEM_BPR": 0.8,
                "CBF": 0.3,
                "IALS": 1.0,
                "CBF_BPR": 1
            }
        self.ensemble_weights = parameters
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()
        self.initialize_components()

    def initialize_components(self):
        self.item_cosineCF_recommender = Cosine_Similarity(self.train, topK=200, shrink=15,
                                                           normalize=True, mode='cosine')
        self.user_cosineCF_recommender = Cosine_Similarity(self.train.T, topK=200, shrink=15,
                                                           normalize=True, mode='cosine')
        self.item_bpr_recommender = SLIM_BPR_Cython(self.train, positive_threshold=0)
        self.svd_recommender = PureSVDRecommender(self.train)
        self.cbf_bpr_recommender = SLIM_BPR_Cython(self.icm.T, positive_threshold=0)
        self.cbf_recommender = Cosine_Similarity(self.icm.T, topK=50, shrink=10,
                                                 normalize=True, mode='cosine')
        # A zero IALS weight disables its (expensive) training entirely.
        if self.ensemble_weights["IALS"] == 0:
            self.ials_recommender = IALS_numpy(iters=0)
        else:
            self.ials_recommender = IALS_numpy()

    def fit(self):
        """Train all components and cache similarity matrices / latent factors."""
        self.item_bpr_w = self.item_bpr_recommender.fit(epochs=10, topK=200, batch_size=200,
                                                        sgd_mode='adagrad', learning_rate=1e-2)
        self.svd_latent_x, self.svd_latent_y = self.svd_recommender.fit(num_factors=500)
        self.cbf_bpr_w = self.cbf_bpr_recommender.fit(epochs=10, topK=200, batch_size=200,
                                                      sgd_mode='adagrad', learning_rate=1e-2)
        self.item_cosineCF_w = self.item_cosineCF_recommender.compute_similarity()
        self.user_cosineCF_w = self.user_cosineCF_recommender.compute_similarity()
        self.cbf_w = self.cbf_recommender.compute_similarity()
        self.ials_latent_x, self.ials_latent_y = self.ials_recommender.fit(R=self.train)
        # Minimum reconstructed ratings, used as offsets to shift scores non-negative.
        self.min_ials = np.dot(self.ials_latent_x, self.ials_latent_y.T).min()
        self.min_svd = np.dot(self.svd_latent_x, self.svd_latent_y).min()

    def recommend(self, user_id, combiner, at=10):
        """Blend all component scores for `user_id` and return `at` items."""
        user_profile = self.train[user_id, :]
        item_bpr_r = user_profile.dot(self.item_bpr_w).toarray().ravel()
        svd_r = self.svd_latent_x[user_id, :].dot(self.svd_latent_y)
        item_cosineCF_r = user_profile.dot(self.item_cosineCF_w).toarray().ravel()
        user_cosineCF_r = self.user_cosineCF_w[user_id].dot(self.train).toarray().ravel()
        cbf_r = user_profile.dot(self.cbf_w).toarray().ravel()
        cbf_bpr_r = user_profile.dot(self.cbf_bpr_w).toarray().ravel()
        # NOTE(review): min_ials is added to the factors BEFORE the dot product;
        # `np.dot(...) + self.min_ials` looks intended (cf. get_component_data),
        # but changing it would alter the ranking, so it is left as-is.
        ials_r = np.dot(self.ials_latent_x[user_id],
                        self.ials_latent_y.T + self.min_ials).ravel()
        scores = [
            [item_bpr_r, self.ensemble_weights["ITEM_BPR"], "ITEM_BPR"],
            [svd_r, self.ensemble_weights["SVD"], "SVD"],
            [item_cosineCF_r, self.ensemble_weights["ITEM_CF"], "ITEM_CF"],
            [user_cosineCF_r, self.ensemble_weights["USER_CF"], "USER_CF"],
            [ials_r, self.ensemble_weights["IALS"], "IALS"],
            [cbf_r, self.ensemble_weights["CBF"], "CBF"],
            [cbf_bpr_r, self.ensemble_weights["CBF_BPR"], "CBF_BPR"]
        ]
        for r in scores:
            self.filter_seen(user_id, r[0])
        return combiner.combine(scores, at)

    def filter_seen(self, user_id, scores):
        """Set already-interacted items to -inf so they cannot be recommended."""
        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])
        user_profile = self.train.indices[start_pos:end_pos]
        scores[user_profile] = -np.inf
        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Recommend for many users; row i is [user_id, rec_1, ..., rec_at]."""
        rows = [np.concatenate(([i], self.recommend(i, combiner, at).T))
                for i in user_list]
        if not rows:
            return np.array([])
        # A single user yields a 1-D row, matching the original behavior.
        return rows[0] if len(rows) == 1 else np.vstack(rows)

    def get_component_data(self):
        """Return min/max/mean of each weighted component rating matrix.

        BUGFIX: the returned dict used to map "SVD" to the user-CF statistics
        and put the SVD statistics under a spurious "USER_BPR" key, while
        "USER_CF" was missing entirely; the mapping now matches the other
        ensembles in this file.
        """
        item_cf_rating = self.ensemble_weights["ITEM_CF"] * self.train.dot(
            self.item_cosineCF_w)
        item_cf = {
            "min": item_cf_rating.min(),
            "max": item_cf_rating.max(),
            "mean": item_cf_rating.mean(),
        }
        del item_cf_rating
        user_cf_rating = self.ensemble_weights[
            "USER_CF"] * self.user_cosineCF_w.dot(self.train)
        user_cf = {
            "min": user_cf_rating.min(),
            "max": user_cf_rating.max(),
            "mean": user_cf_rating.mean(),
        }
        del user_cf_rating
        svd_ratings = self.ensemble_weights["SVD"] * (
            np.dot(self.svd_latent_x, self.svd_latent_y) + self.min_svd)
        svd = {
            "min": svd_ratings.min(),
            "max": svd_ratings.max(),
            "mean": svd_ratings.mean(),
        }
        del svd_ratings
        item_bpr_rating = self.ensemble_weights["ITEM_BPR"] * self.train.dot(
            self.item_bpr_w)
        item_bpr = {
            "min": item_bpr_rating.min(),
            "max": item_bpr_rating.max(),
            "mean": item_bpr_rating.mean(),
        }
        del item_bpr_rating
        ials_rating = self.ensemble_weights["IALS"] * (
            np.dot(self.ials_latent_x, self.ials_latent_y.T) + self.min_ials)
        ials = {
            "min": ials_rating.min(),
            "max": ials_rating.max(),
            "mean": np.mean(ials_rating),
        }
        del ials_rating
        cbf_rating = self.ensemble_weights["CBF"] * self.train.dot(self.cbf_w)
        cbf = {
            "min": cbf_rating.min(),
            "max": cbf_rating.max(),
            "mean": cbf_rating.mean(),
        }
        del cbf_rating
        cbf_bpr_rating = self.ensemble_weights["CBF_BPR"] * self.train.dot(
            self.cbf_bpr_w)
        cbf_bpr = {
            "min": cbf_bpr_rating.min(),
            "max": cbf_bpr_rating.max(),
            "mean": cbf_bpr_rating.mean(),
        }
        del cbf_bpr_rating
        return {
            "ITEM_CF": item_cf,
            "USER_CF": user_cf,
            "SVD": svd,
            "ITEM_BPR": item_bpr,
            "IALS": ials,
            "CBF": cbf,
            "CBF_BPR": cbf_bpr
        }
class BMussoliniEnsemble:
    """Large weighted ensemble (CF, SVD, IALS-CG, BPR-MF, RP3beta, LightFM, CBF)
    whose training URM is first rescaled by playlist insertion order."""

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        # Default integer weights; a component with weight 0 still gets trained
        # (only its contribution to the combined score is nullified).
        if parameters is None:
            parameters = {
                "USER_CF": 7,
                "SVD": 26,
                "ITEM_CF": 0,
                "ITEM_BPR": 16,
                "CBF": 7,
                "IALS": 26,
                "CBF_BPR": 64,
                "BPR_MF": 6,
                "ITEM_RP3B": 16,
                "USER_RP3B": 0,
                "FM": 10
            }
        self.ensemble_weights = parameters
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()
        self.sequential_playlists = None
        # Ordered (playlist -> songs) data used to weight early insertions more.
        self.sequential_playlists = load_sequential.load_train_sequential()
        self.initialize_components()

    def initialize_components(self):
        # NOTE: self.train is REPLACED by its rescaled version; every component
        # below (and recommend/filter_seen) sees the rescaled matrix.
        self.train = self.rescale_wrt_insertion_order(self.train)
        self.item_cosineCF_recommender = Cosine_Similarity(self.train, topK=200, shrink=15,
                                                           normalize=True, mode='cosine')
        self.user_cosineCF_recommender = Cosine_Similarity(self.train.T, topK=200, shrink=15,
                                                           normalize=True, mode='cosine')
        self.svd_recommender = PureSVDRecommender(self.train)
        self.cbf_bpr_recommender = SLIM_BPR_Cython(self.icm.T, positive_threshold=0)
        self.cbf_recommender = Cosine_Similarity(self.icm.T, topK=50, shrink=10,
                                                 normalize=True, mode='cosine')
        self.item_rp3b_recommender = RP3betaRecommender(self.train)
        self.user_rp3b_recommender = RP3betaRecommender(self.train.T)
        self.bpr_mf = BPR_matrix_factorization(factors=800, regularization=0.01,
                                               learning_rate=0.01, iterations=300)
        self.ials_cg_mf = IALS_CG(iterations=15, calculate_training_loss=True, factors=500,
                                  use_cg=True, regularization=1e-3)
        self.lightfm = LightFM_Recommender(self.train, self.icm, no_components=200)

    def fit(self):
        """Train every component and cache similarity matrices / latent factors."""
        self.svd_latent_x, self.svd_latent_y = self.svd_recommender.fit(num_factors=500)
        # Minimum reconstructed rating, used later as a non-negativity offset.
        self.min_svd = np.dot(self.svd_latent_x, self.svd_latent_y).min()
        self.cbf_bpr_w = self.cbf_bpr_recommender.fit(epochs=10, topK=200, batch_size=20,
                                                      sgd_mode='adagrad', learning_rate=1e-2)
        self.item_cosineCF_w = self.item_cosineCF_recommender.compute_similarity()
        self.user_cosineCF_w = self.user_cosineCF_recommender.compute_similarity()
        self.cbf_w = self.cbf_recommender.compute_similarity()
        self.item_rp3b_w = self.item_rp3b_recommender.fit()
        self.user_rp3b_w = self.user_rp3b_recommender.fit()
        # IALS-CG expects item x user confidences; 40 is the confidence scaling.
        self.ials_cg_mf.fit(40*self.train.T)
        self.ials_latent_x = self.ials_cg_mf.user_factors.copy()
        self.ials_latent_y = self.ials_cg_mf.item_factors.copy()
        self.min_ials = np.dot(self.ials_latent_x, self.ials_latent_y.T).min()
        self.bpr_mf.fit(self.train.T.tocoo())
        self.bpr_mf_latent_x = self.bpr_mf.user_factors.copy()
        self.bpr_mf_latent_y = self.bpr_mf.item_factors.copy()
        self.lightfm.fit(100)

    def recommend(self, user_id, combiner, at=10):
        """Blend all component score vectors for `user_id` into `at` items."""
        user_profile = self.train[user_id, :]
        svd_r = self.svd_latent_x[user_id, :].dot(self.svd_latent_y)
        item_cosineCF_r = user_profile.dot(self.item_cosineCF_w).toarray().ravel()
        user_cosineCF_r = self.user_cosineCF_w[user_id].dot(self.train).toarray().ravel()
        cbf_r = user_profile.dot(self.cbf_w).toarray().ravel()
        cbf_bpr_r = user_profile.dot(self.cbf_bpr_w).toarray().ravel()
        # NOTE(review): min_ials is added to the latent factors BEFORE the dot
        # product; `np.dot(...) + self.min_ials` looks intended — confirm.
        ials_r = np.dot(self.ials_latent_x[user_id],
                        self.ials_latent_y.T + self.min_ials).ravel()
        bpr_mf_r = np.dot(self.bpr_mf_latent_x[user_id], self.bpr_mf_latent_y.T).ravel()
        item_rp3b_r = user_profile.dot(self.item_rp3b_w).toarray().ravel()
        user_rp3b_r = self.user_rp3b_w[user_id].dot(self.train).toarray().ravel()
        lightfm_r = self.lightfm.scores(user_id)
        scores = [
            # [item_bpr_r, self.ensemble_weights["ITEM_BPR"], "ITEM_BPR" ],
            # [user_bpr_r, self.ensemble_weights["USER_BPR"], "USER_BPR" ],
            [svd_r, self.ensemble_weights["SVD"], "SVD"],
            [item_cosineCF_r, self.ensemble_weights["ITEM_CF"], "ITEM_CF"],
            [user_cosineCF_r, self.ensemble_weights["USER_CF"], "USER_CF"],
            [ials_r, self.ensemble_weights["IALS"], "IALS"],
            [cbf_r, self.ensemble_weights["CBF"], "CBF"],
            [cbf_bpr_r, self.ensemble_weights["CBF_BPR"], "CBF_BPR"],
            [bpr_mf_r, self.ensemble_weights["BPR_MF"], "BPR_MF"],
            [item_rp3b_r, self.ensemble_weights["ITEM_RP3B"], "ITEM_RP3B"],
            [user_rp3b_r, self.ensemble_weights["USER_RP3B"], "USER_RP3B"],
            [lightfm_r, self.ensemble_weights["FM"], "FM"]
        ]
        for r in scores:
            # Seen items are demoted in place before combining.
            self.filter_seen(user_id, r[0])
        R = combiner.combine(scores, at)
        return R

    def rescale_wrt_insertion_order(self, R):
        """Return a copy of R where, for sequential playlists, earlier-inserted
        songs get larger values: entry = (0.8*R + 0.2) / k**POPULARITY_SCALING_EXP
        for the k-th inserted song (POPULARITY_SCALING_EXP is a module global)."""
        R = R.copy()
        # LIL format for efficient per-entry assignment below.
        R = R.tolil()
        R = R*0.8
        for i in self.sequential_playlists:
            pl = i["id"]
            k = 1  # 1-based insertion rank within the playlist
            for j in i["songs"]:
                factor = 1/(k**POPULARITY_SCALING_EXP)
                R[pl, j] = factor*(R[pl,j] + 0.2)
                k += 1
        R = R.tocsr()
        return R

    def filter_seen(self, user_id, scores):
        """Demote items already in the user's training profile (in place)."""
        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])
        user_profile = self.train.indices[start_pos:end_pos]
        scores[user_profile] = -1000000  # -np.inf
        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Recommend for many users; row i is [user_id, rec_1, ..., rec_at].

        NOTE(review): incremental np.vstack is O(n^2) in the number of users —
        acceptable here only for small batches.
        """
        res = np.array([])
        n=0
        for i in user_list:
            recList = self.recommend(i, combiner, at).T
            tuple = np.concatenate(([i], recList))
            if (res.size == 0):
                res = tuple
            else:
                res = np.vstack([res, tuple])
        return res

    def get_component_data(self):
        """Return min/max/mean of each weighted component rating matrix."""
        item_cf_rating = self.ensemble_weights["ITEM_CF"]*self.train.dot(self.item_cosineCF_w)
        item_cf = {
            "min" : item_cf_rating.min(),
            "max" : item_cf_rating.max(),
            "mean" : item_cf_rating.mean(),
        }
        del item_cf_rating
        user_cf_rating = self.ensemble_weights["USER_CF"]*self.user_cosineCF_w.dot(self.train)
        user_cf = {
            "min": user_cf_rating.min(),
            "max": user_cf_rating.max(),
            "mean": user_cf_rating.mean(),
        }
        del user_cf_rating
        ials_rating = self.ensemble_weights["IALS"]*(np.dot(self.ials_latent_x, self.ials_latent_y.T)+self.min_ials)
        ials = {
            "min": ials_rating.min(),
            "max": ials_rating.max(),
            "mean": np.mean(ials_rating),
        }
        del ials_rating
        cbf_rating = self.ensemble_weights["CBF"]*self.train.dot(self.cbf_w)
        cbf = {
            "min": cbf_rating.min(),
            "max": cbf_rating.max(),
            "mean": cbf_rating.mean(),
        }
        del cbf_rating
        cbf_bpr_rating = self.ensemble_weights["CBF_BPR"]*self.train.dot(self.cbf_bpr_w)
        cbf_bpr = {
            "min": cbf_bpr_rating.min(),
            "max": cbf_bpr_rating.max(),
            "mean": cbf_bpr_rating.mean(),
        }
        del cbf_bpr_rating
        svd_ratings = self.ensemble_weights["SVD"] * (np.dot(self.svd_latent_x, self.svd_latent_y) + self.min_svd)
        svd = {
            "min": svd_ratings.min(),
            "max": svd_ratings.max(),
            "mean": svd_ratings.mean(),
        }
        del svd_ratings
        return {
            "ITEM_CF" : item_cf,
            "USER_CF": user_cf ,
            "SVD" : svd ,
            "IALS" : ials,
            "CBF" : cbf,
            "CBF_BPR" : cbf_bpr
        }
'implicit': [False, False, False] } alpha1 = 0.4 alpha2 = 0.5399999999999999 alpha3 = 0.06000000000000005 print( "***************************Ensure the parameter is good**********************" ) URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8) itemCF_recommender = ItemKNNCFRecommender(URM_train) itemCF_recommender.fit(**itemCFParam) slim_recommender = SLIM_BPR_Cython(URM_train, recompile_cython=False) slim_recommender.fit(**slimParam) p3_recommender = P3alphaRecommender(URM_train) p3_recommender.fit(**p3Param) recommender1 = SimilarityHybridRecommender(URM_train, itemCF_recommender.W_sparse, slim_recommender.W_sparse, p3_recommender.W_sparse) recommender1.fit(topK=100, alpha1=alpha1, alpha2=alpha2, alpha3=alpha3) evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10]) eval_res = evaluator_validation.evaluateRecommender(recommender1) MAP = eval_res[0][10]['MAP'] print("The MAP in one test is: ", MAP) itemCF_recommender = ItemKNNCFRecommender(URM_all)
class LinearHybrid006(BaseItemSimilarityMatrixRecommender):
    """Linear blend of P3alpha, ItemKNN-CBF and SLIM-BPR item scores.

    Mixing weights a, b, c are derived in fit() from (alpha, l1_ratio) and
    sum to 1.
    """
    RECOMMENDER_NAME = "LinearHybrid006"

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self, URM_train, ICM_train, submission=False, verbose=True, seed=1205):
        """
        :param URM_train: user x item training URM
        :param ICM_train: item content matrix for the CBF component
        :param submission: if True, always refit (skip the stored-model cache)
        :param seed: must match the parameter-search seed used for the cache
        """
        super(LinearHybrid006, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train
        # Sub-recommenders with their pre-tuned hyper-parameters.
        self.__rec1 = P3alphaRecommender(URM_train, verbose=False)
        self.__rec1_params = {
            'topK': 131,
            'alpha': 0.33660811631883863,
            'normalize_similarity': False
        }
        self.__rec2 = ItemKNNCBFRecommender(URM_train, ICM_train, verbose=False)
        self.__rec2_params = {
            'topK': 205,
            'shrink': 1000,
            'similarity': 'cosine',
            'normalize': True,
            'feature_weighting': 'BM25'
        }
        self.__rec3 = SLIM_BPR_Cython(URM_train, verbose=False)
        self.__rec3_params = {
            'topK': 979,
            'epochs': 130,
            'symmetric': False,
            'sgd_mode': 'adam',
            'lambda_i': 0.004947329669424629,
            'lambda_j': 1.1534760845071758e-05,
            'learning_rate': 0.0001
        }
        self.__a = self.__b = self.__c = None
        self.seed = seed
        self.__submission = submission

    def __load_or_fit(self, rec, params):
        # Load `rec` from the seed-keyed model cache; on any failure fit it
        # with `params` and store it for next time. (The original repeated
        # this try/load/fit/save block verbatim for each of the three
        # recommenders, each with a bare `except:` that also swallowed
        # KeyboardInterrupt/SystemExit.)
        try:
            rec.load_model(
                f'stored_recommenders/seed_{str(self.seed)}_{rec.RECOMMENDER_NAME}/',
                f'best_for_{self.RECOMMENDER_NAME}')
            print(f"{rec.RECOMMENDER_NAME} loaded.")
        except Exception:
            print(f"Fitting {rec.RECOMMENDER_NAME} ...")
            rec.fit(**params)
            print(f"done.")
            rec.save_model(
                f'stored_recommenders/seed_{str(self.seed)}_{rec.RECOMMENDER_NAME}/',
                f'best_for_{self.RECOMMENDER_NAME}')

    def fit(self, alpha=0.5, l1_ratio=0.5):
        """Set mixing weights (a = alpha*l1_ratio, b = alpha-a, c = 1-a-b) and
        make sure all three sub-recommenders are trained."""
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b
        if not self.__submission:
            # Reuse cached models when available.
            for rec, params in ((self.__rec1, self.__rec1_params),
                                (self.__rec2, self.__rec2_params),
                                (self.__rec3, self.__rec3_params)):
                self.__load_or_fit(rec, params)
        else:
            # Submission mode: always fit from scratch, never touch the cache.
            self.__rec1.fit(**self.__rec1_params)
            self.__rec2.fit(**self.__rec2_params)
            self.__rec3.fit(**self.__rec3_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Weighted sum of the three sub-recommenders' item scores."""
        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)
        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b \
            + item_weights_3 * self.__c
        return item_weights

    def save_model(self, folder_path, file_name=None):
        """Persist a (currently empty) model descriptor via DataIO; the real
        state lives in the cached sub-recommenders."""
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path + file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")