def set_URM_train(self, URM_train_new, estimate_model_for_cold_users=False, topK=100, **kwargs):
    """
    Replace the training URM and, optionally, derive a model usable for cold users.

    :param URM_train_new: new user-rating matrix, it must have the same shape as the current self.URM_train
    :param estimate_model_for_cold_users: strategy selector, compared against two strings.
            "itemKNN": build an item-item KNN recommender from the ITEM latent factors.
            "mean_item_factors": recompute USER_factors as URM_train.dot(ITEM_factors) and divide each row
            by the square root of the number of interactions stored for that user.
            Any other value (including the default False) only replaces the URM.
    :param topK: forwarded to compute_W_sparse_from_item_latent_factors and to the KNN fit ("itemKNN" only)
    :param kwargs: not supported, a message is printed if any is received
    :return: None
    """

    assert self.URM_train.shape == URM_train_new.shape, \
        "{}: set_URM_train old and new URM train have different shapes".format(self.RECOMMENDER_NAME)

    if len(kwargs) > 0:
        print("{}: set_URM_train keyword arguments not supported for this recommender class. Received: {}".format(self.RECOMMENDER_NAME, kwargs))

    # Work on a private copy so that eliminate_zeros() never touches the caller's matrix
    self.URM_train = check_matrix(URM_train_new.copy(), 'csr', dtype=np.float32)
    self.URM_train.eliminate_zeros()

    if estimate_model_for_cold_users == "itemKNN":

        print("{}: Estimating ItemKNN model from ITEM latent factors...".format(self.RECOMMENDER_NAME))

        W_sparse = compute_W_sparse_from_item_latent_factors(self.ITEM_factors, topK=topK)

        self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(self.URM_train)
        self._ItemKNNRecommender.fit(W_sparse, topK=topK)

        self._cold_user_KNN_model_available = True
        # A user is warm for the KNN model if the new URM stores at least one interaction for it
        self._warm_user_KNN_mask = np.ediff1d(self.URM_train.indptr) > 0

        print("{}: Estimating ItemKNN model from ITEM latent factors... done!".format(self.RECOMMENDER_NAME))

    elif estimate_model_for_cold_users == "mean_item_factors":

        print("{}: Estimating USER latent factors from ITEM latent factors...".format(self.RECOMMENDER_NAME))

        # Number of stored interactions per user, computed once and reused for mask and normalization
        profile_length = np.ediff1d(self.URM_train.indptr)
        self._cold_user_mask = profile_length == 0

        profile_length_sqrt = np.sqrt(profile_length)

        self.USER_factors = self.URM_train.dot(self.ITEM_factors)

        # Divide every row by the sqrt of the profile length.
        # Users with an empty profile are skipped to avoid a division by zero.
        has_profile = profile_length_sqrt > 0
        self.USER_factors[has_profile, :] /= profile_length_sqrt[has_profile, np.newaxis]

        print("{}: Estimating USER latent factors from ITEM latent factors... done!".format(self.RECOMMENDER_NAME))
def load_model(self, folder_path, file_name=None):
    """
    Load a saved model and, if it included a cold-user ItemKNN model, rebuild that recommender.

    :param folder_path: folder the model was saved in, forwarded to the parent load_model
    :param file_name: name of the saved file, forwarded unchanged to the parent load_model
    :return: None
    """

    # NOTE(review): the parent load_model is presumably what sets _cold_user_KNN_model_flag,
    # _ItemKNNRecommender_W_sparse and _ItemKNNRecommender_topK on self - confirm against BaseRecommender
    super(BaseMatrixFactorizationRecommender, self).load_model(folder_path, file_name=file_name)

    if self._cold_user_KNN_model_flag:
        self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(self.URM_train)
        self._ItemKNNRecommender.fit(self._ItemKNNRecommender_W_sparse,
                                     topK=self._ItemKNNRecommender_topK)

        # The similarity matrix has been handed to the rebuilt recommender, the loaded copy is not needed anymore
        del self._ItemKNNRecommender_W_sparse

        # _ItemKNNRecommender_topK is deliberately NOT deleted: save_model reads it whenever
        # _cold_user_KNN_model_flag is set, so removing it would make a save after a load fail.
def load_model(self, folder_path, file_name=None):
    """
    Load the wrapper state, the wrapped MF model and, if present, the cold-user ItemKNN model.

    The wrapped MF model is read from file_name + "_warm_users" and the ItemKNN model
    from file_name + "_cold_users".

    :param folder_path: folder the model files are stored in
    :param file_name: base name of the saved files, defaults to self.RECOMMENDER_NAME when None
    :return: None
    """

    # The suffixes below are built by string concatenation, which fails on None:
    # resolve the default name before using it
    if file_name is None:
        file_name = self.RECOMMENDER_NAME

    # NOTE(review): super() is anchored at BaseMatrixFactorizationRecommender, so that class' own
    # load_model is skipped and the next one in the MRO is used - looks intentional, confirm
    super(BaseMatrixFactorizationRecommender, self).load_model(folder_path, file_name=file_name)

    self.mf_recommender.load_model(folder_path, file_name=file_name + "_warm_users")

    if self._cold_user_KNN_model_flag:
        self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(self.URM_train)
        self._ItemKNNRecommender.load_model(folder_path, file_name=file_name + "_cold_users")
def computeMaxTheoreticalPerformance(self):
    """
    Evaluate two reference similarity matrices with an ItemKNN recommender and log the results.

    The first one is the collaborative target similarity (self.S_matrix_target).
    The second one is the similarity built from self.data_list, self.row_list and self.col_list:
    the best case would be learning a content matrix whose non-zero cells hold exactly the values
    found in the collaborative similarity.

    :return: None, the results are written with self.writeLog
    """

    recommender_name = self.RECOMMENDER_NAME

    def _evaluate_similarity(similarity_matrix):
        # Fit a fresh ItemKNN recommender on the given similarity and run the evaluator on it
        knn_recommender = ItemKNNCustomSimilarityRecommender()
        knn_recommender.fit(similarity_matrix, self.URM_train)
        return self.evaluator_object(knn_recommender)

    print(recommender_name + ": Computing collaborative performance")

    collaborative_results = _evaluate_similarity(self.S_matrix_target)

    self.writeLog(recommender_name + ": Collaborative performance is: {}".format(collaborative_results))

    print(recommender_name + ": Computing top structural performance")

    item_count = self.ICM.shape[0]

    coordinates = (self.row_list, self.col_list)
    structural_similarity = sps.csr_matrix((self.data_list, coordinates),
                                           shape=(item_count, item_count))
    structural_similarity.eliminate_zeros()

    structural_results = _evaluate_similarity(structural_similarity)

    self.writeLog(recommender_name + ": Top structural performance is: {}".format(structural_results))
class BaseMatrixFactorizationRecommender(BaseRecommender):
    """
    Base class for recommenders that score items as the product of USER_factors and ITEM_factors.

    It provides the item score computation, an optional fallback for cold users and a function
    to save the latent factors.
    Unless a fallback model is estimated via set_URM_train, the prediction for cold users
    is -inf for ALL items.
    """

    def __init__(self, URM_train):
        super(BaseMatrixFactorizationRecommender, self).__init__(URM_train)

        # A user is cold when its URM row stores no interaction
        self._cold_user_mask = np.ediff1d(self.URM_train.indptr) == 0

        self._cold_user_KNN_model_available = False
        # Builtin bool instead of the np.bool alias, which some NumPy releases do not provide
        self._warm_user_KNN_mask = np.zeros(len(self._cold_user_mask), dtype=bool)

        if self._cold_user_mask.any():
            print("{}: Detected {} ({:.2f} %) cold users.".format(
                self.RECOMMENDER_NAME, self._cold_user_mask.sum(),
                self._cold_user_mask.sum() / len(self._cold_user_mask) * 100))

    def _get_cold_user_mask(self):
        return self._cold_user_mask

    def set_URM_train(self, URM_train_new, estimate_model_for_cold_users=False, topK=100, **kwargs):
        """
        Replace the training URM and, optionally, derive a model usable for cold users.

        :param URM_train_new: new user-rating matrix, it must have the same shape as the current self.URM_train
        :param estimate_model_for_cold_users: strategy selector, compared against two strings.
                "itemKNN": build an item-item KNN recommender from the ITEM latent factors.
                "mean_item_factors": recompute USER_factors as URM_train.dot(ITEM_factors) and divide each row
                by the square root of the number of interactions stored for that user.
                Any other value (including the default False) only replaces the URM.
        :param topK: forwarded to compute_W_sparse_from_item_latent_factors and to the KNN fit ("itemKNN" only)
        :param kwargs: not supported, a message is printed if any is received
        :return: None
        """

        assert self.URM_train.shape == URM_train_new.shape, \
            "{}: set_URM_train old and new URM train have different shapes".format(self.RECOMMENDER_NAME)

        if len(kwargs) > 0:
            print("{}: set_URM_train keyword arguments not supported for this recommender class. Received: {}"
                  .format(self.RECOMMENDER_NAME, kwargs))

        # Work on a private copy so that eliminate_zeros() never touches the caller's matrix
        self.URM_train = check_matrix(URM_train_new.copy(), 'csr', dtype=np.float32)
        self.URM_train.eliminate_zeros()

        if estimate_model_for_cold_users == "itemKNN":

            print("{}: Estimating ItemKNN model from ITEM latent factors...".format(self.RECOMMENDER_NAME))

            W_sparse = compute_W_sparse_from_item_latent_factors(self.ITEM_factors, topK=topK)

            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(self.URM_train)
            self._ItemKNNRecommender.fit(W_sparse, topK=topK)

            self._cold_user_KNN_model_available = True
            self._warm_user_KNN_mask = np.ediff1d(self.URM_train.indptr) > 0

            print("{}: Estimating ItemKNN model from ITEM latent factors... done!".format(self.RECOMMENDER_NAME))

        elif estimate_model_for_cold_users == "mean_item_factors":

            print("{}: Estimating USER latent factors from ITEM latent factors...".format(self.RECOMMENDER_NAME))

            # Number of stored interactions per user, computed once and reused for mask and normalization
            profile_length = np.ediff1d(self.URM_train.indptr)
            self._cold_user_mask = profile_length == 0

            profile_length_sqrt = np.sqrt(profile_length)

            self.USER_factors = self.URM_train.dot(self.ITEM_factors)

            # Divide every row by the sqrt of the profile length.
            # Users with an empty profile are skipped to avoid a division by zero.
            has_profile = profile_length_sqrt > 0
            self.USER_factors[has_profile, :] /= profile_length_sqrt[has_profile, np.newaxis]

            print("{}: Estimating USER latent factors from ITEM latent factors... done!".format(self.RECOMMENDER_NAME))

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """
        Compute the item scores for a batch of users.

        USER_factors is n_users x n_factors
        ITEM_factors is n_items x n_factors

        Users that are cold for the MF model receive the scores of the ItemKNN model if it is
        available and they are warm for it, otherwise -inf for ALL items.

        :param user_id_array: array of user indices to compute the scores for
        :param items_to_compute: optional item indices, all the other items get a score of -inf
        :return: array of scores with one row per requested user and one column per item
        """

        assert self.USER_factors.shape[1] == self.ITEM_factors.shape[1], \
            "{}: User and Item factors have inconsistent shape".format(self.RECOMMENDER_NAME)

        assert self.USER_factors.shape[0] > user_id_array.max(), \
            "{}: Cold users not allowed. Users in trained model are {}, requested prediction for users up to {}".format(
                self.RECOMMENDER_NAME, self.USER_factors.shape[0], user_id_array.max())

        if items_to_compute is not None:
            # Items that are not requested keep a score of -inf
            item_scores = np.full((len(user_id_array), self.ITEM_factors.shape[0]), -np.inf, dtype=np.float32)
            item_scores[:, items_to_compute] = np.dot(self.USER_factors[user_id_array],
                                                      self.ITEM_factors[items_to_compute, :].T)
        else:
            item_scores = np.dot(self.USER_factors[user_id_array], self.ITEM_factors.T)

        cold_users_MF_mask = self._get_cold_user_mask()[user_id_array]

        if cold_users_MF_mask.any():

            if self._cold_user_KNN_model_available:
                # Add KNN scores for users cold for MF but warm in KNN model
                cold_users_in_MF_warm_in_KNN_mask = np.logical_and(cold_users_MF_mask,
                                                                   self._warm_user_KNN_mask[user_id_array])

                item_scores[cold_users_in_MF_warm_in_KNN_mask, :] = self._ItemKNNRecommender._compute_item_score(
                    user_id_array[cold_users_in_MF_warm_in_KNN_mask],
                    items_to_compute=items_to_compute)

                # Set cold users as those neither in MF nor in KNN
                cold_users_MF_mask = np.logical_and(cold_users_MF_mask,
                                                    np.logical_not(cold_users_in_MF_warm_in_KNN_mask))

            # Set as -inf all remaining cold user scores
            item_scores[cold_users_MF_mask, :] = -np.inf

        return item_scores

    def saveModel(self, folder_path, file_name=None):
        """
        Pickle USER_factors, ITEM_factors and the cold user mask into folder_path + file_name.

        :param folder_path: prefix of the destination path, it is concatenated with file_name as is
        :param file_name: name of the destination file, defaults to self.RECOMMENDER_NAME when None
        :return: None
        """

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        print("{}: Saving model in file '{}'".format(self.RECOMMENDER_NAME, folder_path + file_name))

        dictionary_to_save = {
            "USER_factors": self.USER_factors,
            "ITEM_factors": self.ITEM_factors,
            "_cold_user_mask": self._cold_user_mask,
        }

        # The context manager guarantees the file is flushed and closed even if pickling fails
        with open(folder_path + file_name, "wb") as model_file:
            pickle.dump(dictionary_to_save, model_file, protocol=pickle.HIGHEST_PROTOCOL)

        print("{}: Saving complete".format(self.RECOMMENDER_NAME))
class BaseMatrixFactorizationRecommender(BaseRecommender):
    """
    Base class for recommenders that score items as the product of USER_factors and ITEM_factors,
    optionally adding item, user and global biases.

    It provides the item score computation, an optional fallback for cold users and a function
    to save the model.
    Unless a fallback model is estimated via set_URM_train, the prediction for cold users
    is -inf for ALL items.
    """

    def __init__(self, URM_train):
        super(BaseMatrixFactorizationRecommender, self).__init__(URM_train)

        self.use_bias = False

        self._cold_user_KNN_model_available = False
        # Builtin bool instead of the np.bool alias, which some NumPy releases do not provide
        self._warm_user_KNN_mask = np.zeros(len(self._get_cold_user_mask()), dtype=bool)

    def set_URM_train(self, URM_train_new, estimate_model_for_cold_users=False, topK=100, **kwargs):
        """
        Replace the training URM and, optionally, derive a model usable for cold users.

        :param URM_train_new: new user-rating matrix, it must have the same shape as the current self.URM_train
        :param estimate_model_for_cold_users: strategy selector, compared against two strings.
                "itemKNN": build an item-item KNN recommender from the ITEM latent factors.
                "mean_item_factors": recompute USER_factors as URM_train.dot(ITEM_factors) and divide each row
                by the square root of the number of interactions stored for that user.
                Any other value (including the default False) only replaces the URM.
        :param topK: forwarded to compute_W_sparse_from_item_latent_factors and to the KNN fit ("itemKNN" only)
        :param kwargs: not supported, a message is printed if any is received
        :return: None
        """

        assert self.URM_train.shape == URM_train_new.shape, \
            "{}: set_URM_train old and new URM train have different shapes".format(self.RECOMMENDER_NAME)

        if len(kwargs) > 0:
            print("{}: set_URM_train keyword arguments not supported for this recommender class. Received: {}"
                  .format(self.RECOMMENDER_NAME, kwargs))

        # Work on a private copy so that eliminate_zeros() never touches the caller's matrix
        self.URM_train = check_matrix(URM_train_new.copy(), 'csr', dtype=np.float32)
        self.URM_train.eliminate_zeros()

        if estimate_model_for_cold_users == "itemKNN":

            print("{}: Estimating ItemKNN model from ITEM latent factors...".format(self.RECOMMENDER_NAME))

            W_sparse = compute_W_sparse_from_item_latent_factors(self.ITEM_factors, topK=topK)

            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(self.URM_train)
            self._ItemKNNRecommender.fit(W_sparse, topK=topK)

            self._cold_user_KNN_model_available = True
            self._warm_user_KNN_mask = np.ediff1d(self.URM_train.indptr) > 0

            print("{}: Estimating ItemKNN model from ITEM latent factors... done!".format(self.RECOMMENDER_NAME))

        elif estimate_model_for_cold_users == "mean_item_factors":

            print("{}: Estimating USER latent factors from ITEM latent factors...".format(self.RECOMMENDER_NAME))

            # Number of stored interactions per user, computed once and reused for mask and normalization
            profile_length = np.ediff1d(self.URM_train.indptr)
            self._cold_user_mask = profile_length == 0

            profile_length_sqrt = np.sqrt(profile_length)

            self.USER_factors = self.URM_train.dot(self.ITEM_factors)

            # Divide every row by the sqrt of the profile length.
            # Users with an empty profile are skipped to avoid a division by zero.
            has_profile = profile_length_sqrt > 0
            self.USER_factors[has_profile, :] /= profile_length_sqrt[has_profile, np.newaxis]

            print("{}: Estimating USER latent factors from ITEM latent factors... done!".format(self.RECOMMENDER_NAME))

    #########################################################################################################
    ##########                                                                                     ##########
    ##########                               COMPUTE ITEM SCORES                                   ##########
    ##########                                                                                     ##########
    #########################################################################################################

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """
        Compute the item scores for a batch of users.

        USER_factors is n_users x n_factors
        ITEM_factors is n_items x n_factors

        The scores are then post-processed for cold users and cold items.

        :param user_id_array: array of user indices to compute the scores for
        :param items_to_compute: optional item indices, all the other items get a score of -inf
        :return: array of scores with one row per requested user and one column per item
        """

        assert self.USER_factors.shape[1] == self.ITEM_factors.shape[1], \
            "{}: User and Item factors have inconsistent shape".format(self.RECOMMENDER_NAME)

        assert self.USER_factors.shape[0] > user_id_array.max(), \
            "{}: Cold users not allowed. Users in trained model are {}, requested prediction for users up to {}".format(
                self.RECOMMENDER_NAME, self.USER_factors.shape[0], user_id_array.max())

        if items_to_compute is not None:
            # Items that are not requested keep a score of -inf
            item_scores = np.full((len(user_id_array), self.ITEM_factors.shape[0]), -np.inf, dtype=np.float32)
            item_scores[:, items_to_compute] = np.dot(self.USER_factors[user_id_array],
                                                      self.ITEM_factors[items_to_compute, :].T)
        else:
            item_scores = np.dot(self.USER_factors[user_id_array], self.ITEM_factors.T)

        # No need to select only the specific negative items or warm users because the -inf score will not change
        if self.use_bias:
            item_scores += self.ITEM_bias + self.GLOBAL_bias
            item_scores = (item_scores.T + self.USER_bias[user_id_array]).T

        # items_to_compute is forwarded because the cold-user fallback needs it to query the KNN model
        item_scores = self._compute_item_score_postprocess_for_cold_users(
            user_id_array, item_scores, items_to_compute=items_to_compute)
        item_scores = self._compute_item_score_postprocess_for_cold_items(item_scores)

        return item_scores

    def _compute_item_score_postprocess_for_cold_users(self, user_id_array, item_scores, items_to_compute=None):
        """
        Replace the scores of cold users: with the ItemKNN scores when that model is available
        and the user is warm for it, with -inf otherwise.

        :param user_id_array: array of user indices the rows of item_scores refer to
        :param item_scores: score array, modified in place
        :param items_to_compute: optional item indices, forwarded to the ItemKNN model
        :return: item_scores
        """

        cold_users_batch_mask = self._get_cold_user_mask()[user_id_array]

        if cold_users_batch_mask.any():

            if self._cold_user_KNN_model_available:
                # Add KNN scores for users cold for MF but warm in KNN model
                cold_users_in_MF_warm_in_KNN_mask = np.logical_and(cold_users_batch_mask,
                                                                   self._warm_user_KNN_mask[user_id_array])

                item_scores[cold_users_in_MF_warm_in_KNN_mask, :] = self._ItemKNNRecommender._compute_item_score(
                    user_id_array[cold_users_in_MF_warm_in_KNN_mask],
                    items_to_compute=items_to_compute)

                # Set cold users as those neither in MF nor in KNN
                cold_users_batch_mask = np.logical_and(cold_users_batch_mask,
                                                       np.logical_not(cold_users_in_MF_warm_in_KNN_mask))

            # Set as -inf all remaining cold user scores
            item_scores[cold_users_batch_mask, :] = -np.inf

        return item_scores

    #########################################################################################################
    ##########                                                                                     ##########
    ##########                                LOAD AND SAVE                                        ##########
    ##########                                                                                     ##########
    #########################################################################################################

    def saveModel(self, folder_path, file_name=None):
        """
        Save the latent factors, the bias flag, the cold user mask and, when use_bias is set, the biases.

        :param folder_path: folder passed to DataIO
        :param file_name: name of the destination file, defaults to self.RECOMMENDER_NAME when None
        :return: None
        """

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        print("{}: Saving model in file '{}'".format(self.RECOMMENDER_NAME, folder_path + file_name))

        data_dict_to_save = {
            "USER_factors": self.USER_factors,
            "ITEM_factors": self.ITEM_factors,
            "use_bias": self.use_bias,
            "_cold_user_mask": self._cold_user_mask,
        }

        if self.use_bias:
            data_dict_to_save["ITEM_bias"] = self.ITEM_bias
            data_dict_to_save["USER_bias"] = self.USER_bias
            data_dict_to_save["GLOBAL_bias"] = self.GLOBAL_bias

        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save=data_dict_to_save)

        print("{}: Saving complete".format(self.RECOMMENDER_NAME))
class MF_cold_user_wrapper(BaseMatrixFactorizationRecommender):
    """
    Wrapper around a matrix factorization recommender that adds a fallback for cold users.

    The wrapped model is fitted as usual and its latent factors are copied into the wrapper.
    A later call to set_URM_train can then either build an ItemKNN model from the item factors
    or estimate the user factors of cold users from the item factors.
    """

    RECOMMENDER_NAME = "MF_cold_user_wrapper"

    def __init__(self, MF_recommender_class: BaseMatrixFactorizationRecommender, *posargs, **kwargs):
        """
        Creates an instance of the MF algorithm with the given hyperparameters and data

        :param MF_recommender_class: class of the MF recommender to wrap, it is instantiated with posargs and kwargs
        :param posargs: positional arguments for both the parent constructor and MF_recommender_class
        :param kwargs: keyword arguments for both the parent constructor and MF_recommender_class
        """

        super(MF_cold_user_wrapper, self).__init__(*posargs, **kwargs)

        self.mf_recommender = MF_recommender_class(*posargs, **kwargs)
        # The wrapper takes the name of the wrapped recommender
        self.RECOMMENDER_NAME = self.mf_recommender.RECOMMENDER_NAME

        self.estimate_model_for_cold_users = False

        self._cold_user_KNN_model_flag = False
        self._cold_user_KNN_estimated_factors_flag = False
        # Builtin bool instead of the np.bool alias, which some NumPy releases do not provide
        self._warm_user_KNN_mask = np.zeros(len(self._get_cold_user_mask()), dtype=bool)

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """
        Compute the item scores with the parent class function, which uses the factors stored
        in the wrapper, then replace the scores of cold users with the ItemKNN ones when applicable.

        :param user_id_array: array of user indices to compute the scores for
        :param items_to_compute: optional item indices, forwarded to the parent and to the ItemKNN model
        :return: array of item scores
        """

        item_scores = super(MF_cold_user_wrapper, self)._compute_item_score(
            user_id_array, items_to_compute=items_to_compute)

        item_scores = self._compute_item_score_for_cold_users(
            user_id_array, item_scores, items_to_compute=items_to_compute)

        return item_scores

    def _compute_item_score_for_cold_users(self, user_id_array, item_scores, items_to_compute=None):
        """
        Compute item scores with the ItemKNN model for users cold for MF but warm for the KNN model.

        Nothing is changed when the user factors of cold users have been estimated instead.

        :param user_id_array: array of user indices the rows of item_scores refer to
        :param item_scores: score array, modified in place
        :param items_to_compute: optional item indices, forwarded to the ItemKNN model
        :return: item_scores
        """

        cold_users_batch_mask = self._get_cold_user_mask()[user_id_array]

        if cold_users_batch_mask.any() and not self._cold_user_KNN_estimated_factors_flag:

            if self._cold_user_KNN_model_flag:
                # Add KNN scores for users cold for MF but warm in KNN model
                cold_users_in_MF_warm_in_KNN_mask = np.logical_and(cold_users_batch_mask,
                                                                   self._warm_user_KNN_mask[user_id_array])

                item_scores[cold_users_in_MF_warm_in_KNN_mask, :] = self._ItemKNNRecommender._compute_item_score(
                    user_id_array[cold_users_in_MF_warm_in_KNN_mask],
                    items_to_compute=items_to_compute)

        return item_scores

    def set_URM_train(self, URM_train_new):
        """
        Replace the training URM and apply the cold-user strategy selected in fit.

        With self.estimate_model_for_cold_users == "itemKNN" an ItemKNN model is built from the ITEM
        latent factors. With "mean_item_factors" the USER factors of the users that were cold before
        this call are estimated from the ITEM factors. Any other value only replaces the URM.

        :param URM_train_new: new user-rating matrix, it must have the same shape as the current self.URM_train
        :return: None
        """

        assert self.URM_train.shape == URM_train_new.shape, \
            "{}: set_URM_train old and new URM train have different shapes".format(self.RECOMMENDER_NAME)

        URM_train_new = check_matrix(URM_train_new, 'csr', dtype=np.float32)
        profile_length_new = np.ediff1d(URM_train_new.indptr)

        if self.estimate_model_for_cold_users == "itemKNN":

            self._print("Generating ItemKNN model from ITEM latent factors...")

            W_sparse = compute_W_sparse_from_item_latent_factors(self.ITEM_factors,
                                                                 topK=self.estimate_model_for_cold_users_topK)

            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(URM_train_new)
            self._ItemKNNRecommender.fit(W_sparse, topK=None)

            self._cold_user_KNN_model_flag = True
            self._warm_user_KNN_mask = profile_length_new > 0

            self._print("Generating ItemKNN model from ITEM latent factors... done!")

        elif self.estimate_model_for_cold_users == "mean_item_factors":

            self._print("Estimating USER latent factors from ITEM latent factors...")

            # Must be read before self.URM_train is replaced at the end of this function
            cold_user_mask_previous = self._get_cold_user_mask()
            profile_length_sqrt = np.sqrt(profile_length_new)

            self.USER_factors[cold_user_mask_previous, :] = URM_train_new.dot(
                self.ITEM_factors)[cold_user_mask_previous, :]
            self._cold_user_KNN_estimated_factors_flag = True

            # Divide every estimated row by the sqrt of the profile length.
            # Users whose new profile is still empty are skipped to avoid a division by zero.
            rows_to_normalize = np.logical_and(cold_user_mask_previous, profile_length_sqrt > 0)
            self.USER_factors[rows_to_normalize, :] /= profile_length_sqrt[rows_to_normalize, np.newaxis]

            self._print("Estimating USER latent factors from ITEM latent factors... done!")

        self.URM_train = sps.csr_matrix(URM_train_new.copy())
        self.URM_train.eliminate_zeros()

    def fit(self, *posargs, estimate_model_for_cold_users=None, estimate_model_for_cold_users_topK=100, **kwargs):
        """
        Fits the MF model with the given hyper-parameters and stores the cold-user strategy
        that set_URM_train will apply.

        :param posargs: positional arguments forwarded to the fit of the wrapped recommender
        :param estimate_model_for_cold_users: "itemKNN", "mean_item_factors" or any other value for no fallback
        :param estimate_model_for_cold_users_topK: topK used when the ItemKNN similarity is computed
        :param kwargs: keyword arguments forwarded to the fit of the wrapped recommender
        :return: None
        """

        self.estimate_model_for_cold_users = estimate_model_for_cold_users
        self.estimate_model_for_cold_users_topK = estimate_model_for_cold_users_topK

        self.mf_recommender.fit(*posargs, **kwargs)

        # Copies, so that estimating factors for cold users does not alter the wrapped model
        self.USER_factors = self.mf_recommender.USER_factors.copy()
        self.ITEM_factors = self.mf_recommender.ITEM_factors.copy()

    def save_model(self, folder_path, file_name=None):
        """
        Save the wrapped MF model as file_name + "_warm_users", the ItemKNN model, if any,
        as file_name + "_cold_users" and the wrapper flags as file_name.

        :param folder_path: folder the files are saved in
        :param file_name: base name of the files, defaults to self.RECOMMENDER_NAME when None
        :return: None
        """

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Saving model in file '{}'".format(folder_path + file_name))

        self.mf_recommender.save_model(folder_path, file_name=file_name + "_warm_users")

        data_dict_to_save = {
            "_cold_user_KNN_model_flag": self._cold_user_KNN_model_flag,
            "_cold_user_KNN_estimated_factors_flag": self._cold_user_KNN_estimated_factors_flag,
        }

        if self._cold_user_KNN_model_flag:
            self._ItemKNNRecommender.save_model(folder_path, file_name=file_name + "_cold_users")

        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save=data_dict_to_save)

        self._print("Saving complete")

    def load_model(self, folder_path, file_name=None):
        """
        Load the wrapper flags, the wrapped MF model and, if present, the cold-user ItemKNN model.

        :param folder_path: folder the model files are stored in
        :param file_name: base name of the saved files, defaults to self.RECOMMENDER_NAME when None
        :return: None
        """

        # The suffixes below are built by string concatenation, which fails on None:
        # resolve the default name before using it, as save_model does
        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        # NOTE(review): super() is anchored at BaseMatrixFactorizationRecommender, so that class' own
        # load_model is skipped and the next one in the MRO is used - looks intentional, confirm
        super(BaseMatrixFactorizationRecommender, self).load_model(folder_path, file_name=file_name)

        self.mf_recommender.load_model(folder_path, file_name=file_name + "_warm_users")

        # save_model does not store the wrapper's own factors: mirror fit() and take them from the
        # wrapped model, otherwise _compute_item_score has nothing to work with after a load.
        # NOTE(review): factors estimated by set_URM_train for cold users are not persisted, so they
        # are not recovered here - confirm whether they should be saved as well
        self.USER_factors = self.mf_recommender.USER_factors.copy()
        self.ITEM_factors = self.mf_recommender.ITEM_factors.copy()

        if self._cold_user_KNN_model_flag:
            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(self.URM_train)
            self._ItemKNNRecommender.load_model(folder_path, file_name=file_name + "_cold_users")

            # The mask is not saved either: rebuild it from the URM the KNN model has been created on,
            # otherwise it stays all False and the KNN scores are never used.
            # NOTE(review): assumes self.URM_train is a CSR matrix, as set_URM_train leaves it - confirm
            self._warm_user_KNN_mask = np.ediff1d(self.URM_train.indptr) > 0
def set_URM_train(self, URM_train_new):
    """
    Replace the training URM and apply the cold-user strategy stored in self.estimate_model_for_cold_users.

    With "itemKNN" an ItemKNN model is built from the ITEM latent factors, using
    self.estimate_model_for_cold_users_topK for the similarity.
    With "mean_item_factors" the USER factors of the users that were cold before this call are
    estimated from the ITEM factors. Any other value only replaces the URM.

    :param URM_train_new: new user-rating matrix, it must have the same shape as the current self.URM_train
    :return: None
    """

    assert self.URM_train.shape == URM_train_new.shape, \
        "{}: set_URM_train old and new URM train have different shapes".format(self.RECOMMENDER_NAME)

    URM_train_new = check_matrix(URM_train_new, 'csr', dtype=np.float32)
    # Number of stored interactions per user in the new URM
    profile_length_new = np.ediff1d(URM_train_new.indptr)

    if self.estimate_model_for_cold_users == "itemKNN":

        self._print("Generating ItemKNN model from ITEM latent factors...")

        W_sparse = compute_W_sparse_from_item_latent_factors(self.ITEM_factors,
                                                             topK=self.estimate_model_for_cold_users_topK)

        self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(URM_train_new)
        self._ItemKNNRecommender.fit(W_sparse, topK=None)

        self._cold_user_KNN_model_flag = True
        self._warm_user_KNN_mask = profile_length_new > 0

        self._print("Generating ItemKNN model from ITEM latent factors... done!")

    elif self.estimate_model_for_cold_users == "mean_item_factors":

        self._print("Estimating USER latent factors from ITEM latent factors...")

        # Must be read before self.URM_train is replaced at the end of this function
        cold_user_mask_previous = self._get_cold_user_mask()
        profile_length_sqrt = np.sqrt(profile_length_new)

        self.USER_factors[cold_user_mask_previous, :] = URM_train_new.dot(
            self.ITEM_factors)[cold_user_mask_previous, :]
        self._cold_user_KNN_estimated_factors_flag = True

        # Divide every estimated row by the sqrt of the profile length.
        # Users whose new profile is still empty are skipped to avoid a division by zero.
        rows_to_normalize = np.logical_and(cold_user_mask_previous, profile_length_sqrt > 0)
        self.USER_factors[rows_to_normalize, :] /= profile_length_sqrt[rows_to_normalize, np.newaxis]

        self._print("Estimating USER latent factors from ITEM latent factors... done!")

    self.URM_train = sps.csr_matrix(URM_train_new.copy())
    self.URM_train.eliminate_zeros()
class BaseMatrixFactorizationRecommender(BaseRecommender):
    """
    Base class for recommenders that score items as the product of USER_factors and ITEM_factors,
    optionally adding item, user and global biases.

    It provides the item score computation, the estimation of a fallback model for cold users
    and the functions to save and load the model.
    """

    def __init__(self, URM_train, verbose=True, evaluation_block_size: int = 1):
        super(BaseMatrixFactorizationRecommender, self).__init__(URM_train, verbose=verbose,
                                                                 evaluation_block_size=evaluation_block_size)

        self.use_bias = False

        self._cold_user_KNN_model_flag = False
        self._cold_user_KNN_estimated_factors_flag = False
        # Builtin bool instead of the np.bool alias, which some NumPy releases do not provide
        self._warm_user_KNN_mask = np.zeros(len(self._get_cold_user_mask()), dtype=bool)

    def set_URM_train(self, URM_train_new, estimate_model_for_cold_users=False, topK=100, **kwargs):
        """
        Replace the training URM and, optionally, derive a model usable for cold users.

        :param URM_train_new: new user-rating matrix, it must have the same shape as the current self.URM_train
        :param estimate_model_for_cold_users: strategy selector, compared against two strings.
                "itemKNN": build an item-item KNN recommender from the ITEM latent factors.
                "mean_item_factors": estimate the USER factors of the users that were cold before this call
                as URM_train_new.dot(ITEM_factors), dividing each row by the square root of the number of
                interactions stored for that user.
                Any other value (including the default False) only replaces the URM.
        :param topK: forwarded to compute_W_sparse_from_item_latent_factors and to the KNN fit ("itemKNN" only)
        :param kwargs: not supported, a message is printed if any is received
        :return: None
        """

        assert self.URM_train.shape == URM_train_new.shape, \
            "{}: set_URM_train old and new URM train have different shapes".format(self.RECOMMENDER_NAME)

        if len(kwargs) > 0:
            self._print("set_URM_train keyword arguments not supported for this recommender class. Received: {}"
                        .format(kwargs))

        URM_train_new = check_matrix(URM_train_new, 'csr', dtype=np.float32)
        # Number of stored interactions per user in the new URM
        profile_length_new = np.ediff1d(URM_train_new.indptr)

        if estimate_model_for_cold_users == "itemKNN":

            self._print("Estimating ItemKNN model from ITEM latent factors...")

            W_sparse = compute_W_sparse_from_item_latent_factors(self.ITEM_factors, topK=topK)

            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(URM_train_new)
            self._ItemKNNRecommender.fit(W_sparse, topK=topK)
            # Kept because save_model stores it together with the similarity matrix
            self._ItemKNNRecommender_topK = topK

            self._cold_user_KNN_model_flag = True
            self._warm_user_KNN_mask = profile_length_new > 0

            self._print("Estimating ItemKNN model from ITEM latent factors... done!")

        elif estimate_model_for_cold_users == "mean_item_factors":

            self._print("Estimating USER latent factors from ITEM latent factors...")

            # Must be read before self.URM_train is replaced at the end of this function
            cold_user_mask_previous = self._get_cold_user_mask()
            profile_length_sqrt = np.sqrt(profile_length_new)

            self.USER_factors[cold_user_mask_previous, :] = URM_train_new.dot(
                self.ITEM_factors)[cold_user_mask_previous, :]
            self._cold_user_KNN_estimated_factors_flag = True

            # Divide every estimated row by the sqrt of the profile length.
            # Users whose new profile is still empty are skipped to avoid a division by zero.
            rows_to_normalize = np.logical_and(cold_user_mask_previous, profile_length_sqrt > 0)
            self.USER_factors[rows_to_normalize, :] /= profile_length_sqrt[rows_to_normalize, np.newaxis]

            self._print("Estimating USER latent factors from ITEM latent factors... done!")

        self.URM_train = check_matrix(URM_train_new.copy(), 'csr', dtype=np.float32)
        self.URM_train.eliminate_zeros()

    #########################################################################################################
    ##########                                                                                     ##########
    ##########                               COMPUTE ITEM SCORES                                   ##########
    ##########                                                                                     ##########
    #########################################################################################################

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """
        Compute the item scores for a batch of users.

        USER_factors is n_users x n_factors
        ITEM_factors is n_items x n_factors

        This function applies no special treatment to cold users.

        :param user_id_array: array of user indices to compute the scores for
        :param items_to_compute: optional item indices, all the other items get a score of -inf
        :return: array of scores with one row per requested user and one column per item
        """

        assert self.USER_factors.shape[1] == self.ITEM_factors.shape[1], \
            "{}: User and Item factors have inconsistent shape".format(self.RECOMMENDER_NAME)

        assert self.USER_factors.shape[0] > np.max(user_id_array), \
            "{}: Cold users not allowed. Users in trained model are {}, requested prediction for users up to {}".format(
                self.RECOMMENDER_NAME, self.USER_factors.shape[0], np.max(user_id_array))

        if items_to_compute is not None:
            # Items that are not requested keep a score of -inf
            item_scores = np.full((len(user_id_array), self.ITEM_factors.shape[0]), -np.inf, dtype=np.float32)
            item_scores[:, items_to_compute] = np.dot(self.USER_factors[user_id_array],
                                                      self.ITEM_factors[items_to_compute, :].T)
        else:
            item_scores = np.dot(self.USER_factors[user_id_array], self.ITEM_factors.T)

        # No need to select only the specific negative items or warm users because the -inf score will not change
        if self.use_bias:
            item_scores += self.ITEM_bias + self.GLOBAL_bias
            item_scores = (item_scores.T + self.USER_bias[user_id_array]).T

        return item_scores

    #########################################################################################################
    ##########                                                                                     ##########
    ##########                                LOAD AND SAVE                                        ##########
    ##########                                                                                     ##########
    #########################################################################################################

    def save_model(self, folder_path, file_name=None):
        """
        Save the latent factors, the flags, the cold user mask and, when present, the biases
        and the cold-user ItemKNN similarity with its topK.

        :param folder_path: folder passed to DataIO
        :param file_name: name of the destination file, defaults to self.RECOMMENDER_NAME when None
        :return: None
        """

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        logger.info(f"Saving model in file '{folder_path + file_name}'")

        data_dict_to_save = {
            "USER_factors": self.USER_factors,
            "ITEM_factors": self.ITEM_factors,
            "use_bias": self.use_bias,
            "_cold_user_mask": self._cold_user_mask,
            "_cold_user_KNN_model_flag": self._cold_user_KNN_model_flag,
            "_cold_user_KNN_estimated_factors_flag": self._cold_user_KNN_estimated_factors_flag,
        }

        if self.use_bias:
            data_dict_to_save["ITEM_bias"] = self.ITEM_bias
            data_dict_to_save["USER_bias"] = self.USER_bias
            data_dict_to_save["GLOBAL_bias"] = self.GLOBAL_bias

        if self._cold_user_KNN_model_flag:
            data_dict_to_save["_ItemKNNRecommender_W_sparse"] = self._ItemKNNRecommender.W_sparse
            data_dict_to_save["_ItemKNNRecommender_topK"] = self._ItemKNNRecommender_topK

        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save=data_dict_to_save)

        self._print("Saving complete")

    def load_model(self, folder_path, file_name=None):
        """
        Load a saved model and, if it included a cold-user ItemKNN model, rebuild that recommender.

        :param folder_path: folder the model was saved in, forwarded to the parent load_model
        :param file_name: name of the saved file, forwarded unchanged to the parent load_model
        :return: None
        """

        # NOTE(review): the parent load_model is presumably what sets the saved entries as attributes
        # of self - confirm against BaseRecommender
        super(BaseMatrixFactorizationRecommender, self).load_model(folder_path, file_name=file_name)

        if self._cold_user_KNN_model_flag:
            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(self.URM_train)
            self._ItemKNNRecommender.fit(self._ItemKNNRecommender_W_sparse,
                                         topK=self._ItemKNNRecommender_topK)

            # The similarity matrix has been handed to the rebuilt recommender, the loaded copy is not needed anymore
            del self._ItemKNNRecommender_W_sparse

            # _ItemKNNRecommender_topK is deliberately NOT deleted: save_model reads it whenever
            # _cold_user_KNN_model_flag is set, so removing it would make a save after a load fail.