def fit(self, topK=50, shrink=100, similarity='cosine', normalize=True,
        feature_weighting="none", interactions_feature_weighting="none",
        **similarity_args):
    """Fit the recommender, optionally re-weighting URM_train first.

    :param topK: number of neighbours, forwarded to the parent fit
    :param shrink: shrink term, forwarded to the parent fit
    :param similarity: similarity type, forwarded to the parent fit
    :param normalize: normalization flag, forwarded to the parent fit
    :param feature_weighting: weighting applied by the parent fit
    :param interactions_feature_weighting: "BM25", "TF-IDF" or "none";
        applied to URM_train (via its transpose) before fitting
    :param similarity_args: extra arguments for the similarity computation
    :raises ValueError: if interactions_feature_weighting is not one of
        FEATURE_WEIGHTING_VALUES
    """
    if interactions_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
        # Fixed: the message now names the parameter actually validated
        # (it previously referred to 'feature_weighting').
        raise ValueError(
            "Value for 'interactions_feature_weighting' not recognized. "
            "Acceptable values are {}, provided was '{}'".format(
                self.FEATURE_WEIGHTING_VALUES, interactions_feature_weighting))

    if interactions_feature_weighting == "BM25":
        self.URM_train = self.URM_train.astype(np.float32)
        # Weighting is applied on the transposed matrix and transposed back,
        # so the statistics are computed over the other axis than usual.
        self.URM_train = okapi_BM_25(self.URM_train.T).T
        self.URM_train = check_matrix(self.URM_train, 'csr')
    elif interactions_feature_weighting == "TF-IDF":
        self.URM_train = self.URM_train.astype(np.float32)
        self.URM_train = TF_IDF(self.URM_train.T).T
        self.URM_train = check_matrix(self.URM_train, 'csr')

    super().fit(topK=topK, shrink=shrink, similarity=similarity,
                normalize=normalize, feature_weighting=feature_weighting,
                **similarity_args)
def fit(self, topK=50, shrink=100, similarity='cosine', normalize=True,
        feature_weighting="none", **similarity_args):
    """Compute and store the KNN similarity matrix built from URM_train.

    Optionally re-weights URM_train with BM25 or TF-IDF before the
    similarity computation. Raises ValueError for an unknown weighting.
    """
    self.topK = topK
    self.shrink = shrink

    if feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'".format(
                self.FEATURE_WEIGHTING_VALUES, feature_weighting))

    # Select the optional re-weighting transform
    if feature_weighting == "BM25":
        transform = okapi_BM_25
    elif feature_weighting == "TF-IDF":
        transform = TF_IDF
    else:
        transform = None

    if transform is not None:
        self.URM_train = transform(self.URM_train.astype(np.float32))
        self.URM_train = check_matrix(self.URM_train, 'csr')

    similarity_engine = Compute_Similarity(self.URM_train.T,
                                           shrink=shrink,
                                           topK=topK,
                                           normalize=normalize,
                                           similarity=similarity,
                                           **similarity_args)
    self.W_sparse = check_matrix(similarity_engine.compute_similarity(),
                                 format='csr')
def get_model(cls, URM_train, ICM_train):
    """Build and fit a PureSVD recommender on the TF-IDF weighted stack of
    the interactions and the transposed item content matrix."""
    from course_lib.MatrixFactorization.PureSVDRecommender import PureSVDRecommender
    from course_lib.Base.IR_feature_weighting import TF_IDF

    # Item content rows are appended below the interaction rows
    stacked_matrix = sps.vstack([URM_train, ICM_train.T])
    URM_train_side_info = TF_IDF(stacked_matrix).tocsr()

    model = PureSVDRecommender(URM_train_side_info)
    model.fit(**cls.get_best_parameters())
    return model
def get_model(cls, URM_train, ICM_train, apply_tf_idf=True):
    """Build and fit a NewPureSVD recommender on the stack of interactions
    and transposed item content, optionally TF-IDF weighted."""
    from src.model.MatrixFactorization.NewPureSVDRecommender import NewPureSVDRecommender
    from course_lib.Base.IR_feature_weighting import TF_IDF

    stacked = sps.vstack([URM_train, ICM_train.T])
    if apply_tf_idf:
        URM_train_side_info = TF_IDF(stacked).tocsr()
    else:
        URM_train_side_info = stacked.tocsr()

    model = NewPureSVDRecommender(URM_train_side_info)
    model.fit(**cls.get_best_parameters())
    return model
def get_model(cls, URM_train, ICM_train, apply_tf_idf=True):
    """Build and fit an RP3beta recommender on the stack of interactions
    and transposed item content, optionally TF-IDF weighted."""
    from course_lib.Base.IR_feature_weighting import TF_IDF

    stacked = sps.vstack([URM_train, ICM_train.T])
    if apply_tf_idf:
        URM_train_side_info = TF_IDF(stacked).tocsr()
    else:
        URM_train_side_info = stacked.tocsr()

    # Imported here, as in the original, right before instantiation
    from course_lib.GraphBased.RP3betaRecommender import RP3betaRecommender
    model = RP3betaRecommender(URM_train_side_info)
    model.fit(**cls.get_best_parameters())
    return model
def fit(self, topK=50, shrink=100, normalize=True, feature_weighting="none"):
    """Build a user-user similarity as the (shrunk) dot product of user
    profiles, with optional top-K filtering and L1 row normalization."""
    self.topK = topK
    self.shrink = shrink

    if feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
            .format(self.FEATURE_WEIGHTING_VALUES, feature_weighting))

    # Optional re-weighting of the interactions
    if feature_weighting == "BM25":
        transform = okapi_BM_25
    elif feature_weighting == "TF-IDF":
        transform = TF_IDF
    else:
        transform = None

    if transform is not None:
        self.URM_train = transform(self.URM_train.astype(np.float32))
        self.URM_train = check_matrix(self.URM_train, 'csr')

    # Scale the dot product by the shrink term (guarding against division by zero)
    scale = 1 / (shrink if shrink != 0 else 1)
    self.W_sparse = self.URM_train.dot(self.URM_train.T) * scale

    if self.topK >= 0:
        self.W_sparse = userSimilarityMatrixTopK(self.W_sparse,
                                                 k=self.topK).tocsr()

    if normalize:
        self.W_sparse = normalize_sk(self.W_sparse, norm="l1", axis=1)

    self.W_sparse = check_matrix(self.W_sparse, format='csr')
def fit(self, topK=50, shrink=100, similarity='cosine', normalize=True,
        feature_weighting="none", **similarity_args):
    """Fit a user-user similarity on UCM_train, remove the cold users'
    columns from it, and add an identity matrix to the wrapped
    recommender's W_sparse.

    :param topK: neighbours kept after cold-user removal
    :param shrink: shrink term for the similarity computation
    :param similarity: similarity type for Compute_Similarity
    :param normalize: normalization flag for Compute_Similarity
    :param feature_weighting: "BM25", "TF-IDF" or "none", applied to UCM_train
    :raises ValueError: if feature_weighting is not recognized
    """
    self.topK = topK
    # Over-compute neighbours so that, after zeroing cold-user columns,
    # at least topK candidates per user remain
    self.topComputeK = topK + len(self.cold_users)
    self.shrink = shrink

    if feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
            .format(self.FEATURE_WEIGHTING_VALUES, feature_weighting))

    if feature_weighting == "BM25":
        self.UCM_train = self.UCM_train.astype(np.float32)
        self.UCM_train = okapi_BM_25(self.UCM_train)
    elif feature_weighting == "TF-IDF":
        self.UCM_train = self.UCM_train.astype(np.float32)
        self.UCM_train = TF_IDF(self.UCM_train)

    similarity = Compute_Similarity(self.UCM_train.T, shrink=shrink,
                                    topK=self.topComputeK,
                                    normalize=normalize,
                                    similarity=similarity,
                                    **similarity_args)
    self.W_sparse = similarity.compute_similarity()
    self.W_sparse = self.W_sparse.tocsc()
    # In CSC, indptr slices select columns: zero every column belonging
    # to a cold user so they are never recommended as neighbours
    for user in self.cold_users:
        self.W_sparse.data[self.W_sparse.indptr[user]:self.W_sparse.
                           indptr[user + 1]] = 0
    self.W_sparse.eliminate_zeros()
    self.W_sparse = self.W_sparse.tocsr()
    # Re-apply the top-K filter now that cold columns are gone
    self.W_sparse = similarityMatrixTopK(self.W_sparse, k=self.topK).tocsr()
    self.W_sparse = check_matrix(self.W_sparse, format='csr')

    # Add identity matrix to the recommender
    # NOTE(review): every call to fit() adds another identity matrix on top
    # of the wrapped recommender's W_sparse — confirm fit is called only once
    self.recommender.W_sparse = self.recommender.W_sparse + sps.identity(
        self.recommender.W_sparse.shape[0], format="csr")
class UserKNNDotCFRecommender(BaseUserSimilarityMatrixRecommender):
    """User-based collaborative filtering using a plain dot-product
    similarity between user profiles."""

    RECOMMENDER_NAME = "UserKNNDotCFRecommender"
    FEATURE_WEIGHTING_VALUES = ["BM25", "TF-IDF", "none"]

    def __init__(self, URM_train, verbose=True):
        super(UserKNNDotCFRecommender, self).__init__(URM_train,
                                                      verbose=verbose)

    def fit(self, topK=50, shrink=100, normalize=True, feature_weighting="none"):
        """Compute the user-user dot-product similarity matrix.

        :param topK: neighbours kept per user (negative keeps all)
        :param shrink: constant divisor of the dot product (0 means 1)
        :param normalize: apply L1 row normalization to the result
        :param feature_weighting: "BM25", "TF-IDF" or "none"
        :raises ValueError: if feature_weighting is not recognized
        """
        self.topK = topK
        self.shrink = shrink

        if feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES, feature_weighting))

        weighting_map = {"BM25": okapi_BM_25, "TF-IDF": TF_IDF}
        transform = weighting_map.get(feature_weighting)
        if transform is not None:
            self.URM_train = transform(self.URM_train.astype(np.float32))
            self.URM_train = check_matrix(self.URM_train, 'csr')

        # Dot-product similarity scaled by the shrink term
        scale = 1 / (shrink if shrink != 0 else 1)
        self.W_sparse = self.URM_train.dot(self.URM_train.T) * scale

        if self.topK >= 0:
            self.W_sparse = userSimilarityMatrixTopK(self.W_sparse,
                                                     k=self.topK).tocsr()

        if normalize:
            self.W_sparse = normalize_sk(self.W_sparse, norm="l1", axis=1)

        self.W_sparse = check_matrix(self.W_sparse, format='csr')
def precompute_best_item_indices(self, URM: sps.csr_matrix):
    """Precompute, for each row of the similarity matrix, the column indices
    sorted by decreasing cosine similarity, storing them in self.sorted_indices.

    The input URM is copied and optionally re-weighted (BM25 / TF-IDF)
    according to self.feature_weighting before the similarity computation.
    """
    URM = URM.copy()

    if self.feature_weighting == "BM25":
        URM = check_matrix(okapi_BM_25(URM.astype(np.float32)), 'csr')
    elif self.feature_weighting == "TF-IDF":
        URM = check_matrix(TF_IDF(URM.astype(np.float32)), 'csr')

    similarity_engine = Compute_Similarity(URM,
                                           shrink=self.shrink,
                                           topK=self.topK,
                                           normalize=self.normalize,
                                           similarity="cosine")
    similarity_matrix = similarity_engine.compute_similarity()

    # argsort of the negated dense matrix yields descending-similarity order
    self.sorted_indices = np.array(
        np.argsort(-similarity_matrix.todense(), axis=1))
def apply_feature_weighting(matrix, feature_weighting="none"):
    """Return `matrix` re-weighted with BM25 or TF-IDF, or unchanged for "none".

    :param matrix: sparse matrix to re-weight
    :param feature_weighting: one of "BM25", "TF-IDF", "none"
    :raises ValueError: if feature_weighting is not recognized
    """
    from course_lib.Base.IR_feature_weighting import okapi_BM_25, TF_IDF
    from course_lib.Base.Recommender_utils import check_matrix

    FEATURE_WEIGHTING_VALUES = ["BM25", "TF-IDF", "none"]
    if feature_weighting not in FEATURE_WEIGHTING_VALUES:
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
            .format(FEATURE_WEIGHTING_VALUES, feature_weighting))

    # Dispatch table: "none" maps to no transform at all
    weighting_map = {"BM25": okapi_BM_25, "TF-IDF": TF_IDF}
    transform = weighting_map.get(feature_weighting)
    if transform is not None:
        matrix = check_matrix(transform(matrix.astype(np.float32)), 'csr')

    return matrix
def get_model(cls, URM_train, ICM_train, load_model=False, save_model=False):
    """Return an RP3beta recommender trained on the TF-IDF weighted stack of
    URM_train and ICM_train.T, optionally loading it from / saving it to disk.

    :param load_model: try to load a previously saved model first
    :param save_model: persist the freshly fitted model
    """
    from course_lib.Base.IR_feature_weighting import TF_IDF
    from course_lib.GraphBased.RP3betaRecommender import RP3betaRecommender

    # Item content rows are appended below the interaction rows
    URM_train_side_info = TF_IDF(sps.vstack([URM_train, ICM_train.T])).tocsr()
    model = RP3betaRecommender(URM_train_side_info)
    try:
        if load_model:
            model = cls._load_model(model)
            return model
    except FileNotFoundError as e:
        # No saved model on disk: fall through and fit from scratch
        print("WARNING: Cannot find model to be loaded")

    model.fit(**cls.get_best_parameters())
    if save_model:
        cls._save_model(model)

    return model
def main():
    """Run a collaborative hyper-parameter search for the recommender chosen
    on the command line, with optional user-exclusion filters for evaluation."""
    args = get_arguments()

    # Data loading
    data_reader = RecSys2019Reader(args.reader_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=3,
                                               use_validation_set=False,
                                               force_new_split=True,
                                               seed=args.seed)
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    # Per-recommender preprocessing of the training matrix
    if args.recommender_name == "sslim_bpr":
        ICM_all = get_ICM_train(data_reader)
        URM_train = sps.vstack([URM_train, ICM_all.T], format="csr")
    if args.recommender_name == "rp3beta_side":
        ICM_all = get_ICM_train(data_reader)
        URM_train = sps.vstack([URM_train, ICM_all.T], format="csr")
        URM_train = TF_IDF(URM_train).tocsr()
    if args.recommender_name == "pure_svd":
        URM_train = TF_IDF(URM_train).tocsr()
    if args.recommender_name == "pure_svd_side":
        ICM_all = get_ICM_train(data_reader)
        URM_train = sps.vstack([URM_train, ICM_all.T], format="csr")

    # Setting evaluator
    exclude_cold_users = args.exclude_users
    h = int(args.focus_on_high)
    fol = int(args.focus_on_low)
    if h != 0:
        # Keep only users with at least h interactions
        # (np.ediff1d on CSR indptr gives the per-row interaction counts)
        # NOTE(review): in this branch exclude_cold_users is ignored,
        # unlike the fol branch below — confirm this asymmetry is intended
        print("Excluding users with less than {} interactions".format(h))
        ignore_users_mask = np.ediff1d(URM_train.tocsr().indptr) < h
        ignore_users = np.arange(URM_train.shape[0])[ignore_users_mask]
    elif fol != 0:
        # Keep only users with at most fol interactions
        print("Excluding users with more than {} interactions".format(fol))
        warm_users_mask = np.ediff1d(URM_train.tocsr().indptr) > fol
        ignore_users = np.arange(URM_train.shape[0])[warm_users_mask]
        if exclude_cold_users:
            # Also exclude users with no interactions at all
            cold_user_mask = np.ediff1d(URM_train.tocsr().indptr) == 0
            cold_users = np.arange(URM_train.shape[0])[cold_user_mask]
            ignore_users = np.unique(np.concatenate((cold_users,
                                                     ignore_users)))
    elif exclude_cold_users:
        print("Excluding cold users...")
        cold_user_mask = np.ediff1d(URM_train.tocsr().indptr) == 0
        ignore_users = np.arange(URM_train.shape[0])[cold_user_mask]
    else:
        ignore_users = None

    cutoff_list = [10]
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list,
                                 ignore_users=ignore_users)

    # HP tuning
    print("Start tuning...")
    # Output folder is tagged with the recommender name and a timestamp
    version_path = "../../report/hp_tuning/{}/".format(args.recommender_name)
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_3/"
    version_path = version_path + "/" + now

    runParameterSearch_Collaborative(URM_train=URM_train,
                                     recommender_class=RECOMMENDER_CLASS_DICT[
                                         args.recommender_name],
                                     evaluator_validation=evaluator,
                                     metric_to_optimize="MAP",
                                     output_folder_path=version_path,
                                     n_cases=int(args.n_cases),
                                     n_random_starts=int(args.n_random_starts))
    print("...tuning ended")
def fit(self, user_topK=50, user_shrink=100, user_similarity_type='cosine',
        user_normalize=True, user_feature_weighting="none",
        user_asymmetric_alpha=0.5, item_topK=50, item_shrink=100,
        item_similarity_type='cosine', item_normalize=True,
        item_feature_weighting="none", item_asymmetric_alpha=0.5,
        interactions_feature_weighting="none"):
    """Fit both a user-user similarity (from UCM_train) and an item-item
    similarity (from ICM_train).

    Optionally re-weights URM_train (interactions_feature_weighting),
    UCM_train (user_feature_weighting) and ICM_train (item_feature_weighting)
    with BM25 or TF-IDF before computing the similarities.

    :raises ValueError: if any of the three *_feature_weighting arguments is
        not one of FEATURE_WEIGHTING_VALUES
    """
    if interactions_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
        # NOTE(review): message says 'feature_weighting' although the value
        # being validated is interactions_feature_weighting
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
            .format(self.FEATURE_WEIGHTING_VALUES,
                    interactions_feature_weighting))

    if interactions_feature_weighting == "BM25":
        self.URM_train = self.URM_train.astype(np.float32)
        self.URM_train = okapi_BM_25(self.URM_train)
        self.URM_train = check_matrix(self.URM_train, 'csr')
    elif interactions_feature_weighting == "TF-IDF":
        self.URM_train = self.URM_train.astype(np.float32)
        self.URM_train = TF_IDF(self.URM_train)
        self.URM_train = check_matrix(self.URM_train, 'csr')

    # User Similarity Computation
    self.user_topK = user_topK
    self.user_shrink = user_shrink

    if user_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
            .format(self.FEATURE_WEIGHTING_VALUES, user_feature_weighting))

    if user_feature_weighting == "BM25":
        self.UCM_train = self.UCM_train.astype(np.float32)
        self.UCM_train = okapi_BM_25(self.UCM_train)
    elif user_feature_weighting == "TF-IDF":
        self.UCM_train = self.UCM_train.astype(np.float32)
        self.UCM_train = TF_IDF(self.UCM_train)

    kwargs = {"asymmetric_alpha": user_asymmetric_alpha}
    user_similarity_compute = Compute_Similarity(
        self.UCM_train.T, shrink=user_shrink, topK=user_topK,
        normalize=user_normalize, similarity=user_similarity_type, **kwargs)
    self.user_W_sparse = user_similarity_compute.compute_similarity()
    self.user_W_sparse = check_matrix(self.user_W_sparse, format='csr')

    # Item Similarity Computation
    self.item_topK = item_topK
    self.item_shrink = item_shrink

    if item_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
            .format(self.FEATURE_WEIGHTING_VALUES, item_feature_weighting))

    if item_feature_weighting == "BM25":
        self.ICM_train = self.ICM_train.astype(np.float32)
        self.ICM_train = okapi_BM_25(self.ICM_train)
    elif item_feature_weighting == "TF-IDF":
        self.ICM_train = self.ICM_train.astype(np.float32)
        self.ICM_train = TF_IDF(self.ICM_train)

    kwargs = {"asymmetric_alpha": item_asymmetric_alpha}
    item_similarity_compute = Compute_Similarity(
        self.ICM_train.T, shrink=item_shrink, topK=item_topK,
        normalize=item_normalize, similarity=item_similarity_type, **kwargs)
    self.item_W_sparse = item_similarity_compute.compute_similarity()
    self.item_W_sparse = check_matrix(self.item_W_sparse, format='csr')
class UserItemCBFCFDemographicRecommender(BaseRecommender):
    """ UserItem KNN CBF & CF & Demographic Recommender

    Hybrid recommender combining a user-user similarity (built from
    demographics + interactions) and an item-item similarity (built from
    item content + interactions). Scores are computed by propagating the
    user profile through both similarity matrices.
    """

    RECOMMENDER_NAME = "UserItemCBFCFDemographicRecommender"
    FEATURE_WEIGHTING_VALUES = ["BM25", "TF-IDF", "none"]

    def __init__(self, URM_train, UCM_train, ICM_train, verbose=True):
        super(UserItemCBFCFDemographicRecommender,
              self).__init__(URM_train, verbose=verbose)
        # Lazily-set flags so each format warning is printed at most once
        self._URM_train_format_checked = False
        self._user_W_sparse_format_checked = False
        self._item_W_sparse_format_checked = False
        # Augment user features with interactions, item features with
        # transposed interactions
        self.UCM_train = sps.hstack([UCM_train, URM_train], format="csr")
        self.ICM_train = sps.hstack([ICM_train, URM_train.T], format="csr")

    def fit(self, user_topK=50, user_shrink=100,
            user_similarity_type='cosine', user_normalize=True,
            user_feature_weighting="none", user_asymmetric_alpha=0.5,
            item_topK=50, item_shrink=100, item_similarity_type='cosine',
            item_normalize=True, item_feature_weighting="none",
            item_asymmetric_alpha=0.5, interactions_feature_weighting="none"):
        """Fit the user-user and item-item similarity models.

        Optionally re-weights URM_train, UCM_train and ICM_train with BM25
        or TF-IDF before computing the similarities.

        :raises ValueError: if any *_feature_weighting value is not one of
            FEATURE_WEIGHTING_VALUES
        """
        if interactions_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            # NOTE(review): message says 'feature_weighting' although it
            # validates interactions_feature_weighting
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES,
                        interactions_feature_weighting))

        if interactions_feature_weighting == "BM25":
            self.URM_train = self.URM_train.astype(np.float32)
            self.URM_train = okapi_BM_25(self.URM_train)
            self.URM_train = check_matrix(self.URM_train, 'csr')
        elif interactions_feature_weighting == "TF-IDF":
            self.URM_train = self.URM_train.astype(np.float32)
            self.URM_train = TF_IDF(self.URM_train)
            self.URM_train = check_matrix(self.URM_train, 'csr')

        # User Similarity Computation
        self.user_topK = user_topK
        self.user_shrink = user_shrink

        if user_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES, user_feature_weighting))

        if user_feature_weighting == "BM25":
            self.UCM_train = self.UCM_train.astype(np.float32)
            self.UCM_train = okapi_BM_25(self.UCM_train)
        elif user_feature_weighting == "TF-IDF":
            self.UCM_train = self.UCM_train.astype(np.float32)
            self.UCM_train = TF_IDF(self.UCM_train)

        kwargs = {"asymmetric_alpha": user_asymmetric_alpha}
        user_similarity_compute = Compute_Similarity(
            self.UCM_train.T, shrink=user_shrink, topK=user_topK,
            normalize=user_normalize, similarity=user_similarity_type,
            **kwargs)
        self.user_W_sparse = user_similarity_compute.compute_similarity()
        self.user_W_sparse = check_matrix(self.user_W_sparse, format='csr')

        # Item Similarity Computation
        self.item_topK = item_topK
        self.item_shrink = item_shrink

        if item_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES, item_feature_weighting))

        if item_feature_weighting == "BM25":
            self.ICM_train = self.ICM_train.astype(np.float32)
            self.ICM_train = okapi_BM_25(self.ICM_train)
        elif item_feature_weighting == "TF-IDF":
            self.ICM_train = self.ICM_train.astype(np.float32)
            self.ICM_train = TF_IDF(self.ICM_train)

        kwargs = {"asymmetric_alpha": item_asymmetric_alpha}
        item_similarity_compute = Compute_Similarity(
            self.ICM_train.T, shrink=item_shrink, topK=item_topK,
            normalize=item_normalize, similarity=item_similarity_type,
            **kwargs)
        self.item_W_sparse = item_similarity_compute.compute_similarity()
        self.item_W_sparse = check_matrix(self.item_W_sparse, format='csr')

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Score items for the given users.

        URM_train and W_sparse must have the same format, CSR.

        :param user_id_array: array of user indices to score
        :param items_to_compute: not implemented!! (scores outside this set
            are masked with -inf)
        :return: dense array of item scores, one row per user
        """
        self._check_format()

        user_weights_array = self.user_W_sparse[user_id_array]

        if items_to_compute is not None:
            # Mask all items with -inf, then fill in the requested ones
            # NOTE(review): the -inf entries flow into user_profile_array
            # below — confirm this masking interaction is intended
            item_scores_user_similarity = -np.ones(
                (len(user_id_array), self.URM_train.shape[1]),
                dtype=np.float32) * np.inf
            item_scores_user_similarity_all = user_weights_array.dot(
                self.URM_train).toarray()
            item_scores_user_similarity[:,
                                        items_to_compute] = item_scores_user_similarity_all[:,
                                                                                            items_to_compute]
        else:
            item_scores_user_similarity = user_weights_array.dot(
                self.URM_train)

        # Augment the propagated scores with the users' own interactions
        user_profile_array = item_scores_user_similarity + self.URM_train[
            user_id_array]

        if items_to_compute is not None:
            item_scores_item_similarity = -np.ones(
                (len(user_id_array), self.URM_train.shape[1]),
                dtype=np.float32) * np.inf
            item_scores_item_similarity_all = user_profile_array.dot(
                self.item_W_sparse).toarray()
            item_scores_item_similarity[:,
                                        items_to_compute] = item_scores_item_similarity_all[:,
                                                                                            items_to_compute]
        else:
            item_scores_item_similarity = user_profile_array.dot(
                self.item_W_sparse).toarray()

        return item_scores_item_similarity

    def _check_format(self):
        # Emit a one-time performance warning per matrix that is not CSR
        if not self._URM_train_format_checked:
            if self.URM_train.getformat() != "csr":
                self._print(
                    "PERFORMANCE ALERT compute_item_score: {} is not {}, this will significantly slow down the computation."
                    .format("URM_train", "csr"))
            self._URM_train_format_checked = True

        if not self._item_W_sparse_format_checked:
            if self.item_W_sparse.getformat() != "csr":
                self._print(
                    "PERFORMANCE ALERT compute_item_score: {} is not {}, this will significantly slow down "
                    "the computation.".format("item_W_sparse", "csr"))
            self._item_W_sparse_format_checked = True

        if not self._user_W_sparse_format_checked:
            if self.user_W_sparse.getformat() != "csr":
                # NOTE(review): message says "user__sparse",
                # likely meant "user_W_sparse"
                self._print(
                    "PERFORMANCE ALERT compute_item_score: {} is not {}, this will significantly slow down "
                    "the computation.".format("user__sparse", "csr"))
            self._user_W_sparse_format_checked = True

    def save_model(self, folder_path, file_name=None):
        """Persist the two similarity matrices with DataIO.

        :param folder_path: destination folder
        :param file_name: file name; defaults to RECOMMENDER_NAME
        """
        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Saving model in file '{}'".format(folder_path +
                                                       file_name))

        data_dict_to_save = {
            "user_W_sparse": self.user_W_sparse,
            "item_W_sparse": self.item_W_sparse
        }

        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name,
                         data_dict_to_save=data_dict_to_save)

        self._print("Saving complete")
def fit(self, alpha=1., beta=0.6, min_rating=0, topK=100, implicit=False,
        normalize_similarity=True):
    """Fit an RP3beta-style graph similarity on the URM augmented with user
    side information (UCM_train stacked horizontally) and TF-IDF weighted.

    :param alpha: exponent applied to the transition probabilities
    :param beta: popularity-penalty exponent applied to the item degree
    :param min_rating: ratings below this are dropped from URM_train
    :param topK: neighbours kept per row (also used as truthy flag below)
    :param implicit: binarize URM_train.data after min_rating filtering
    :param normalize_similarity: L1-normalize the final similarity rows
    """
    self.alpha = alpha
    self.beta = beta
    self.min_rating = min_rating
    self.topK = topK
    self.implicit = implicit
    self.normalize_similarity = normalize_similarity

    # if X.dtype != np.float32:
    #     print("RP3beta fit: For memory usage reasons, we suggest to use np.float32 as dtype for the dataset")

    if self.min_rating > 0:
        # NOTE(review): this filters self.URM_train in place
        self.URM_train.data[self.URM_train.data < self.min_rating] = 0
        self.URM_train.eliminate_zeros()
        if self.implicit:
            self.URM_train.data = np.ones(self.URM_train.data.size,
                                          dtype=np.float32)

    # Pui is the row-normalized urm
    # Columns are items followed by user features, so similarities are
    # computed over that joint column space
    Pui_raw = sps.hstack([self.URM_train, self.UCM_train], format="csr")
    Pui_raw = TF_IDF(Pui_raw).tocsr()
    Pui = normalize(Pui_raw, norm='l1', axis=1)

    # Piu is the column-normalized, "boolean" urm transposed
    # X_bool = self.URM_train.transpose(copy=True)
    X_bool = Pui_raw.transpose(copy=True)
    X_bool.data = np.ones(X_bool.data.size, np.float32)

    # Taking the degree of each item to penalize top popular
    # Some rows might be zero, make sure their degree remains zero
    X_bool_sum = np.array(X_bool.sum(axis=1)).ravel()

    degree = np.zeros(Pui_raw.shape[1])

    nonZeroMask = X_bool_sum != 0.0

    degree[nonZeroMask] = np.power(X_bool_sum[nonZeroMask], -self.beta)

    # ATTENTION: axis is still 1 because i transposed before the normalization
    Piu = normalize(X_bool, norm='l1', axis=1)
    del (X_bool)

    # Alfa power
    if self.alpha != 1.:
        Pui = Pui.power(self.alpha)
        Piu = Piu.power(self.alpha)

    # Final matrix is computed as Pui * Piu * Pui
    # Multiplication unpacked for memory usage reasons
    block_dim = 200
    d_t = Piu

    # Use array as it reduces memory requirements compared to lists
    dataBlock = 10000000

    # Preallocated COO buffers, grown in dataBlock-sized chunks when full
    rows = np.zeros(dataBlock, dtype=np.int32)
    cols = np.zeros(dataBlock, dtype=np.int32)
    values = np.zeros(dataBlock, dtype=np.float32)

    numCells = 0

    start_time = time.time()
    start_time_printBatch = start_time

    for current_block_start_row in range(0, Pui.shape[1], block_dim):

        # Shrink the last block so it does not run past the matrix end
        if current_block_start_row + block_dim > Pui.shape[1]:
            block_dim = Pui.shape[1] - current_block_start_row

        similarity_block = d_t[
            current_block_start_row:current_block_start_row +
            block_dim, :] * Pui
        similarity_block = similarity_block.toarray()

        for row_in_block in range(block_dim):
            # Apply the popularity penalty and remove self-similarity
            row_data = np.multiply(similarity_block[row_in_block, :], degree)
            row_data[current_block_start_row + row_in_block] = 0

            # Top-K values of this row, descending
            best = row_data.argsort()[::-1][:self.topK]

            notZerosMask = row_data[best] != 0.0

            values_to_add = row_data[best][notZerosMask]
            cols_to_add = best[notZerosMask]

            for index in range(len(values_to_add)):

                # Grow the COO buffers when they are full
                if numCells == len(rows):
                    rows = np.concatenate(
                        (rows, np.zeros(dataBlock, dtype=np.int32)))
                    cols = np.concatenate(
                        (cols, np.zeros(dataBlock, dtype=np.int32)))
                    values = np.concatenate(
                        (values, np.zeros(dataBlock, dtype=np.float32)))

                rows[numCells] = current_block_start_row + row_in_block
                cols[numCells] = cols_to_add[index]
                values[numCells] = values_to_add[index]

                numCells += 1

        # Progress report at most once per minute
        if time.time() - start_time_printBatch > 60:
            self._print(
                "Processed {} ( {:.2f}% ) in {:.2f} minutes. Rows per second: {:.0f}"
                .format(
                    current_block_start_row,
                    100.0 * float(current_block_start_row) / Pui.shape[1],
                    (time.time() - start_time) / 60,
                    float(current_block_start_row) /
                    (time.time() - start_time)))

            sys.stdout.flush()
            sys.stderr.flush()

            start_time_printBatch = time.time()

    self.W_sparse = sps.csr_matrix(
        (values[:numCells], (rows[:numCells], cols[:numCells])),
        shape=(Pui.shape[1], Pui.shape[1]))

    if self.normalize_similarity:
        self.W_sparse = normalize(self.W_sparse, norm='l1', axis=1)

    # NOTE(review): topK is used as an int above and as a truthy flag here
    if self.topK != False:
        self.W_sparse = similarityMatrixTopK(self.W_sparse, k=self.topK)

    self.W_sparse = check_matrix(self.W_sparse, format='csr')