def get_recommendations(self):
    """Rank known users for every cold-start item.

    Cold-start items have no latent factors of their own, so their factors
    are estimated from side information: the stored item factors are mapped
    through the item feature (or item relation) matrix, and cold items are
    projected onto that mapping with a pseudo-inverse.

    Returns:
        The result of ``self.get_topk_elements`` applied to the
        (cold items x users) score matrix, cast to ``np.intp``.
    """
    fields = self.data.fields
    user_factors = self.factors[fields.userid]
    item_factors = self.factors[fields.itemid]
    singular_values = self.factors['singular_values']

    if self.use_raw_features:
        # Work directly with raw item features: stack the features of the
        # training items, then encode cold items with the same labels so
        # that columns of both matrices line up.
        train_info = self.item_features.reindex(
            self.data.index.itemid.training.old.values, fill_value=[])
        train_features, feature_labels = stack_features(
            train_info, normalize=False)
        feature_map = train_features.T.dot(item_factors).T

        cold_info = self.item_features.reindex(
            self.data.index.itemid.cold_start.old.values, fill_value=[])
        cold_features, _ = stack_features(
            cold_info, labels=feature_labels, normalize=False)
    else:
        # Use the precomputed item relations / cold item similarities
        # stored on the data model.
        feature_map = self.data.item_relations.T.dot(item_factors).T
        cold_features = self.data.cold_items_similarity

    # Pseudo-inverse of the Gram matrix of the feature-to-factor mapping.
    gram_inverse = np.linalg.pinv(feature_map @ feature_map.T)
    cold_factors = cold_features.dot(feature_map.T) @ gram_inverse

    # Scale each latent dimension of the user factors by its singular value
    # before scoring.
    scaled_user_factors = user_factors * singular_values[None, :]
    scores = cold_factors @ scaled_user_factors.T
    return self.get_topk_elements(scores).astype(np.intp)
def build(self):
    """Create a LightFM model and fit it on the training matrix.

    When item and/or user features are provided, they are aligned with the
    corresponding index of the data model, stacked into normalized ``f4``
    matrices and passed to the fitting routine. The stacked matrices and
    their labels are stored on the instance.
    """
    model_config = dict(
        no_components=self.rank,
        item_alpha=self.item_alpha,
        user_alpha=self.user_alpha,
        loss=self.loss,
        learning_rate=self.learning_rate,
        learning_schedule=self.learning_schedule,
        max_sampled=self.max_sampled,
        random_state=self.seed,
    )
    self._model = LightFM(**model_config)
    # ``fit_method`` names the LightFM method used for training.
    fit = getattr(self._model, self.fit_method)
    matrix = self.get_training_matrix()

    if self.item_features is not None:
        # Items without any known features get an empty feature list.
        aligned_item_features = self.item_features.reindex(
            self.data.index.itemid.old.values, fill_value=[])
        stacked_items = stack_features(
            aligned_item_features,
            add_identity=self.item_identity,
            normalize=True,
            dtype='f4')
        self._item_features_csr, self.item_feature_labels = stacked_items

    if self.user_features is not None:
        # Users without any known features get an empty feature list.
        aligned_user_features = self.user_features.reindex(
            self.data.index.userid.training.old.values, fill_value=[])
        stacked_users = stack_features(
            aligned_user_features,
            add_identity=self.user_identity,
            normalize=True,
            dtype='f4')
        self._user_features_csr, self.user_feature_labels = stacked_users

    with Timer(self.method, verbose=self.verbose):
        fit(
            matrix,
            item_features=self._item_features_csr,
            user_features=self._user_features_csr,
        )
def build(self):
    """Train the LCE model and store the resulting factors.

    Stacks the item side information into a feature matrix, builds the
    neighbourhood structure with ``construct_A`` and runs ``LCE`` on the
    item-feature and item-user matrices. The outputs are saved in
    ``self.factors`` and the feature labels in ``self.feature_labels``.
    """
    # Inputs of the learning problem.
    item_feature_matrix, labels = stack_features(
        self.item_data, normalize=False)  # item-features sparse matrix
    item_user_matrix = self.get_training_matrix().T  # item-user sparse matrix

    # The neighbourhood size is capped by the square root of the item count.
    n_neighbours = min(
        self.max_neighbours, int(math.sqrt(item_feature_matrix.shape[0])))
    neighbourhood = construct_A(
        item_feature_matrix, n_neighbours, binary=self.binary_features)

    with track_time(self.training_time, verbose=self.verbose,
                    model=self.method):
        lce_options = dict(
            k=self.rank,
            alpha=self.alpha,
            beta=self.beta,
            lamb=self.regularization,
            epsilon=self.tolerance,
            maxiter=self.max_iterations,
            seed=self.seed,
            verbose=self.show_error,
        )
        item_factors, user_part, feature_part = LCE(
            item_feature_matrix, item_user_matrix, neighbourhood,
            **lce_options)

    fields = self.data.fields
    user_key = fields.userid
    item_key = fields.itemid
    # The user and feature components are stored transposed.
    self.factors[user_key] = user_part.T
    self.factors[item_key] = item_factors
    self.factors['item_features'] = feature_part.T
    self.feature_labels = labels
def get_similarity_data(meta_info, metric='common', assume_binary=True, fill_diagonal=True):
    """Build an item similarity matrix from item meta information.

    Args:
        meta_info: Item features indexed by item id; passed to
            ``stack_features``. Its ``index`` and ``index.name`` are used
            for the returned indices and as the item key.
        metric: ``'common'`` for similarity based on the product of the
            feature matrix with its transpose, scaled by the largest stored
            value; ``'cosine'`` or ``'salton'`` to delegate to
            ``cosine_similarity``.
        assume_binary: Forwarded to ``cosine_similarity``; not used with
            the ``'common'`` metric.
        fill_diagonal: Forwarded to ``cosine_similarity``. With the
            ``'common'`` metric the diagonal is always set to 1.0.

    Returns:
        A tuple ``(similarities, indices, labels)`` of dicts keyed by
        ``'userid'`` (always ``None``) and by the item key
        (``meta_info.index.name``).

    Raises:
        ValueError: If ``metric`` is not one of the supported values.
    """
    supported_metrics = ('common', 'cosine', 'salton')
    # Reject unknown metrics before doing any work; previously this case
    # only failed later with an unbound local variable.
    if metric not in supported_metrics:
        raise ValueError(
            'Unsupported metric {!r}; expected one of {}'.format(
                metric, ', '.join(supported_metrics)))

    feat_mat, lbls = stack_features(meta_info, normalize=False)

    if metric == 'common':
        item_similarity = feat_mat.dot(feat_mat.T)
        # A product without stored entries has no maximum to scale by;
        # dividing by 1 keeps the result type consistent with the
        # regular case.
        if item_similarity.data.size:
            scale = item_similarity.data.max()
        else:
            scale = 1
        item_similarity = item_similarity / scale
        item_similarity.setdiag(1.0)
    else:  # 'cosine' or 'salton'
        item_similarity = cosine_similarity(feat_mat,
                                            assume_binary=assume_binary,
                                            fill_diagonal=fill_diagonal)

    if item_similarity.format == 'csr':
        item_similarity = item_similarity.T  # ensure CSC format (matrix is symmetric)

    userid = 'userid'
    itemid = meta_info.index.name
    similarities = {userid: None, itemid: item_similarity}
    indices = {userid: None, itemid: meta_info.index}
    labels = {userid: None, itemid: lbls}
    return similarities, indices, labels
def build(self):
    """Create a LightFM model and fit it on the training matrix.

    Optional item and user features are aligned with the corresponding
    index of the data model, stacked into ``f4`` matrices (normalization
    is controlled by ``normalize_item_features`` /
    ``normalize_user_features``) and passed to the fitting routine along
    with ``self.fit_params``. Training time is tracked with
    ``track_time``.
    """
    model_config = dict(
        no_components=self.rank,
        item_alpha=self.item_alpha,
        user_alpha=self.user_alpha,
        loss=self.loss,
        learning_rate=self.learning_rate,
        learning_schedule=self.learning_schedule,
        max_sampled=self.max_sampled,
        random_state=self.seed,
    )
    self._model = LightFM(**model_config)
    # ``fit_method`` names the LightFM method used for training.
    fit = getattr(self._model, self.fit_method)
    # The training matrix is requested in COO format for LightFM.
    matrix = self.get_training_matrix(sparse_format='coo')

    # Use the dedicated training part of the item index when the index has
    # one; otherwise use the item index itself.
    full_item_index = self.data.index.itemid
    item_index = getattr(full_item_index, 'training', full_item_index)

    if self.item_features is not None:
        # Items without any known features get an empty feature list.
        aligned_item_features = self.item_features.reindex(
            item_index.old.values, fill_value=[])
        stacked_items = stack_features(
            aligned_item_features,
            add_identity=self.item_identity,
            normalize=self.normalize_item_features,
            dtype='f4')
        self._item_features_csr, self.item_features_labels = stacked_items

    if self.user_features is not None:
        # Users without any known features get an empty feature list.
        aligned_user_features = self.user_features.reindex(
            self.data.index.userid.training.old.values, fill_value=[])
        stacked_users = stack_features(
            aligned_user_features,
            add_identity=self.user_identity,
            normalize=self.normalize_user_features,
            dtype='f4')
        self._user_features_csr, self.user_features_labels = stacked_users

    with track_time(self.training_time, verbose=self.verbose,
                    model=self.method):
        fit(
            matrix,
            item_features=self._item_features_csr,
            user_features=self._user_features_csr,
            **self.fit_params
        )
def get_recommendations(self):
    """Rank known users for every cold-start item.

    Cold item features are encoded with the labels saved in
    ``self.feature_labels`` and projected onto the stored
    ``'item_features'`` factors via a pseudo-inverse. Negative entries of
    the projection are set to zero before scoring against the user
    factors.

    Returns:
        The result of ``self.get_topk_elements`` applied to the
        (cold items x users) score matrix, cast to ``np.intp``.
    """
    fields = self.data.fields
    user_key = fields.userid
    item_key = fields.itemid  # read as in the original; not used below
    user_part = self.factors[user_key].T
    feature_part = self.factors['item_features'].T

    # Cold items without any known features get an empty feature list.
    cold_index_values = self.data.index.itemid.cold_start.old.values
    cold_info = self.item_features.reindex(cold_index_values, fill_value=[])
    cold_features, _ = stack_features(
        cold_info, labels=self.feature_labels, normalize=False)

    # Project cold item features onto the latent space of the factors.
    gram_inverse = np.linalg.pinv(feature_part @ feature_part.T)
    cold_factors = cold_features.dot(feature_part.T).dot(gram_inverse)
    # Zero out negative entries of the projection.
    negative_entries = cold_factors < 0
    cold_factors[negative_entries] = 0

    scores = cold_factors @ user_part
    return self.get_topk_elements(scores).astype(np.intp)