Beispiel #1
0
    def get_recommendations(self):
        """Score users against cold-start items and return the top ones.

        Cold-start item factors are obtained by a least-squares style
        projection: known item factors are mapped through an item
        description matrix, and cold item descriptions are mapped back
        with the pseudo-inverse of the resulting Gram matrix.

        The item description is either stacked raw item features (when
        ``self.use_raw_features`` is truthy) or the precomputed
        ``self.data.item_relations`` / ``self.data.cold_items_similarity``.

        Returns:
            The output of ``self.get_topk_elements`` applied to the
            (cold items x users) score matrix, cast to ``np.intp``.
        """
        fields = self.data.fields
        user_factors = self.factors[fields.userid]
        item_factors = self.factors[fields.itemid]
        singular_values = self.factors['singular_values']

        if self.use_raw_features:
            item_index = self.data.index.itemid
            known_info = self.item_features.reindex(
                item_index.training.old.values, fill_value=[])
            known_description, known_labels = stack_features(
                known_info, normalize=False)

            cold_info = self.item_features.reindex(
                item_index.cold_start.old.values, fill_value=[])
            # reuse the training labels so cold features share the same columns
            cold_description, _ = stack_features(
                cold_info, labels=known_labels, normalize=False)
        else:
            known_description = self.data.item_relations
            cold_description = self.data.cold_items_similarity

        # project latent item factors onto the description space
        projection = known_description.T.dot(item_factors).T
        gram_inv = np.linalg.pinv(projection @ projection.T)

        cold_factors = cold_description.dot(projection.T) @ gram_inv
        # scale user factors by singular values before scoring
        scaled_users = user_factors * singular_values[None, :]
        scores = cold_factors @ scaled_users.T
        return self.get_topk_elements(scores).astype(np.intp)
Beispiel #2
0
    def build(self):
        """Instantiate a LightFM model and train it on the training matrix.

        The model is configured from the instance hyper-parameters and the
        training routine is looked up on the model by the name stored in
        ``self.fit_method``. When item and/or user features are provided,
        they are aligned with the data index and converted by
        ``stack_features`` (normalized, dtype ``'f4'``) before training.
        """
        model_settings = dict(
            no_components=self.rank,
            item_alpha=self.item_alpha,
            user_alpha=self.user_alpha,
            loss=self.loss,
            learning_rate=self.learning_rate,
            learning_schedule=self.learning_schedule,
            max_sampled=self.max_sampled,
            random_state=self.seed,
        )
        self._model = LightFM(**model_settings)
        train = getattr(self._model, self.fit_method)

        interactions = self.get_training_matrix()

        if self.item_features is not None:
            # NOTE(review): items use `itemid.old` while users below use
            # `userid.training.old` -- confirm this asymmetry is intended.
            item_ids = self.data.index.itemid.old.values
            aligned_items = self.item_features.reindex(item_ids, fill_value=[])
            item_csr, item_labels = stack_features(
                aligned_items,
                add_identity=self.item_identity,
                normalize=True,
                dtype='f4')
            self._item_features_csr = item_csr
            self.item_feature_labels = item_labels

        if self.user_features is not None:
            user_ids = self.data.index.userid.training.old.values
            aligned_users = self.user_features.reindex(user_ids, fill_value=[])
            user_csr, user_labels = stack_features(
                aligned_users,
                add_identity=self.user_identity,
                normalize=True,
                dtype='f4')
            self._user_features_csr = user_csr
            self.user_feature_labels = user_labels

        with Timer(self.method, verbose=self.verbose):
            train(interactions,
                  item_features=self._item_features_csr,
                  user_features=self._user_features_csr)
Beispiel #3
0
    def build(self):
        """Fit the LCE factorization and store the resulting factors.

        Item features and the transposed training matrix are factorized
        jointly by ``LCE`` together with a neighbourhood structure built by
        ``construct_A``. The number of neighbours is capped by both
        ``self.max_neighbours`` and the integer square root of the number
        of rows in the item-features matrix.

        Side effects:
            Populates ``self.factors`` for users, items and
            ``'item_features'``, and sets ``self.feature_labels``.
        """
        # item-features sparse matrix with the labels of its columns
        item_feature_matrix, feature_labels = stack_features(
            self.item_data, normalize=False)
        # item-user sparse matrix
        item_user_matrix = self.get_training_matrix().T

        n_rows = item_feature_matrix.shape[0]
        n_neighbours = min(self.max_neighbours, int(math.sqrt(n_rows)))
        adjacency = construct_A(item_feature_matrix,
                                n_neighbours,
                                binary=self.binary_features)

        with track_time(self.training_time,
                        verbose=self.verbose,
                        model=self.method):
            item_factors, user_factors_t, feature_factors_t = LCE(
                item_feature_matrix,
                item_user_matrix,
                adjacency,
                k=self.rank,
                alpha=self.alpha,
                beta=self.beta,
                lamb=self.regularization,
                epsilon=self.tolerance,
                maxiter=self.max_iterations,
                seed=self.seed,
                verbose=self.show_error)

        fields = self.data.fields
        user_key = fields.userid
        item_key = fields.itemid
        # LCE returns user and feature factors transposed relative to storage
        self.factors[user_key] = user_factors_t.T
        self.factors[item_key] = item_factors
        self.factors['item_features'] = feature_factors_t.T
        self.feature_labels = feature_labels
def get_similarity_data(meta_info,
                        metric='common',
                        assume_binary=True,
                        fill_diagonal=True):
    """Build an item-to-item similarity matrix from item meta information.

    Args:
        meta_info: Item descriptions passed to ``stack_features``; its
            ``index`` is returned as the item index and ``index.name`` is
            used as the item key of the returned dictionaries.
        metric: One of ``'common'`` (number of shared features, scaled by
            the largest stored value, unit diagonal), ``'cosine'`` or
            ``'salton'`` (both delegate to ``cosine_similarity``).
        assume_binary: Forwarded to ``cosine_similarity``; not used by the
            ``'common'`` metric.
        fill_diagonal: Forwarded to ``cosine_similarity``; not used by the
            ``'common'`` metric, which always sets the diagonal to 1.

    Returns:
        A ``(similarities, indices, labels)`` tuple of dictionaries. Each
        has a ``'userid'`` entry set to ``None`` and an entry keyed by
        ``meta_info.index.name`` holding the similarity matrix, the item
        index and the feature labels respectively.

    Raises:
        ValueError: If ``metric`` is not one of the supported values.
    """
    supported_metrics = ('common', 'cosine', 'salton')
    # Fail early with a clear message; previously an unknown metric surfaced
    # as an UnboundLocalError further down.
    if metric not in supported_metrics:
        raise ValueError(
            'Unsupported similarity metric {!r}; expected one of {}.'.format(
                metric, ', '.join(repr(name) for name in supported_metrics)))

    feat_mat, lbls = stack_features(meta_info, normalize=False)

    if metric == 'common':
        item_similarity = feat_mat.dot(feat_mat.T)
        stored_values = item_similarity.data
        # Guard against a matrix without stored entries (max() of an empty
        # array raises) and against division by zero.
        max_common = stored_values.max() if stored_values.size else 0
        if max_common:
            item_similarity = item_similarity / max_common
        item_similarity.setdiag(1.0)
    else:  # 'cosine' or 'salton'
        item_similarity = cosine_similarity(feat_mat,
                                            assume_binary=assume_binary,
                                            fill_diagonal=fill_diagonal)

    if item_similarity.format == 'csr':
        item_similarity = item_similarity.T  # ensure CSC format (matrix is symmetric)

    userid = 'userid'
    itemid = meta_info.index.name
    similarities = {userid: None, itemid: item_similarity}
    indices = {userid: None, itemid: meta_info.index}
    labels = {userid: None, itemid: lbls}
    return similarities, indices, labels
Beispiel #5
0
    def build(self):
        """Instantiate a LightFM model and train it on the training matrix.

        The model is configured from the instance hyper-parameters and the
        training routine is looked up on the model by the name stored in
        ``self.fit_method``; extra keyword arguments for it come from
        ``self.fit_params``. Item and user features, when provided, are
        aligned with the data index and converted by ``stack_features``
        (dtype ``'f4'``) using the per-entity normalization settings.
        """
        self._model = LightFM(
            no_components=self.rank,
            item_alpha=self.item_alpha,
            user_alpha=self.user_alpha,
            loss=self.loss,
            learning_rate=self.learning_rate,
            learning_schedule=self.learning_schedule,
            max_sampled=self.max_sampled,
            random_state=self.seed,
        )
        train = getattr(self._model, self.fit_method)

        # COO format, as required by LightFM
        interactions = self.get_training_matrix(sparse_format='coo')

        # prefer the training part of the item index when the index has one
        itemid_index = self.data.index.itemid
        item_index = getattr(itemid_index, 'training', itemid_index)

        if self.item_features is not None:
            aligned_items = self.item_features.reindex(
                item_index.old.values, fill_value=[])
            item_csr, item_labels = stack_features(
                aligned_items,
                add_identity=self.item_identity,
                normalize=self.normalize_item_features,
                dtype='f4')
            self._item_features_csr = item_csr
            self.item_features_labels = item_labels

        if self.user_features is not None:
            user_ids = self.data.index.userid.training.old.values
            aligned_users = self.user_features.reindex(user_ids, fill_value=[])
            user_csr, user_labels = stack_features(
                aligned_users,
                add_identity=self.user_identity,
                normalize=self.normalize_user_features,
                dtype='f4')
            self._user_features_csr = user_csr
            self.user_features_labels = user_labels

        with track_time(self.training_time,
                        verbose=self.verbose,
                        model=self.method):
            train(interactions,
                  item_features=self._item_features_csr,
                  user_features=self._user_features_csr,
                  **self.fit_params)
Beispiel #6
0
    def get_recommendations(self):
        """Score users against cold-start items and return the top ones.

        Cold-start item features are stacked with the feature labels saved
        in ``self.feature_labels`` and projected onto the latent space with
        the pseudo-inverse of the Gram matrix of the stored
        ``'item_features'`` factors. Negative entries of the projected
        factors are clipped to zero before scoring against user factors.

        Returns:
            The output of ``self.get_topk_elements`` applied to the
            (cold items x users) score matrix, cast to ``np.intp``.
        """
        user_factors = self.factors[self.data.fields.userid]
        feature_factors = self.factors['item_features']

        cold_ids = self.data.index.itemid.cold_start.old.values
        cold_info = self.item_features.reindex(cold_ids, fill_value=[])
        cold_item_features, _ = stack_features(cold_info,
                                               labels=self.feature_labels,
                                               normalize=False)

        gram_inv = np.linalg.pinv(feature_factors.T @ feature_factors)
        cold_items_factors = cold_item_features.dot(feature_factors).dot(gram_inv)
        # presumably keeps factors non-negative like the trained ones -- TODO confirm
        cold_items_factors[cold_items_factors < 0] = 0

        scores = cold_items_factors @ user_factors.T
        top_relevant_users = self.get_topk_elements(scores).astype(np.intp)
        return top_relevant_users