コード例 #1
0
ファイル: SLIM_BPR.py プロジェクト: GiovanniGabbolini/rs
    def recommend_batch(self, userids, N=10, urm=None, filter_already_liked=True, with_scores=False,
                        items_to_exclude=None, verbose=False):
        """
        Recommend the best-scoring items for a batch of users.

        Scores for the whole batch are computed at once as
        ``self.URM_train[userids].dot(self.W_sparse)`` and then ranked row by row.

        Parameters
        ----------
        userids : sequence of int
            Row indices into ``self.URM_train``; one output entry is produced per id.
        N : int, optional
            Maximum number of items to return per user. It is capped at the number of
            score columns, so asking for more items than exist returns all of them.
        urm : optional
            Not used by this implementation: profiles are always read from
            ``self.URM_train``. Kept so the signature stays call-compatible.
        filter_already_liked : bool, optional
            If true, every item with a nonzero entry in the user's profile gets a
            score of ``-inf`` so that it ranks last.
        with_scores : bool, optional
            If true, return ``(item, score)`` pairs instead of bare item indices.
        items_to_exclude : sequence of int, optional
            Not supported yet; any non-empty value raises ``NotImplementedError``.
        verbose : bool, optional
            If true, report progress through ``log.progressbar``.

        Returns
        -------
        list
            One entry per user. Without scores: ``[userid, item_1, ..., item_k]``.
            With scores: ``[userid, [(item_1, score_1), ..., (item_k, score_k)]]``.
            Items are ordered by decreasing score.

        Raises
        ------
        NotImplementedError
            If ``items_to_exclude`` is non-empty.
        """
        # Reject the unsupported option before doing the (potentially large) sparse product.
        if items_to_exclude is not None and len(items_to_exclude) > 0:
            raise NotImplementedError('Items to exclude functionality is not implemented yet')

        user_profile_batch = self.URM_train[userids]
        scores_array = user_profile_batch.dot(self.W_sparse).toarray()

        if filter_already_liked:
            scores_array[user_profile_batch.nonzero()] = -np.inf

        n_users, n_items = scores_array.shape
        # argpartition(kth) requires kth < n_items, so cap N and fall back to a
        # full sort when every item has to be returned anyway.
        top_n = min(N, n_items)

        recommendations = []
        for row_index, scores in enumerate(scores_array):
            if top_n < n_items:
                # Unordered indices of the top_n best scores ...
                candidates = (-scores).argpartition(top_n)[:top_n]
            else:
                candidates = np.arange(n_items)
            # ... then ordered by decreasing score.
            ranking = candidates[np.argsort(-scores[candidates])]

            if with_scores:
                recommendations.append([userids[row_index]] + [list(zip(ranking, scores[ranking]))])
            else:
                recommendations.append([userids[row_index]] + list(ranking))

            if verbose:
                log.progressbar(row_index + 1, n_users, prefix='Building recommendations ')

        return recommendations
コード例 #2
0
    def fit(self, R):
        """
        Fit the two factor matrices ``self.X`` and ``self.Y`` on the matrix ``R``.

        ``R`` is stored as ``self.dataset`` and turned into a confidence matrix with
        ``self._linear_scaling`` when ``self.scaling == 'linear'``, otherwise with
        ``self._log_scaling``. ``self.X`` (one row per row of ``R``) and ``self.Y``
        (one row per column of ``R``) are drawn from a normal distribution seeded
        with ``self.rnd_seed``, then refined for ``self.iters`` rounds: each round
        re-solves ``X`` against the current ``Y`` and then ``Y`` against the new
        ``X`` through ``self._lsq_solver_fast``.

        Parameters
        ----------
        R : sparse matrix
            Interaction matrix; only ``R.shape`` and the scaling helpers touch it
            here (presumably users x items -- confirm against the callers).
        """
        self.dataset = R

        # Confidence matrix: linear scaling when requested, log scaling for anything else.
        scale = self._linear_scaling if self.scaling == 'linear' else self._log_scaling
        confidence = scale(R)
        confidence_t = confidence.T.tocsr()

        n_rows, n_cols = R.shape

        # Seed first, then draw X before Y: the draw order determines the initial factors.
        np.random.seed(self.rnd_seed)
        self.X = np.random.normal(self.init_mean, self.init_std,
                                  size=(n_rows, self.num_factors))
        self.Y = np.random.normal(self.init_mean, self.init_std,
                                  size=(n_cols, self.num_factors))

        for completed in range(1, self.iters + 1):
            # Update X with Y held fixed, then Y with the freshly updated X.
            self.X = self._lsq_solver_fast(confidence, self.X, self.Y, self.reg)
            self.Y = self._lsq_solver_fast(confidence_t, self.Y, self.X, self.reg)
            log.progressbar(completed, self.iters)
            log.error('Finished iter {}'.format(completed))
コード例 #3
0
    def recommend_batch(self,
                        userids,
                        urm,
                        N=10,
                        filter_already_liked=True,
                        with_scores=True,
                        items_to_exclude=[],
                        verbose=False):
        """
        Produce recommendations for several users by calling ``self.recommend`` once per user.

        Parameters
        ----------
        userids : list of int
            Ids of the users to build recommendations for, processed in order.
        urm : csr_matrix
            Sparse (number_users, number_items) matrix, forwarded unchanged to
            ``self.recommend``. Per the original documentation it lets the
            recommender look up the items a user already liked and their weights.
        N : int, optional
            Number of recommendations requested per user.
        filter_already_liked : bool, optional
            Forwarded to ``self.recommend``.
        with_scores : bool, optional
            Forwarded to ``self.recommend``.
        items_to_exclude : list of ints, optional
            Extra item ids to leave out of the output; forwarded to ``self.recommend``.
        verbose : bool, optional
            When true, progress is reported through ``log.progressbar``.

        Returns
        -------
        list
            The values returned by ``self.recommend``, one per user and in the same
            order as ``userids``. Per the original documentation each entry pairs a
            user id with its list of (itemid, score) tuples, e.g.
                [   [7,  [(18,0.7), (11,0.6), ...] ],
                    [13, [(65,0.9), (83,0.4), ...] ],
                    [25, [(30,0.8), (49,0.3), ...] ], ... ]
            (the exact shape is decided by ``self.recommend``).
        """
        total = len(userids)
        recommendations = []
        for done, userid in enumerate(userids, start=1):
            recommendations.append(
                self.recommend(userid,
                               N=N,
                               urm=urm,
                               filter_already_liked=filter_already_liked,
                               with_scores=with_scores,
                               items_to_exclude=items_to_exclude))
            if verbose:
                log.progressbar(done, total, prefix='Building recommendations ')
        return recommendations
コード例 #4
0
ファイル: itembased.py プロジェクト: GiovanniGabbolini/rs
def validate(self,
             ks,
             alphas,
             betas,
             ls,
             cs,
             shrinks,
             filename='splus_validation',
             path='validation_results',
             verbose=False):
    """
    Grid-search ``CFItemBased`` with the ``SIM_SPLUS`` distance and log every result.

    A fresh ``CFItemBased`` model is run for every combination of the given
    values (``shrinks`` varies fastest, ``ks`` slowest). One line per combination,
    containing the score returned by ``model.run`` and the parameters used, is
    written to a text file created through ``datedir.create_folder``.

    Parameters
    ----------
    self
        Not used by this function.
    ks, alphas, betas, ls, cs, shrinks : sequence
        Candidate values passed to ``model.run`` as ``k``, ``alpha``, ``beta``,
        ``l``, ``c`` and ``shrink`` respectively. They must support ``len()``.
        Example grid: ``ks=[100, 200, 300]``, ``alphas=betas=ls=cs=[0.25, 0.5, 0.75]``,
        ``shrinks=[0, 10, 30]``.
    filename : str, optional
        Base name handed to ``datedir.create_folder``.
    path : str, optional
        Root path handed to ``datedir.create_folder``.
    verbose : bool, optional
        Forwarded to ``model.run``.
    """
    from itertools import product

    distance = CFItemBased.SIM_SPLUS

    # Total number of combinations, used only for the progress bar.
    tot = len(ks) * len(alphas) * len(betas) * len(ls) * len(cs) * len(shrinks)

    out_path = datedir.create_folder(rootpath=path,
                                     filename=filename,
                                     extension='txt')
    with open(out_path, 'w') as file:
        # product() iterates in the same order as the equivalent nested loops:
        # the right-most sequence advances fastest.
        grid = product(ks, alphas, betas, ls, cs, shrinks)
        for i, (k, a, b, l_value, c, shrink) in enumerate(grid, start=1):
            model = CFItemBased()
            _, map10 = model.run(distance=distance,
                                 k=k,
                                 shrink=shrink,
                                 alpha=a,
                                 beta=b,
                                 c=c,
                                 l=l_value,
                                 export=False,
                                 verbose=verbose)
            file.write('MAP: {} \tknn: {} \ta: {} \tb: {} \tl: {} \tc: {} \tshrink: {}\n'.format(
                map10, k, a, b, l_value, c, shrink))

            log.progressbar(i, tot, prefix='Validation: ')
コード例 #5
0
def create_ucm_from_urm(urm_train):
    """
    Build the UCM from a URM and save it under a randomly numbered folder.

    The ICM is obtained from ``create_icm(d.get_tracks_df(), [])``. For every
    playlist index ``p`` in ``range(d.N_PLAYLISTS)``, row ``p`` of the UCM is the
    sum of the ICM rows of all tracks that have a nonzero entry in
    ``urm_train[p]``. The result is written to ``raw_data/ucm<number>/ucm``.

    Parameters
    ----------
    urm_train : sparse matrix
        Indexed by playlist; ``urm_train[p].nonzero()[1]`` must yield the track
        indices of playlist ``p`` (presumably a scipy CSR matrix with at least
        ``d.N_PLAYLISTS`` rows -- confirm against the callers).

    Raises
    ------
    OSError
        If the output folder cannot be created, e.g. because it already exists.
    """
    path = "raw_data/ucm" + str(randint(1, 100))
    print('starting dataset creation of UCM in ' + path)

    # Create the output folder up front so that a name collision fails before
    # the long computation below instead of after it.
    os.mkdir(path)

    # NOTE(original author): a dense array might work better here.
    ICM = csr_matrix(create_icm(d.get_tracks_df(), []))
    # Builtin int is what the removed np.int alias stood for.
    UCM = lil_matrix((d.N_PLAYLISTS, ICM.shape[1]), dtype=int)
    for p in range(d.N_PLAYLISTS):
        track_indices = urm_train[p].nonzero()[1]
        for track_id in track_indices:
            UCM[p] += ICM.getrow(track_id)
        # p is zero-based; report the number of completed playlists.
        log.progressbar(p + 1, d.N_PLAYLISTS)

    # save matrices
    # Assuming save_npz is scipy.sparse.save_npz: it cannot store the LIL
    # format, so convert to CSR first.
    save_npz(path + '/ucm', UCM.tocsr())