Example no. 1
    def get_S_incremental_and_set_W(self):

        self.S_incremental = self.cythonEpoch.get_S()

        if self.train_with_sparse_weights:
            self.W_sparse = self.S_incremental
            self.W_sparse = check_matrix(self.W_sparse, format="csr")
        else:
            self.W_sparse = similarityMatrixTopK(self.S_incremental, k=self.topK)
            self.W_sparse = check_matrix(self.W_sparse, format="csr")
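
A minimal standalone sketch of what a top-K filter such as similarityMatrixTopK computes, assuming a dense item-item similarity matrix (the repository helper has its own signature and optimizations):

import numpy as np
import scipy.sparse as sps

def similarity_top_k_sketch(S, k):
    # Keep only the k largest entries of each column and zero out the rest.
    S = np.asarray(S, dtype=np.float32)
    cols = np.arange(S.shape[1])
    # Row indices of the k largest values per column (unordered within the top k).
    top_k_rows = np.argpartition(-S, k, axis=0)[:k, :]
    S_topk = np.zeros_like(S)
    S_topk[top_k_rows, cols] = S[top_k_rows, cols]
    return sps.csr_matrix(S_topk)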
Example no. 2
    def fit(self,
            topK=None,
            l2_norm=1e3,
            normalize_matrix=False,
            verbose=True):

        self.verbose = verbose

        start_time = time.time()
        self._print("Fitting model... ")

        if normalize_matrix:
            # Normalize rows and then columns
            self.URM_train = normalize(self.URM_train, norm="l2", axis=1)
            self.URM_train = normalize(self.URM_train, norm="l2", axis=0)
            self.URM_train = sps.csr_matrix(self.URM_train)

        # The Gram matrix is X^T X; compute it as an item-item dot product
        similarity = Compute_Similarity(
            self.URM_train,
            shrink=0,
            topK=self.URM_train.shape[1],
            normalize=False,
            similarity="cosine",
        )
        gram_matrix = similarity.compute_similarity().toarray()

        diag_indices = np.diag_indices(gram_matrix.shape[0])

        gram_matrix[diag_indices] += l2_norm

        P = np.linalg.inv(gram_matrix)

        B = P / (-np.diag(P))

        B[diag_indices] = 0.0

        new_time_value, new_time_unit = seconds_to_biggest_unit(time.time() -
                                                                start_time)
        self._print("Fitting model... done in {:.2f} {}".format(
            new_time_value, new_time_unit))

        # Check whether the matrix should be saved in a sparse or dense format:
        # it is kept sparse, regardless of topK, if the fraction of nonzero
        # cells is below sparse_threshold_quota
        if topK is not None:
            B = similarityMatrixTopK(B, k=topK, verbose=False)

        if self._is_content_sparse_check(B):
            self._print("Detected model matrix to be sparse, changing format.")
            self.W_sparse = check_matrix(B, format="csr", dtype=np.float32)

        else:
            self.W_sparse = check_matrix(B, format="npy", dtype=np.float32)
            self._W_sparse_format_checked = True
            self._compute_item_score = self._compute_score_W_dense
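
The closed form implemented above is B = P / (-diag(P)) with a zeroed diagonal, where P = inv(X^T X + l2_norm * I). A self-contained toy check of that algebra (an illustration, not the repository class):

import numpy as np
import scipy.sparse as sps

X = sps.csr_matrix(np.array([[1, 0, 1],
                             [1, 1, 0],
                             [0, 1, 1]], dtype=np.float32))
l2_norm = 1.0

G = (X.T @ X).toarray()                 # Gram matrix X^T X
G[np.diag_indices_from(G)] += l2_norm   # L2 penalty on the diagonal
P = np.linalg.inv(G)
B = P / (-np.diag(P))                   # closed-form item-item weights
B[np.diag_indices_from(B)] = 0.0        # EASE constraint: zero self-similarity

scores = X @ B                          # user-item scores used for ranking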
Example no. 3
    def __init__(self, URM_train, Similarity_1, Similarity_2):
        super(ItemKNNSimilarityHybridRecommender, self).__init__(URM_train)

        if Similarity_1.shape != Similarity_2.shape:
            raise ValueError(
                "ItemKNNSimilarityHybridRecommender: similarities have different size, S1 is {}, S2 is {}"
                .format(Similarity_1.shape, Similarity_2.shape))

        # CSR is faster during evaluation
        self.Similarity_1 = check_matrix(Similarity_1.copy(), 'csr')
        self.Similarity_2 = check_matrix(Similarity_2.copy(), 'csr')
Example no. 4
    def __init__(self, URM_train, Similarity_1, Similarity_2, sparse_weights=True):
        super(ItemKNNSimilarityHybridRecommender, self).__init__(URM_train)

        if Similarity_1.shape != Similarity_2.shape:
            raise ValueError(
                "ItemKNNSimilarityHybridRecommender: similarities have different size, S1 is {}, S2 is {}".format(
                    Similarity_1.shape, Similarity_2.shape
                )
            )

        self.Similarity_1 = check_matrix(Similarity_1.copy(), "csr")
        self.Similarity_2 = check_matrix(Similarity_2.copy(), "csr")
Example no. 5
    def __init__(self, urm_train, Similarity_1, Similarity_2, verbose=True):
        super(UserSimilarityHybridRecommender, self).__init__(urm_train,
                                                              verbose=verbose)

        if Similarity_1.shape != Similarity_2.shape:
            raise ValueError(
                "UserSimilarityHybridRecommender: similarities have different size, S1 is {}, S2 is {}"
                .format(Similarity_1.shape, Similarity_2.shape))

        # CSR is faster during evaluation
        self.Similarity_1 = check_matrix(Similarity_1.copy(), 'csr')
        self.Similarity_2 = check_matrix(Similarity_2.copy(), 'csr')
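
These hybrid constructors only store the two similarities; a minimal sketch of the fit step they are typically paired with, assuming a linear combination weight alpha and the repository helpers similarityMatrixTopK and check_matrix (the alpha and topK defaults are illustrative):

    def fit(self, topK=100, alpha=0.5):
        # Weighted sum of the two similarities, truncated to the top-K entries.
        W = self.Similarity_1 * alpha + self.Similarity_2 * (1 - alpha)
        self.W_sparse = similarityMatrixTopK(W, k=topK)
        self.W_sparse = check_matrix(self.W_sparse, format='csr')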
Example no. 6
    def fit(self, lambda_user=10, lambda_item=25):

        self.lambda_user = lambda_user
        self.lambda_item = lambda_item
        self.n_items = self.URM_train.shape[1]

        # convert to csc matrix for faster column-wise sum
        self.URM_train = check_matrix(self.URM_train, "csc", dtype=np.float32)

        # 1) global average
        self.mu = (
            self.URM_train.data.sum(dtype=np.float32) / self.URM_train.data.shape[0]
        )

        # 2) item average bias
        # compute the number of non-zero elements for each column
        col_nnz = np.diff(self.URM_train.indptr)

        # it is equivalent to:
        # col_nnz = X.indptr[1:] - X.indptr[:-1]
        # and it is **much faster** than
        # col_nnz = (X != 0).sum(axis=0)

        URM_train_unbiased = self.URM_train.copy()
        URM_train_unbiased.data -= self.mu
        self.item_bias = URM_train_unbiased.sum(axis=0) / (col_nnz + self.lambda_item)
        self.item_bias = np.asarray(
            self.item_bias
        ).ravel()  # converts the 2-d matrix to a 1-d array without any copy

        # 3) user average bias
        # NOTE: the user bias is *useless* for the sake of ranking items. We just show it here for educational purposes.

        # first subtract the item biases from each column
        # then repeat each element of the item bias vector a number of times equal to col_nnz
        # and subtract it from the data vector
        URM_train_unbiased.data -= np.repeat(self.item_bias, col_nnz)

        # now convert the csc matrix to csr for efficient row-wise computation
        URM_train_unbiased_csr = URM_train_unbiased.tocsr()
        row_nnz = np.diff(URM_train_unbiased_csr.indptr)
        # finally, let's compute the bias
        self.user_bias = np.asarray(
            URM_train_unbiased_csr.sum(axis=1)
        ).ravel() / (row_nnz + self.lambda_user)

        # 4) precompute the item ranking by using the item bias only
        # the global average and user bias won't change the ranking, so there is no need to use them
        # self.item_ranking = np.argsort(self.bi)[::-1]

        self.URM_train = check_matrix(self.URM_train, "csr", dtype=np.float32)
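
A toy check of the shrunken item bias from step 2: with few ratings per item, a large lambda_item pulls the bias towards zero (illustrative numbers):

import numpy as np
import scipy.sparse as sps

URM = sps.csc_matrix(np.array([[5, 0],
                               [4, 1],
                               [0, 3]], dtype=np.float32))
lambda_item = 25

mu = URM.data.sum(dtype=np.float32) / URM.data.shape[0]   # global average = 3.25
col_nnz = np.diff(URM.indptr)                             # ratings per item = [2, 2]
URM_unbiased = URM.copy()
URM_unbiased.data -= mu
item_bias = np.asarray(URM_unbiased.sum(axis=0)).ravel() / (col_nnz + lambda_item)
# item_bias ~ [0.093, -0.093]: strongly shrunk towards zero by lambda_item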
Example no. 7
    def __init__(self, URM_train, UCM_train, ICM_train, verbose=True):

        super(HERSWrapper, self).__init__(URM_train, verbose=verbose)

        assert self.n_users == UCM_train.shape[
            0], "{}: URM_train has {} users but UCM_train has {}".format(
                self.RECOMMENDER_NAME, self.n_users, UCM_train.shape[0])

        self.UCM_train = check_matrix(UCM_train.copy(),
                                      'csr',
                                      dtype=np.float32)
        self.UCM_train.eliminate_zeros()

        self._cold_user_CBF_mask = np.ediff1d(self.UCM_train.indptr) == 0

        if self._cold_user_CBF_mask.any():
            print("{}: UCM Detected {} ({:.2f} %) cold users.".format(
                self.RECOMMENDER_NAME, self._cold_user_CBF_mask.sum(),
                self._cold_user_CBF_mask.sum() / self.n_users * 100))

        assert self.n_items == ICM_train.shape[
            0], "{}: URM_train has {} items but ICM_train has {}".format(
                self.RECOMMENDER_NAME, self.n_items, ICM_train.shape[0])

        self.ICM_train = check_matrix(ICM_train.copy(),
                                      'csr',
                                      dtype=np.float32)
        self.ICM_train.eliminate_zeros()

        self._cold_item_CBF_mask = np.ediff1d(self.ICM_train.indptr) == 0

        if self._cold_item_CBF_mask.any():
            print("{}: ICM Detected {} ({:.2f} %) items with no features.".
                  format(self.RECOMMENDER_NAME, self._cold_item_CBF_mask.sum(),
                         self._cold_item_CBF_mask.sum() / self.n_items * 100))

        self.G_ui = np.swapaxes(
            np.asarray(self.URM_train.nonzero(), dtype=np.int32), 0, 1)
        self.G_user = nx.convert_matrix.from_scipy_sparse_matrix(
            self.UCM_train, create_using=nx.DiGraph())
        self.G_user = self.G_user.to_undirected()
        self.G_user.remove_nodes_from(list(nx.isolates(self.G_user)))

        self.G_item = nx.convert_matrix.from_scipy_sparse_matrix(
            self.ICM_train, create_using=nx.DiGraph())
        self.G_item = self.G_item.to_undirected()
        self.G_item.remove_nodes_from(list(nx.isolates(self.G_item)))

        # This is used in _compute_item_score
        self._item_indices = np.arange(0, self.n_items, dtype=np.int32)
Example no. 8
    def fit(self,
            l1_ratio=0.1,
            positive_only=True,
            topK=100,
            workers=multiprocessing.cpu_count(),
            max_iter=100,
            alpha=0.01):

        assert 0 <= l1_ratio <= 1, \
            "SLIM_ElasticNet: l1_ratio must be between 0 and 1, provided value was {}".format(l1_ratio)

        self.l1_ratio = l1_ratio
        self.positive_only = positive_only
        self.topK = topK
        self.max_iter = max_iter
        self.alpha = alpha

        self.workers = workers

        self.URM_train = check_matrix(self.URM_train, 'csc', dtype=np.float32)
        n_items = self.URM_train.shape[1]
        # fit each item's factors in parallel

        # partial object binding the fixed part of the input to the function
        _pfit = partial(self._partial_fit,
                        X=self.URM_train,
                        topK=self.topK,
                        alpha=self.alpha,
                        max_iter=self.max_iter)

        # create a pool with the given number of worker processes
        pool = Pool(processes=self.workers)

        # run the pool, passing the function (with its fixed inputs)
        # and the remaining, varying parameter
        res = pool.map(_pfit, np.arange(n_items))

        # res contains a vector of (values, rows, cols) tuples
        values, rows, cols = [], [], []
        for values_, rows_, cols_ in res:
            values.extend(values_)
            rows.extend(rows_)
            cols.extend(cols_)

        # generate the sparse weight matrix
        self.W_sparse = sps.csr_matrix((values, (rows, cols)),
                                       shape=(n_items, n_items),
                                       dtype=np.float32)
        self.URM_train = check_matrix(self.URM_train, 'csr', dtype=np.float32)
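
The parallel map above relies on self._partial_fit, which is not shown. A plausible per-item worker sketched with sklearn's ElasticNet; the signature matches the partial() call above, everything else is an assumption:

from sklearn.linear_model import ElasticNet
import numpy as np

def _partial_fit(item, X, topK, alpha, max_iter, l1_ratio=0.1):
    # Fit column `item` of X against all other columns and return its
    # top-K coefficients as (values, rows, cols) for the csr constructor.
    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, positive=True,
                       fit_intercept=False, copy_X=False, max_iter=max_iter)
    y = X[:, item].toarray().ravel()

    # Zero out the target column so the item cannot predict itself.
    X_j = X.copy()
    X_j.data[X_j.indptr[item]:X_j.indptr[item + 1]] = 0.0
    model.fit(X_j, y)

    nonzero = model.coef_.nonzero()[0]
    if len(nonzero) > topK:
        nonzero = nonzero[np.argsort(-model.coef_[nonzero])[:topK]]
    return model.coef_[nonzero], nonzero, np.full(len(nonzero), item)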
Example no. 9
    def __init__(self, URM_train, ICM_train):
        super(ScoresHybridKNNCFKNNCBF, self).__init__(URM_train)

        self.URM_train = check_matrix(URM_train.copy(), 'csr')
        self.ICM_train = ICM_train
        self.itemKNNCF = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train)
        self.itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender(URM_train, ICM_train)
Example no. 10
    def __init__(self, URM_train):
        super(PureSVDRecommender, self).__init__()

        # CSR is faster during evaluation
        self.URM_train = check_matrix(URM_train, 'csr')

        self.compute_item_score = self.compute_score_SVD
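
compute_score_SVD is not shown here; a minimal sketch of the factorization and scoring PureSVD is built on, using sklearn's randomized_svd (num_factors and random_seed are illustrative parameter names):

from sklearn.utils.extmath import randomized_svd

    def fit(self, num_factors=100, random_seed=None):
        # Truncated SVD of the URM: URM ~ U * diag(Sigma) * VT
        U, Sigma, VT = randomized_svd(self.URM_train,
                                      n_components=num_factors,
                                      random_state=random_seed)
        self.USER_factors = U * Sigma      # fold the singular values into U
        self.ITEM_factors = VT.T

    def compute_score_SVD(self, user_id_array):
        # Scores are dot products between user and item factors.
        return self.USER_factors[user_id_array].dot(self.ITEM_factors.T)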
Example no. 11
    def __init__(self, urm_train, eurm=False):
        super(HybridGenRecommender, self).__init__(urm_train)

        self.data_folder = Path(__file__).parent.parent.absolute()

        self.eurm = eurm

        self.num_users = urm_train.shape[0]
        data = DataManager()

        urm_train = check_matrix(urm_train.copy(), 'csr')
        icm_price, icm_asset, icm_sub, icm_all = data.get_icm()
        ucm_age, ucm_region, ucm_all = data.get_ucm()

        recommender_1 = ItemKNNCBFRecommender(urm_train, icm_all)
        recommender_1.fit(shrink=40, topK=20, feature_weighting='BM25')

        # recommender_2 = UserKNNCBFRecommender(urm_train, ucm_all)
        # recommender_2.fit(shrink=500, topK=1600, normalize=True)

        recommender_2 = UserKNNCBFRecommender(urm_train, ucm_all)
        recommender_2.fit(shrink=1777,
                          topK=1998,
                          similarity='tversky',
                          feature_weighting='BM25',
                          tversky_alpha=0.1604953616,
                          tversky_beta=0.9862348646)

        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
Example no. 12
    def __init__(self, URM_train):

        super(BaseRecommender, self).__init__()

        self.URM_train = check_matrix(URM_train.copy(),
                                      'csr',
                                      dtype=np.float32)
        self.URM_train.eliminate_zeros()

        self.n_users, self.n_items = self.URM_train.shape

        self.normalize = False

        self.filterTopPop = False
        self.filterTopPop_ItemsID = np.array([], dtype=int)

        self.items_to_ignore_flag = False
        self.items_to_ignore_ID = np.array([], dtype=int)

        self._cold_user_mask = np.ediff1d(self.URM_train.indptr) == 0

        if self._cold_user_mask.any():
            print("{}: Detected {} ({:.2f} %) cold users.".format(
                self.RECOMMENDER_NAME, self._cold_user_mask.sum(),
                self._cold_user_mask.sum() / len(self._cold_user_mask) * 100))

        self._cold_item_mask = np.ediff1d(self.URM_train.tocsc().indptr) == 0

        if self._cold_item_mask.any():
            print("{}: Detected {} ({:.2f} %) cold items.".format(
                self.RECOMMENDER_NAME, self._cold_item_mask.sum(),
                self._cold_item_mask.sum() / len(self._cold_item_mask) * 100))
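
Why np.ediff1d on indptr identifies cold users: in CSR, consecutive indptr entries delimit each row's stored entries, so their differences are per-user interaction counts. A toy check:

import numpy as np
import scipy.sparse as sps

URM = sps.csr_matrix(np.array([[1, 0, 1],
                               [0, 0, 0],
                               [0, 1, 0]]))
interactions_per_user = np.ediff1d(URM.indptr)   # -> [2, 0, 1]
cold_user_mask = interactions_per_user == 0      # -> [False, True, False]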
Example no. 13
def remove_empty_rows_and_cols(URM, ICM=None):

    URM = check_matrix(URM, "csr")
    rows = URM.indptr
    numRatings = np.ediff1d(rows)
    user_mask = numRatings >= 1

    URM = URM[user_mask, :]

    cols = URM.tocsc().indptr
    numRatings = np.ediff1d(cols)
    item_mask = numRatings >= 1

    URM = URM[:, item_mask]

    removedUsers = np.arange(0, len(user_mask))[np.logical_not(user_mask)]
    removedItems = np.arange(0, len(item_mask))[np.logical_not(item_mask)]

    if ICM is not None:

        ICM = ICM[item_mask, :]

        return URM.tocsr(), ICM.tocsr(), removedUsers, removedItems

    return URM.tocsr(), removedUsers, removedItems
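
An illustrative call (assuming check_matrix from the source repo is importable): user 1 and item 1 have no interactions, so both are dropped and reported back in the original indexing:

import numpy as np
import scipy.sparse as sps

URM = sps.csr_matrix(np.array([[1, 0, 0],
                               [0, 0, 0],
                               [2, 0, 3]]))
URM_clean, removed_users, removed_items = remove_empty_rows_and_cols(URM)
# URM_clean.shape -> (2, 2); removed_users -> [1]; removed_items -> [1]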
Example no. 14
    def __init__(self, URM_train, Recommender_1, Recommender_2, Recommender_3):
        super(ScoresHybrid3Recommender, self).__init__(URM_train)

        self.URM_train = check_matrix(URM_train.copy(), 'csr')
        self.Recommender_1 = Recommender_1
        self.Recommender_2 = Recommender_2
        self.Recommender_3 = Recommender_3
Example no. 15
    def __init__(self, URM_train, Recommender_1, Recommender_2):
        super(ItemKNNScoresHybridRecommender_Normalized,
              self).__init__(URM_train)

        self.URM_train = check_matrix(URM_train.copy(), 'csr')
        self.Recommender_1 = Recommender_1
        self.Recommender_2 = Recommender_2
Example no. 16
    def fit(self,
            URM_train,
            topK=500,
            alpha=1.,
            min_rating=0,
            implicit=False,
            normalize_similarity=False,
            tuning=False,
            similarity_path=SIMILARITY_PATH):

        self.URM_train = check_matrix(URM_train.copy(),
                                      'csr',
                                      dtype=np.float32)
        self.URM_train.eliminate_zeros()
        self.n_users, self.n_items = self.URM_train.shape

        self.topK = topK
        self.alpha = alpha
        self.min_rating = min_rating
        self.implicit = implicit
        self.normalize_similarity = normalize_similarity

        if tuning:
            if not os.path.exists(os.getcwd() + similarity_path):
                self.run_fit()
                self.helper.export_similarity_matrix(os.getcwd() +
                                                     similarity_path,
                                                     self.W_sparse,
                                                     name=self.RECOMMENDER_NAME)
            self.W_sparse = self.helper.import_similarity_matrix(
                os.getcwd() + similarity_path)

        else:
            self.run_fit()
        self.similarityProduct = self.URM_train.dot(self.W_sparse)
Example no. 17
    def __init__(self,
                 URM_train,
                 ICM,
                 recommender_list,
                 d_weights=None,
                 dynamic=False,
                 weights=None,
                 URM_validation=None,
                 sparse_weights=True):
        super(Recommender, self).__init__()

        # CSR is faster during evaluation
        self.URM_train = check_matrix(URM_train, 'csr')
        self.URM_validation = URM_validation
        self.dynamic = dynamic
        self.dataset = None
        self.d_weights = d_weights
        self.sparse_weights = sparse_weights

        self.recommender_list = []
        self.weights = weights

        for recommender in recommender_list:
            if recommender in [
                    SLIM_BPR_Cython, MatrixFactorization_BPR_Cython
            ]:
                print("class recognized")
                self.recommender_list.append(
                    recommender(URM_train, URM_validation=URM_validation))
            elif recommender is ItemKNNCBFRecommender:
                self.recommender_list.append(recommender(ICM, URM_train))
            else:
                self.recommender_list.append(recommender(URM_train))
Example no. 18
    def fit(self,
            similarities,
            weights=None,
            topK=100,
            normalize_weights=True):

        # Initialize weights array if not already initialized
        if weights is None:
            weights = np.array([1 for _ in similarities])

        # Checking the input parameters are well formatted
        assert len(similarities) == len(weights)
        assert len(similarities) > 0

        # Cast weights to numpy array if it is not
        weights = np.array(weights, dtype=np.float64)
        # Normalize the weights
        if normalize_weights:
            weights /= weights.max()

        # Create a list of pairs (similarity, weight)
        similarity_and_weight = zip(similarities, weights)

        # Initialize the result
        W_sparse = sps.csr_matrix(similarities[0].shape, dtype=np.float64)

        # Compute the new Similarity matrix
        for similarity, weight in similarity_and_weight:
            W_sparse += (similarity * weight)

        self.W_sparse = similarityMatrixTopK(W_sparse, k=topK)
        self.W_sparse = check_matrix(self.W_sparse, format='csr')
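
A standalone version of the combination step in the fit above, with two random sparse similarities (sizes and weights are illustrative; similarityMatrixTopK would then truncate the result):

import numpy as np
import scipy.sparse as sps

S1 = sps.random(50, 50, density=0.05, format='csr', dtype=np.float64)
S2 = sps.random(50, 50, density=0.05, format='csr', dtype=np.float64)

weights = np.array([0.7, 0.3], dtype=np.float64)
weights /= weights.max()          # same normalization as above -> [1.0, ~0.43]

W_sparse = sps.csr_matrix(S1.shape, dtype=np.float64)
for similarity, weight in zip([S1, S2], weights):
    W_sparse += similarity * weight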
Example no. 19
    def __init__(self, urm_train):
        super(HybridNorm3Recommender, self).__init__(urm_train)

        urm_train = check_matrix(urm_train.copy(), 'csr')
        self.num_users = urm_train.shape[0]

        # recommender_1 = HybridGenRecommender(urm_train)
        # recommender_1.fit()

        recommender_1 = RP3betaRecommender(urm_train)
        recommender_1.fit(topK=16,
                          alpha=0.03374950051351756,
                          beta=0.24087176329409027,
                          normalize_similarity=True)

        recommender_3 = UserKNNCFRecommender(urm_train)
        recommender_3.fit(shrink=2, topK=600, normalize=True)

        recommender_2 = ItemKNNCFRecommender(urm_train)
        recommender_2.fit(topK=5,
                          shrink=500,
                          feature_weighting='BM25',
                          similarity='tversky',
                          normalize=False,
                          tversky_alpha=0.0,
                          tversky_beta=1.0)

        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
        self.recommender_3 = recommender_3
Example no. 20
    def applyPearsonCorrelation(self):
        """
        Remove from every data point the average for the corresponding column
        :return:
        """

        self.dataMatrix = check_matrix(self.dataMatrix, 'csc')


        interactionsPerCol = np.diff(self.dataMatrix.indptr)

        nonzeroCols = interactionsPerCol > 0
        sumPerCol = np.asarray(self.dataMatrix.sum(axis=0)).ravel()

        colAverage = np.zeros_like(sumPerCol)
        colAverage[nonzeroCols] = sumPerCol[nonzeroCols] / interactionsPerCol[nonzeroCols]


        # Split in blocks to avoid duplicating the whole data structure
        start_col = 0
        end_col = 0

        blockSize = 1000


        while end_col < self.n_columns:

            end_col = min(self.n_columns, end_col + blockSize)

            self.dataMatrix.data[self.dataMatrix.indptr[start_col]:self.dataMatrix.indptr[end_col]] -= \
                np.repeat(colAverage[start_col:end_col], interactionsPerCol[start_col:end_col])

            start_col += blockSize
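
A toy check of the column centering above: after the subtraction, the stored values of every column sum to zero (3x2 example):

import numpy as np
import scipy.sparse as sps

M = sps.csc_matrix(np.array([[4.0, 0.0],
                             [2.0, 1.0],
                             [0.0, 3.0]]))
col_nnz = np.diff(M.indptr)
col_avg = np.asarray(M.sum(axis=0)).ravel() / col_nnz
M.data -= np.repeat(col_avg, col_nnz)
# np.asarray(M.sum(axis=0)).ravel() -> [0., 0.]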
Example no. 21
    def __init__(self, URM_train):

        super(GlobalEffects, self).__init__()

        self.URM_train = check_matrix(URM_train, 'csc', dtype=np.float32)

        self.compute_item_score = self.compute_score_global_effects
Example no. 22
    def applyAdjustedCosine(self):
        """
        Remove from every data point the average for the corresponding row
        :return:
        """

        self.dataMatrix = check_matrix(self.dataMatrix, 'csr')


        interactionsPerRow = np.diff(self.dataMatrix.indptr)

        nonzeroRows = interactionsPerRow > 0
        sumPerRow = np.asarray(self.dataMatrix.sum(axis=1)).ravel()

        rowAverage = np.zeros_like(sumPerRow)
        rowAverage[nonzeroRows] = sumPerRow[nonzeroRows] / interactionsPerRow[nonzeroRows]


        # Split in blocks to avoid duplicating the whole data structure
        start_row = 0
        end_row = 0

        blockSize = 1000


        while end_row < self.n_rows:

            end_row = min(self.n_rows, end_row + blockSize)

            self.dataMatrix.data[self.dataMatrix.indptr[start_row]:self.dataMatrix.indptr[end_row]] -= \
                np.repeat(rowAverage[start_row:end_row], interactionsPerRow[start_row:end_row])

            start_row += blockSize
Example no. 23
    def fit(self,
            topK=50,
            shrink=100,
            similarity='cosine',
            normalize=True,
            feature_weighting="none",
            **similarity_args):

        self.topK = topK
        self.shrink = shrink

        if feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES, feature_weighting))

        if feature_weighting == "BM25":
            self.UCM_train = self.UCM_train.astype(np.float32)
            self.UCM_train = okapi_BM_25(self.UCM_train)

        elif feature_weighting == "TF-IDF":
            self.UCM_train = self.UCM_train.astype(np.float32)
            self.UCM_train = TF_IDF(self.UCM_train)

        similarity = Compute_Similarity(self.UCM_train.T,
                                        shrink=shrink,
                                        topK=topK,
                                        normalize=normalize,
                                        similarity=similarity,
                                        **similarity_args)

        self.W_sparse = similarity.compute_similarity()
        self.W_sparse = check_matrix(self.W_sparse, format='csr')
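
The TF_IDF helper used above is not shown; a minimal sketch of the usual weighting, assuming plain IDF scaling of each feature column (the repository helper may differ, e.g. in smoothing):

import numpy as np
import scipy.sparse as sps

def tf_idf_sketch(UCM):
    # Scale each feature column by its inverse document frequency.
    UCM = sps.csr_matrix(UCM, dtype=np.float32)
    df = np.ediff1d(UCM.tocsc().indptr)        # rows containing each feature
    idf = np.log(UCM.shape[0] / (df + 1e-9))
    return sps.csr_matrix(UCM.multiply(idf.reshape(1, -1)))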
Example no. 24
    def __init__(self, URM_train):
        super(Random, self).__init__()

        # CSR is faster during evaluation
        self.URM_train = check_matrix(URM_train, 'csr', dtype=np.float32)

        self.compute_item_score = self.compute_score_random
Example no. 25
    def __init__(self, URM_train, verbose=True):

        super(BaseRecommender, self).__init__()

        self.URM_train = check_matrix(URM_train.copy(), "csr", dtype=np.float32)
        self.URM_train.eliminate_zeros()

        self.n_users, self.n_items = self.URM_train.shape
        self.verbose = verbose

        self.filterTopPop = False
        self.filterTopPop_ItemsID = np.array([], dtype=int)

        self.items_to_ignore_flag = False
        self.items_to_ignore_ID = np.array([], dtype=int)

        self._cold_user_mask = np.ediff1d(self.URM_train.indptr) == 0

        if self._cold_user_mask.any():
            self._print(
                "URM Detected {} ({:.2f} %) cold users.".format(
                    self._cold_user_mask.sum(),
                    self._cold_user_mask.sum() / self.n_users * 100,
                )
            )

        self._cold_item_mask = np.ediff1d(self.URM_train.tocsc().indptr) == 0

        if self._cold_item_mask.any():
            self._print(
                "URM Detected {} ({:.2f} %) cold items.".format(
                    self._cold_item_mask.sum(),
                    self._cold_item_mask.sum() / self.n_items * 100,
                )
            )
Example no. 26
    def __init__(self, URM_train):
        super(RP3betaRecommender, self).__init__()

        self.URM_train = check_matrix(URM_train,
                                      format='csr',
                                      dtype=np.float32)
        self.sparse_weights = True
Example no. 27
    def fit(self, item_weights, URM_train, selectTopK=False):

        self.URM_train = check_matrix(URM_train, format='csc')

        # If no topK selection is required, just save the similarity
        if not selectTopK:
            if isinstance(item_weights, np.ndarray):
                #self.W = item_weights
                #self.sparse_weights = False
                self.W_sparse = sps.csr_matrix(item_weights)
                self.sparse_weights = True
            else:
                self.W_sparse = check_matrix(item_weights, format='csr')
                self.sparse_weights = True

            return

        # If matrix is not dense, make it dense to select top K
        if not isinstance(item_weights, np.ndarray):
            item_weights = item_weights.toarray()

        idx_sorted = np.argsort(item_weights, axis=0)  # sort by column

        # for each column, keep only the top-k scored items

        if not self.sparse_weights:
            self.W = item_weights.copy()
            # index of the items that don't belong to the top-k similar items of each column
            not_top_k = idx_sorted[:-self.k, :]
            # use numpy fancy indexing to zero-out the values in sim without using a for loop
            self.W[not_top_k, np.arange(item_weights.shape[1])] = 0.0
        else:
            # iterate over each column and keep only the top-k similar items
            values, rows, cols = [], [], []
            nitems = self.URM_train.shape[1]
            for i in range(nitems):

                top_k_idx = idx_sorted[-self.k:, i]

                values.extend(item_weights[top_k_idx, i])
                rows.extend(np.arange(nitems)[top_k_idx])
                cols.extend(np.full(self.k, i))

            # During testing CSR is faster
            self.W_sparse = sps.csr_matrix((values, (rows, cols)),
                                           shape=(nitems, nitems),
                                           dtype=np.float32)
Example no. 28
    def _build_confidence_matrix(self, confidence_scaling):

        if confidence_scaling == 'linear':
            self.C = self._linear_scaling_confidence()
        else:
            self.C = self._log_scaling_confidence()

        self.C_csc = check_matrix(self.C.copy(), format="csc", dtype=np.float32)
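
The two scalings referenced above follow the standard implicit-feedback confidence formulas, c_ui = 1 + alpha * r_ui (linear) and c_ui = 1 + alpha * log(1 + r_ui / epsilon) (log); a sketch under that assumption, with illustrative alpha and epsilon defaults:

import numpy as np

def linear_scaling_confidence(URM, alpha=1.0):
    C = URM.copy().astype(np.float32)
    C.data = 1.0 + alpha * C.data
    return C

def log_scaling_confidence(URM, alpha=1.0, epsilon=1.0):
    C = URM.copy().astype(np.float32)
    C.data = 1.0 + alpha * np.log(1.0 + C.data / epsilon)
    return C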
Example no. 29
    def __init__(self, URM_train):
        super(TopPop, self).__init__()

        # convert to csc matrix for faster column-wise sum
        self.URM_train = check_matrix(URM_train, 'csc', dtype=np.float32)
        self.URM_train.eliminate_zeros()

        self.compute_item_score = self.compute_score_top_pop
Example no. 30
    def __init__(self, URM_train, ICM_train):
        super(ScoresHybridSpecializedV2Mid, self).__init__(URM_train)

        self.URM_train = check_matrix(URM_train.copy(), 'csr')
        self.ICM_train = ICM_train
        self.P3alpha = P3alphaRecommender.P3alphaRecommender(URM_train)
        self.itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender(
            URM_train, ICM_train)