예제 #1
0
    def fit(self,
            topK=50,
            shrink=100,
            similarity='cosine',
            normalize=True,
            feature_weighting="none",
            interactions_feature_weighting="none",
            **similarity_args):

        if interactions_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES,
                        interactions_feature_weighting))

        if interactions_feature_weighting == "BM25":
            self.URM_train = self.URM_train.astype(np.float32)
            self.URM_train = okapi_BM_25(self.URM_train.T).T
            self.URM_train = check_matrix(self.URM_train, 'csr')

        elif interactions_feature_weighting == "TF-IDF":
            self.URM_train = self.URM_train.astype(np.float32)
            self.URM_train = TF_IDF(self.URM_train.T).T
            self.URM_train = check_matrix(self.URM_train, 'csr')

        super().fit(topK=topK,
                    shrink=shrink,
                    similarity=similarity,
                    normalize=normalize,
                    feature_weighting=feature_weighting,
                    **similarity_args)
예제 #2
0
    def fit(self, topK=50, shrink=100, similarity='cosine', normalize=True, feature_weighting="none",
            **similarity_args):

        self.topK = topK
        self.shrink = shrink

        if feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'".format(
                    self.FEATURE_WEIGHTING_VALUES, feature_weighting))

        if feature_weighting == "BM25":
            self.URM_train = self.URM_train.astype(np.float32)
            self.URM_train = okapi_BM_25(self.URM_train)
            self.URM_train = check_matrix(self.URM_train, 'csr')

        elif feature_weighting == "TF-IDF":
            self.URM_train = self.URM_train.astype(np.float32)
            self.URM_train = TF_IDF(self.URM_train)
            self.URM_train = check_matrix(self.URM_train, 'csr')

        similarity = Compute_Similarity(self.URM_train.T, shrink=shrink, topK=topK, normalize=normalize,
                                        similarity=similarity, **similarity_args)

        self.W_sparse = similarity.compute_similarity()
        self.W_sparse = check_matrix(self.W_sparse, format='csr')
예제 #3
0
    def fit(self,
            topK=50,
            shrink=100,
            similarity='cosine',
            normalize=True,
            feature_weighting="none",
            **similarity_args):

        self.topK = topK
        self.topComputeK = topK + len(self.cold_users)
        self.shrink = shrink

        if feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES, feature_weighting))

        if feature_weighting == "BM25":
            self.UCM_train = self.UCM_train.astype(np.float32)
            self.UCM_train = okapi_BM_25(self.UCM_train)

        elif feature_weighting == "TF-IDF":
            self.UCM_train = self.UCM_train.astype(np.float32)
            self.UCM_train = TF_IDF(self.UCM_train)

        similarity = Compute_Similarity(self.UCM_train.T,
                                        shrink=shrink,
                                        topK=self.topComputeK,
                                        normalize=normalize,
                                        similarity=similarity,
                                        **similarity_args)

        self.W_sparse = similarity.compute_similarity()
        self.W_sparse = self.W_sparse.tocsc()

        for user in self.cold_users:
            self.W_sparse.data[self.W_sparse.indptr[user]:self.W_sparse.
                               indptr[user + 1]] = 0

        self.W_sparse.eliminate_zeros()
        self.W_sparse = self.W_sparse.tocsr()

        self.W_sparse = similarityMatrixTopK(self.W_sparse,
                                             k=self.topK).tocsr()
        self.W_sparse = check_matrix(self.W_sparse, format='csr')

        # Add identity matrix to the recommender
        self.recommender.W_sparse = self.recommender.W_sparse + sps.identity(
            self.recommender.W_sparse.shape[0], format="csr")
예제 #4
0
    def precompute_best_item_indices(self, URM: sps.csr_matrix):
        URM = URM.copy()
        if self.feature_weighting == "BM25":
            URM = URM.astype(np.float32)
            URM = okapi_BM_25(URM)
            URM = check_matrix(URM, 'csr')

        elif self.feature_weighting == "TF-IDF":
            URM = URM.astype(np.float32)
            URM = TF_IDF(URM)
            URM = check_matrix(URM, 'csr')

        similarity = Compute_Similarity(URM,
                                        shrink=self.shrink,
                                        topK=self.topK,
                                        normalize=self.normalize,
                                        similarity="cosine")
        similarity_matrix = similarity.compute_similarity()
        self.sorted_indices = np.array(
            np.argsort(-similarity_matrix.todense(), axis=1))
예제 #5
0
def apply_feature_weighting(matrix, feature_weighting="none"):
    from course_lib.Base.IR_feature_weighting import okapi_BM_25, TF_IDF
    from course_lib.Base.Recommender_utils import check_matrix

    FEATURE_WEIGHTING_VALUES = ["BM25", "TF-IDF", "none"]

    if feature_weighting not in FEATURE_WEIGHTING_VALUES:
        raise ValueError(
            "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
            .format(FEATURE_WEIGHTING_VALUES, feature_weighting))

    if feature_weighting == "BM25":
        matrix = matrix.astype(np.float32)
        matrix = okapi_BM_25(matrix)
        matrix = check_matrix(matrix, 'csr')
    elif feature_weighting == "TF-IDF":
        matrix = matrix.astype(np.float32)
        matrix = TF_IDF(matrix)
        matrix = check_matrix(matrix, 'csr')
    return matrix
예제 #6
0
    def fit(self,
            topK=50,
            shrink=100,
            normalize=True,
            feature_weighting="none"):

        self.topK = topK
        self.shrink = shrink

        if feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES, feature_weighting))

        if feature_weighting == "BM25":
            self.URM_train = self.URM_train.astype(np.float32)
            self.URM_train = okapi_BM_25(self.URM_train.T).T
            self.URM_train = check_matrix(self.URM_train, 'csr')

        elif feature_weighting == "TF-IDF":
            self.URM_train = self.URM_train.astype(np.float32)
            self.URM_train = TF_IDF(self.URM_train.T).T
            self.URM_train = check_matrix(self.URM_train, 'csr')

        denominator = 1 if shrink == 0 else shrink

        self.W_sparse = self.URM_train.T.dot(
            self.URM_train) * (1 / denominator)

        if self.topK >= 0:
            self.W_sparse = userSimilarityMatrixTopK(self.W_sparse,
                                                     k=self.topK).tocsr()

        if normalize:
            self.W_sparse = normalize_sk(self.W_sparse, norm="l2", axis=1)

        self.W_sparse = check_matrix(self.W_sparse, format='csr')
    def fit(self,
            user_topK=50,
            user_shrink=100,
            user_similarity_type='cosine',
            user_normalize=True,
            user_feature_weighting="none",
            user_asymmetric_alpha=0.5,
            item_topK=50,
            item_shrink=100,
            item_similarity_type='cosine',
            item_normalize=True,
            item_feature_weighting="none",
            item_asymmetric_alpha=0.5,
            interactions_feature_weighting="none"):

        if interactions_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES,
                        interactions_feature_weighting))

        if interactions_feature_weighting == "BM25":
            self.URM_train = self.URM_train.astype(np.float32)
            self.URM_train = okapi_BM_25(self.URM_train)
            self.URM_train = check_matrix(self.URM_train, 'csr')

        elif interactions_feature_weighting == "TF-IDF":
            self.URM_train = self.URM_train.astype(np.float32)
            self.URM_train = TF_IDF(self.URM_train)
            self.URM_train = check_matrix(self.URM_train, 'csr')

        # User Similarity Computation
        self.user_topK = user_topK
        self.user_shrink = user_shrink

        if user_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES, user_feature_weighting))

        if user_feature_weighting == "BM25":
            self.UCM_train = self.UCM_train.astype(np.float32)
            self.UCM_train = okapi_BM_25(self.UCM_train)

        elif user_feature_weighting == "TF-IDF":
            self.UCM_train = self.UCM_train.astype(np.float32)
            self.UCM_train = TF_IDF(self.UCM_train)

        kwargs = {"asymmetric_alpha": user_asymmetric_alpha}
        user_similarity_compute = Compute_Similarity(
            self.UCM_train.T,
            shrink=user_shrink,
            topK=user_topK,
            normalize=user_normalize,
            similarity=user_similarity_type,
            **kwargs)

        self.user_W_sparse = user_similarity_compute.compute_similarity()
        self.user_W_sparse = check_matrix(self.user_W_sparse, format='csr')

        # Item Similarity Computation
        self.item_topK = item_topK
        self.item_shrink = item_shrink

        if item_feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
            raise ValueError(
                "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
                .format(self.FEATURE_WEIGHTING_VALUES, item_feature_weighting))

        if item_feature_weighting == "BM25":
            self.ICM_train = self.ICM_train.astype(np.float32)
            self.ICM_train = okapi_BM_25(self.ICM_train)

        elif item_feature_weighting == "TF-IDF":
            self.ICM_train = self.ICM_train.astype(np.float32)
            self.ICM_train = TF_IDF(self.ICM_train)

        kwargs = {"asymmetric_alpha": item_asymmetric_alpha}
        item_similarity_compute = Compute_Similarity(
            self.ICM_train.T,
            shrink=item_shrink,
            topK=item_topK,
            normalize=item_normalize,
            similarity=item_similarity_type,
            **kwargs)

        self.item_W_sparse = item_similarity_compute.compute_similarity()
        self.item_W_sparse = check_matrix(self.item_W_sparse, format='csr')