Ejemplo n.º 1
0
    def fit(self,
            topK=600,
            shrink=1000,
            similarity='asymmetric',
            normalize=True,
            feature_weighting="BM25",
            save_model=False,
            best_parameters=False,
            **similarity_args):
        """Compute the similarity matrix from the transposed ICM.

        Args:
            topK: neighbourhood size forwarded to Compute_Similarity.
            shrink: shrink term forwarded to Compute_Similarity.
            similarity: name of the similarity measure forwarded to
                Compute_Similarity; also written to ``self.parameters``.
            normalize: normalisation flag forwarded to Compute_Similarity.
            feature_weighting: accepted for interface compatibility; this
                method never reads it. Only ``self.feature_weighting`` is
                consulted, and only when ``best_parameters`` is true.
            save_model: when true, persist the model through ``saveModel``
                under ``saved_models/submission/``.
            best_parameters: when true, first restore a stored state via
                ``OfflineDataLoader.get_parameter`` + ``loadModel`` and then
                re-weight ``self.ICM`` according to ``self.feature_weighting``.
            **similarity_args: extra keyword arguments forwarded to
                Compute_Similarity. ``asymmetric_alpha`` defaults to a
                hard-coded value when the caller does not supply one.

        Side effects:
            Sets ``self.parameters`` and either ``self.W_sparse`` or
            ``self.W``. In best-parameters mode ``self.ICM`` is replaced by
            its weighted version, so fitting twice in that mode weights the
            matrix twice.
        """
        # Hard-coded default; values passed explicitly by the caller win
        # instead of being silently discarded.
        merged_args = {'asymmetric_alpha': 0.40273209903969387}
        merged_args.update(similarity_args)

        if best_parameters:
            m = OfflineDataLoader()
            folder_path_icbf, file_name_icbf = m.get_parameter(
                self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path_icbf,
                           file_name=file_name_icbf)
            # NOTE(review): self.feature_weighting is presumably restored by
            # loadModel -- confirm. Any value other than the two below leaves
            # the ICM untouched.
            if self.feature_weighting == "BM25":
                self.ICM = to_okapi(self.ICM.astype(np.float32))
            elif self.feature_weighting == "TF-IDF":
                self.ICM = to_tfidf(self.ICM.astype(np.float32))
        else:
            self.topK = topK
            self.shrink = shrink

        # Keep the similarity *name* in `similarity`; the helper object gets
        # its own variable so the log string below stays readable.
        similarity_object = Compute_Similarity(self.ICM.T,
                                               shrink=shrink,
                                               topK=topK,
                                               normalize=normalize,
                                               similarity=similarity,
                                               **merged_args)

        # self.shrink / self.topK are read from the instance: in
        # best-parameters mode they are whatever loadModel left there
        # (assumption: loadModel restores them -- TODO confirm).
        self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, " \
                          "normalize= {4}".format(
            self.sparse_weights, similarity, self.shrink, self.topK, normalize)

        if self.sparse_weights:
            self.W_sparse = similarity_object.compute_similarity()
        else:
            self.W = similarity_object.compute_similarity()
            self.W = self.W.toarray()

        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name="ItemKNNCBFRecommender_submission_model")
    def fit(self, topK=250, shrink=100, alpha=0.7017094, beta=0.51034483, gamma=0.16206897, normalize=False,
            similarity="jaccard", **similarity_args):
        """Build the four similarity helpers of the hybrid and, when
        ``self.sparse_weights`` is set, compute their matrices.

        ``topK``, ``shrink``, ``alpha``, ``beta``, ``gamma`` and ``normalize``
        are stored on the instance (``topK`` as ``self.k``). ``topK`` and
        ``shrink`` are NOT forwarded to Compute_Similarity: each helper uses
        its own hard-coded shrink/topK pair. ``normalize``, ``similarity`` and
        ``**similarity_args`` are forwarded to every Compute_Similarity call.

        Side effects: ``self.ICM`` is replaced by ``to_okapi(self.ICM)``, so a
        second call weights the already-weighted matrix again.
        """
        self.k, self.shrink = topK, shrink
        self.alpha, self.beta, self.gamma = alpha, beta, gamma
        self.normalize = normalize

        print("Item Tree Hybrid Recommender: Model fitting begins")

        # Keyword arguments shared by every Compute_Similarity helper below.
        shared_kwargs = dict(similarity_args,
                             normalize=normalize,
                             similarity=similarity)

        # Similarity on the transposed tf-idf URM
        # (original author's note: 50446 x 50446).
        self.sim_URM_tfidf = Compute_Similarity(
            self.URM_train_tfidf.T, shrink=0, topK=200, **shared_kwargs)

        # Similarity on the transposed ICM after to_okapi weighting
        # (original author's note: 20635 x 20635).
        self.ICM = to_okapi(self.ICM)
        self.sim_ICM_tfidf = Compute_Similarity(
            self.ICM.T, shrink=0, topK=25, **shared_kwargs)

        # Similarity on the (non-transposed) tf-idf URM
        # (original author's note: 20635 x 20635).
        self.sim_URM_T_tfidf = Compute_Similarity(
            self.URM_train_tfidf, shrink=10, topK=350, **shared_kwargs)

        # Slim model on the plain training URM
        # (original author's note: 20635 x 20635).
        self.sim_Slim = Slim_mark1(self.URM_train)

        # Matrices are only materialised in sparse mode; otherwise just the
        # helper objects above are kept.
        if self.sparse_weights:
            # from the transposed tf-idf URM
            self.W_sparse_URM = self.sim_URM_tfidf.compute_similarity()
            # from the weighted ICM
            self.W_sparse_ICM = self.sim_ICM_tfidf.compute_similarity()
            # from the non-transposed tf-idf URM
            self.W_sparse_URM_T = self.sim_URM_T_tfidf.compute_similarity()
            # Slim (original author's note:
            # lambda_i = 0.37142857, lambda_j = 0.97857143)
            self.W_sparse_Slim = self.sim_Slim.fit()

        # Summary string used for logging.
        template = "sparse_weights= {}, similarity= {},shrink= {}, neighbourhood={},normalize= {}, alpha= {}, beta={}, gamma={}"
        self.parameters = template.format(
            self.sparse_weights, similarity, shrink, topK, normalize,
            alpha, beta, gamma)
    def fit(self,
            topK=175,
            shrink=400,
            similarity="asymmetric",
            normalize=True,
            feature_weighting="BM25",
            save_model=False,
            best_parameters=False,
            location="training",
            submission=False,
            offline=False,
            **similarity_args):
        """Fit the similarity model on the transposed training URM.

        Modes:
            * ``offline``: restore a stored model via
              ``OfflineDataLoader.get_model`` + ``loadModel``; nothing is
              recomputed.
            * ``best_parameters``: restore stored state via
              ``OfflineDataLoader.get_parameter`` + ``loadModel``, then
              recompute. With a weighting other than ``"none"`` a pinned set
              of Compute_Similarity arguments is used; with ``"none"`` only
              the caller's ``**similarity_args`` are forwarded.
            * default: use the call arguments.

        Args:
            topK, shrink, similarity, normalize: forwarded to
                Compute_Similarity in default mode and always written to
                ``self.parameters``.
            feature_weighting: ``"BM25"``, ``"TF-IDF"`` or ``"none"``; used in
                default mode. Best-parameters mode uses
                ``self.feature_weighting`` instead.
            save_model: when true, persist the model through ``saveModel``.
            best_parameters, offline: mode switches, see above.
            location: used to build the saved file name.
            submission: in offline mode, ``training=(not submission)`` is
                passed to ``get_model``.
            **similarity_args: extra Compute_Similarity keyword arguments.
                In default mode ``asymmetric_alpha`` falls back to a
                hard-coded value when not supplied.

        Raises:
            ValueError: if the selected weighting is not one of the three
                supported names.
        """
        if offline:
            m = OfflineDataLoader()
            folder_path_icf, file_name_icf = m.get_model(
                self.RECOMMENDER_NAME, training=(not submission))
            self.loadModel(folder_path=folder_path_icf,
                           file_name=file_name_icf)
        else:
            if best_parameters:
                m = OfflineDataLoader()
                folder_path_ucf, file_name_ucf = m.get_parameter(
                    self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path_ucf,
                               file_name=file_name_ucf)
                # Use the instance's weighting for both the gate and the
                # dispatch. NOTE(review): presumably restored by loadModel --
                # confirm.
                weighting = self.feature_weighting
                if weighting == "none":
                    similarity_kwargs = similarity_args
                else:
                    # Pinned configuration for best-parameters mode.
                    similarity_kwargs = {
                        'asymmetric_alpha': 0.11483114799990246,
                        'normalize': True,
                        'shrink': 450,
                        'similarity': 'asymmetric',
                        'topK': 200
                    }
            else:
                self.topK = topK
                self.shrink = shrink
                self.feature_weighting = feature_weighting
                weighting = feature_weighting
                # Hard-coded default alpha; explicit caller kwargs win
                # instead of being silently discarded.
                similarity_kwargs = {'asymmetric_alpha': 0.0033404951135529437}
                similarity_kwargs.update(similarity_args)
                similarity_kwargs.update(shrink=shrink,
                                         topK=topK,
                                         normalize=normalize,
                                         similarity=similarity)

            # Pick the matrix to compare. The float32 cast is now actually
            # fed into the weighting function rather than thrown away.
            if weighting == "none":
                train_matrix = self.URM_train
            elif weighting == "BM25":
                self.URM_train_copy = to_okapi(
                    self.URM_train.astype(np.float32))
                train_matrix = self.URM_train_copy
            elif weighting == "TF-IDF":
                self.URM_train_copy = to_tfidf(
                    self.URM_train.astype(np.float32))
                train_matrix = self.URM_train_copy
            else:
                # Avoid silently reusing a stale URM_train_copy from a
                # previous fit (or failing with an obscure AttributeError).
                raise ValueError(
                    "Unknown feature_weighting: {0!r}".format(weighting))

            # `similarity` keeps the measure's name for the log string; the
            # helper object lives in its own variable.
            similarity_object = Compute_Similarity(train_matrix.T,
                                                   **similarity_kwargs)

            # Logs the call arguments; in best-parameters mode these may
            # differ from the pinned values actually used above.
            self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, " \
                            "normalize= {4}".format(self.sparse_weights, similarity, shrink, topK, normalize)

            if self.sparse_weights:
                self.W_sparse = similarity_object.compute_similarity()
            else:
                self.W = similarity_object.compute_similarity()
                self.W = self.W.toarray()
        if save_model:
            # NOTE(review): the folder is always "submission" even though the
            # file name embeds `location` -- confirm this is intended.
            self.saveModel("saved_models/submission/",
                           file_name=self.RECOMMENDER_NAME + "_" + location +
                           "_model")
Ejemplo n.º 4
0
 def get_URM_train_okapi(self):
     """Return ``to_okapi(self.URM_train)`` and store it on the instance.

     The result is recomputed on every call and kept in
     ``self.URM_train_okapi``.

     Raises:
         TypeError: if ``self.URM_train`` is None.
     """
     # Guard clause: nothing to weight until the training URM exists.
     if self.URM_train is None:
         raise TypeError("PlaylistDataReader: URM train is not build")

     okapi_matrix = to_okapi(self.URM_train)
     self.URM_train_okapi = okapi_matrix
     return okapi_matrix
    def fit(self,
            topK=400,
            shrink=200,
            similarity='cosine',
            feature_weighting="BM25",
            normalize=True,
            save_model=False,
            best_parameters=False,
            offline=False,
            submission=False,
            location="submission",
            **similarity_args):
        """Fit the similarity model on the (non-transposed) training URM.

        Modes:
            * ``offline``: restore a stored model via
              ``OfflineDataLoader.get_model`` + ``loadModel``; nothing is
              recomputed.
            * ``best_parameters``: restore stored state via
              ``OfflineDataLoader.get_parameter`` + ``loadModel``, then
              recompute with a pinned set of Compute_Similarity arguments and
              the weighting named by ``self.feature_weighting``.
            * default: use the call arguments.

        Args:
            topK, shrink, similarity, normalize: forwarded to
                Compute_Similarity in default mode and always written to
                ``self.parameters``.
            feature_weighting: ``"BM25"``, ``"TF-IDF"`` or ``"none"``; used in
                default mode only.
            save_model: when true, persist the model through ``saveModel``
                under ``saved_models/<location>/``.
            best_parameters, offline: mode switches, see above.
            submission: in offline mode, ``training=(not submission)`` is
                passed to ``get_model``.
            location: sub-folder and file-name component for ``saveModel``.
            **similarity_args: extra Compute_Similarity keyword arguments
                (default mode only; replaced by the pinned set in
                best-parameters mode).

        Raises:
            ValueError: if the selected weighting is not one of the three
                supported names.
        """
        # Earlier configuration kept for reference:
        # similarity_args = {'tversky_alpha': 0.8047100184165605, 'tversky_beta': 1.9775806370926445}
        if offline:
            m = OfflineDataLoader()
            folder_path_icf, file_name_icf = m.get_model(
                self.RECOMMENDER_NAME, training=(not submission))
            self.loadModel(folder_path=folder_path_icf,
                           file_name=file_name_icf)
        else:
            if best_parameters:
                m = OfflineDataLoader()
                folder_path_icf, file_name_icf = m.get_parameter(
                    self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path_icf,
                               file_name=file_name_icf)
                # Earlier configuration kept for reference:
                # {'normalize': True, 'shrink': 0, 'similarity': 'tversky', 'topK': 20, 'tversky_alpha': 0.18872151621891953, 'tversky_beta': 1.99102432161935}
                # NOTE(review): 'feature_weighting' is forwarded to
                # Compute_Similarity as in the original -- confirm that it
                # accepts this keyword.
                similarity_kwargs = {
                    'feature_weighting': 'BM25',
                    'normalize': True,
                    'shrink': 200,
                    'similarity': 'cosine',
                    'topK': 400
                }
                # NOTE(review): presumably restored by loadModel -- confirm.
                weighting = self.feature_weighting
            else:
                self.topK = topK
                self.shrink = shrink
                self.feature_weighting = feature_weighting
                weighting = feature_weighting
                similarity_kwargs = dict(similarity_args,
                                         shrink=shrink,
                                         topK=topK,
                                         normalize=normalize,
                                         similarity=similarity)

            # Pick the matrix to compare. "none" now uses the raw URM in
            # both modes, and the float32 cast is actually fed into the
            # weighting function rather than thrown away.
            if weighting == "none":
                train_matrix = self.URM_train
            elif weighting == "BM25":
                self.URM_train_copy = to_okapi(
                    self.URM_train.astype(np.float32))
                train_matrix = self.URM_train_copy
            elif weighting == "TF-IDF":
                self.URM_train_copy = to_tfidf(
                    self.URM_train.astype(np.float32))
                train_matrix = self.URM_train_copy
            else:
                # Avoid silently reusing a stale URM_train_copy from a
                # previous fit (or failing with an obscure AttributeError).
                raise ValueError(
                    "Unknown feature_weighting: {0!r}".format(weighting))

            # `similarity` keeps the measure's name for the log string; the
            # helper object lives in its own variable.
            similarity_object = Compute_Similarity(train_matrix,
                                                   **similarity_kwargs)

            # Logs the call arguments; in best-parameters mode these may
            # differ from the pinned values actually used above.
            self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, normalize={4}".format(
                self.sparse_weights, similarity, shrink, topK, normalize)
            if self.sparse_weights:
                self.W_sparse = similarity_object.compute_similarity()
            else:
                self.W = similarity_object.compute_similarity()
                self.W = self.W.toarray()
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME + "_" + location +
                           "_model")