コード例 #1
0
    def fit(self):
        """Re-weight the stored URM and then run the training routine.

        ``self.URM`` is replaced by the result of ``apply_feature_weighting``
        called with the configured ``feature_weighting``, ``K`` and ``B``;
        afterwards training is delegated to ``self._train()``.
        """
        weighted_urm = apply_feature_weighting(
            self.URM, self.feature_weighting, K=self.K, B=self.B)
        self.URM = weighted_urm

        self._train()
コード例 #2
0
    def fit(self, verbose=True):
        """Train the model epoch by epoch, with optional early stopping.

        ``self.URM`` is first replaced by its feature-weighted version, then a
        ``SLIM_BPR_Cython_Epoch`` runner is built on a copy of it and executed
        for at most ``self.epochs`` epochs.

        When ``self.patience`` is not ``None`` the model is evaluated on
        ``self.URM_test`` after every epoch; the best similarity seen so far is
        kept in ``self.S_best`` and training stops once the number of
        consecutive non-improving epochs exceeds ``self.patience``.

        Args:
            verbose: forwarded unchanged to ``SLIM_BPR_Cython_Epoch``.
        """
        self.URM = apply_feature_weighting(self.URM, self.feature_weighting)

        # URM matrix is 0-1s already
        URM_train_positive = self.URM.copy()

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(
            URM_train_positive,
            train_with_sparse_weights=self.train_with_sparse_weights,
            final_model_sparse_weights=True,
            topK=self.topK,
            learning_rate=self.learning_rate,
            li_reg=self.lambda_i,
            lj_reg=self.lambda_j,
            batch_size=1,
            symmetric=self.symmetric,
            sgd_mode=self.sgd_mode,
            verbose=verbose,
            random_seed=None,
            gamma=self.gamma,
            beta_1=self.beta_1,
            beta_2=self.beta_2)

        # MAIN LOOP of training
        convergence = False
        best_MAP = 0
        epochs_current = 0
        lower_epochs = 0
        while epochs_current < self.epochs and not convergence:
            # run an epoch
            self.cythonEpoch.epochIteration_Cython()

            # Identity check: ``None`` means "no early stopping"; a patience
            # of 0 is still a valid setting and must enable validation.
            if self.patience is not None:
                # prepare for validation
                self.get_S_set_W_set_predicted_URM()

                _, _, MAP = utils.evaluate.evaluate_algorithm(
                    self.URM_test, self)
                if MAP > best_MAP:  # best model so far
                    print(
                        "Found new best MAP! Epoch:{}, New MAP:{}, Old MAP:{}".
                        format(epochs_current, MAP, best_MAP))
                    self.S_best = self.S_incremental.copy()
                    best_MAP = MAP
                    lower_epochs = 0
                else:  # one more run without improvements
                    lower_epochs += 1
                    if lower_epochs > self.patience:
                        convergence = True

            epochs_current += 1

        if self.patience is not None:
            # Restore best model so far.
            # NOTE(review): self.S_best is only assigned above when some epoch
            # reaches MAP > 0; confirm use_Sbest_set_W_set_predicted_URM copes
            # with the case where that never happens.
            self.use_Sbest_set_W_set_predicted_URM()
        else:
            self.get_S_set_W_set_predicted_URM()

        self.cythonEpoch._dealloc()

        sys.stdout.flush()
コード例 #3
0
    def fit(self):
        """Fit the ALS MF model.

        The feature-weighted version of ``self.URM`` (computed by
        ``apply_feature_weighting`` with the configured ``feature_weighting``,
        ``K`` and ``B``) is stored in ``self.Cui``; ``self.URM`` itself is not
        reassigned here. Training is then delegated to ``self._train()``.
        """
        self.Cui = apply_feature_weighting(self.URM,
                                           self.feature_weighting,
                                           K=self.K,
                                           B=self.B)

        self._train()
コード例 #4
0
    def fit(self):
        """Compute the similarity matrix and the precomputed prediction matrix.

        ``self.URM`` is replaced by its feature-weighted version, a
        ``Compute_Similarity`` object built on it yields ``self.W_sparse``, and
        ``self.predicted_URM`` is set to ``URM.dot(W_sparse)``.
        """
        self.URM = apply_feature_weighting(
            self.URM, self.feature_weighting, K=self.K, B=self.B)

        # Options forwarded as-is to the similarity computation.
        similarity_options = dict(
            shrink=self.shrink,
            topK=self.topK,
            normalize=self.normalize,
            similarity=self.similarity,
            asymmetric_alpha=self.asymmetric_alpha,
        )
        self.W_sparse = Compute_Similarity(
            self.URM, **similarity_options).compute_similarity()

        # Precompute the predictions once so they need not be recomputed later.
        self.predicted_URM = self.URM.dot(self.W_sparse)
コード例 #5
0
    def fit(self, model_name='als', verbose=True):
        """Train the ALS model using the implicit module.

        ``self.URM`` is replaced by its feature-weighted version, the selected
        model is fitted on the transposed URM, and the learned factors are
        exposed as ``self.user_factors`` and ``self.item_factors``.

        Args:
            model_name: implementation to use; one of ``'als'``,
                ``'nmslibals'``, ``'faissals'`` or ``'annoyals'``.
            verbose: when true, print the elapsed training time.

        Raises:
            ValueError: if ``model_name`` is not one of the supported names.
                The check runs before any state on ``self`` is modified.
        """
        supported_models = ('als', 'nmslibals', 'faissals', 'annoyals')
        if model_name not in supported_models:
            raise ValueError(
                'Invalid model name: {!r} (expected one of {})'.format(
                    model_name, ', '.join(supported_models)))

        self.URM = apply_feature_weighting(self.URM,
                                           self.feature_weighting,
                                           K=self.K,
                                           B=self.B)

        start_time = time.time()

        # Pick the model class; every variant takes the same constructor
        # arguments, so the instantiation is shared below.
        if model_name == 'als':
            model_class = impl.als.AlternatingLeastSquares
        elif model_name == 'nmslibals':
            model_class = impl.approximate_als.NMSLibAlternatingLeastSquares
        elif model_name == 'faissals':
            model_class = impl.approximate_als.FaissAlternatingLeastSquares
        else:  # 'annoyals' -- the only remaining validated name
            model_class = impl.approximate_als.AnnoyAlternatingLeastSquares

        self.model = model_class(
            factors=self.latent_factors,
            regularization=self.lambda_val,
            iterations=self.iterations)

        # fit the ALS model
        # since the model is expecting a item-user matrix we need to pass the transpose of URM
        A = self.URM.T.copy()
        self.model.fit(A)

        # gets the results of the training
        self.user_factors = self.model.user_factors
        self.item_factors = self.model.item_factors

        if verbose:
            print("IMPLICIT ALS training computed in {:.2f} seconds".format(
                time.time() - start_time))
コード例 #6
0
    def fit(self):
        """Build the similarity matrix ``W_sparse`` and precompute ``predicted_URM``.

        Steps, as implemented below:

        1. Replace ``self.URM`` by its feature-weighted version.
        2. If ``min_rating > 0``, drop ratings below it (and set the remaining
           ones to 1 when ``implicit`` is set).
        3. Build ``Pui`` (row-normalized URM) and ``Piu`` (row-normalized,
           all-ones transposed URM), each raised element-wise to ``alpha``.
        4. Compute ``Piu * Pui`` in blocks of rows and keep, for every row, the
           ``topK`` largest non-zero entries with the diagonal zeroed out.
        5. Optionally L1-normalize the rows, apply ``similarityMatrixTopK``,
           convert to CSR, and store ``URM.dot(W_sparse)``.
        """

        self.URM = apply_feature_weighting(self.URM, self.feature_weighting, K=self.K, B=self.B)

        if self.min_rating > 0:
            self.URM.data[self.URM.data < self.min_rating] = 0
            self.URM.eliminate_zeros()
            if self.implicit:
                self.URM.data = np.ones(self.URM.data.size, dtype=np.float32)

        #Pui is the row-normalized urm
        Pui = normalize(self.URM, norm='l1', axis=1)

        #Piu is the column-normalized, "boolean" urm transposed
        X_bool = self.URM.transpose(copy=True)
        X_bool.data = np.ones(X_bool.data.size, np.float32)
        #ATTENTION: axis is still 1 because i transposed before the normalization
        Piu = normalize(X_bool, norm='l1', axis=1)
        del X_bool

        # Alfa power
        if self.alpha != 1.:
            Pui = Pui.power(self.alpha)
            Piu = Piu.power(self.alpha)

        # The similarity is computed as Piu * Pui.
        # Multiplication unpacked (block by block) for memory usage reasons
        block_dim = 200
        d_t = Piu

        # Use array as it reduces memory requirements compared to lists
        dataBlock = 10000000

        rows = np.zeros(dataBlock, dtype=np.int32)
        cols = np.zeros(dataBlock, dtype=np.int32)
        values = np.zeros(dataBlock, dtype=np.float32)

        # Number of buffer cells filled so far
        numCells = 0

        start_time = time.time()
        start_time_printBatch = start_time

        for current_block_start_row in range(0, Pui.shape[1], block_dim):

            # The last block may be shorter than block_dim
            if current_block_start_row + block_dim > Pui.shape[1]:
                block_dim = Pui.shape[1] - current_block_start_row

            similarity_block = d_t[current_block_start_row:current_block_start_row + block_dim, :] * Pui
            similarity_block = similarity_block.toarray()

            for row_in_block in range(block_dim):
                current_row = current_block_start_row + row_in_block

                row_data = similarity_block[row_in_block, :]
                # Zero the diagonal entry of this row
                row_data[current_row] = 0

                best = row_data.argsort()[::-1][:self.topK]

                notZerosMask = row_data[best] != 0.0

                values_to_add = row_data[best][notZerosMask]
                cols_to_add = best[notZerosMask]

                num_new = len(values_to_add)

                # Grow the buffers until the new entries fit
                while numCells + num_new > len(rows):
                    rows = np.concatenate((rows, np.zeros(dataBlock, dtype=np.int32)))
                    cols = np.concatenate((cols, np.zeros(dataBlock, dtype=np.int32)))
                    values = np.concatenate((values, np.zeros(dataBlock, dtype=np.float32)))

                # Copy the whole row at once instead of one element at a time
                end = numCells + num_new
                rows[numCells:end] = current_row
                cols[numCells:end] = cols_to_add
                values[numCells:end] = values_to_add
                numCells = end

            # Progress report, at most once per minute
            if time.time() - start_time_printBatch > 60:
                print("Processed {} ( {:.2f}% ) in {:.2f} minutes. Rows per second: {:.0f}".format(
                    current_block_start_row,
                    100.0 * float(current_block_start_row) / Pui.shape[1],
                    (time.time() - start_time) / 60,
                    float(current_block_start_row) / (time.time() - start_time)))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_printBatch = time.time()

        self.W_sparse = sp.csr_matrix((values[:numCells], (rows[:numCells], cols[:numCells])), shape=(Pui.shape[1], Pui.shape[1]))

        if self.normalize_similarity:
            self.W_sparse = normalize(self.W_sparse, norm='l1', axis=1)

        # NOTE: the ``!= False`` comparison is kept on purpose; a truthiness
        # test would change behavior for values such as ``None``.
        if self.topK != False:
            self.W_sparse = similarityMatrixTopK(self.W_sparse, k=self.topK)

        self.W_sparse = check_matrix(self.W_sparse, format='csr')

        # Precompute URM
        self.predicted_URM = self.URM.dot(self.W_sparse)