Example no. 1
    def fit(self,
            URM,
            verbose=True,
            l1_ratio=1.0,
            alpha=1.0,
            positive_only=True,
            topK=494,
            tuning=False,
            similarity_path=SIMILARITY_PATH):

        self.URM = URM
        self.l1_ratio = l1_ratio
        self.positive_only = positive_only
        self.topK = topK
        # Note: the alpha argument is not stored; run_fit() uses a fixed ElasticNet alpha of 1e-4
        self.helper = BaseFunction()

        if tuning:
            if not os.path.exists(os.getcwd() + similarity_path):
                self.run_fit()
                self.helper.export_similarity_matrix(os.getcwd() +
                                                     similarity_path,
                                                     self.W_sparse,
                                                     name=RECOMMENDER_NAME)
            self.W_sparse = self.helper.import_similarity_matrix(
                os.getcwd() + similarity_path)
            self.similarityProduct = self.URM.dot(self.W_sparse)

        else:
            self.run_fit()
            self.similarityProduct = self.URM.dot(self.W_sparse)
Example no. 2
    def __init__(self, recommender, name):
        self.recommender = recommender
        self.name = name
        self.helper = BaseFunction()
        self.helper.get_URM()
        self.helper.get_ICM()
        self.helper.get_UCM()
        self.helper.split_80_20()
        self.helper.get_target_users()
Example no. 3
class AlternatingLeastSquare:

    def __init__(self, n_factors=400, regularization=0.1104, iterations=50):
        self.n_factors = n_factors
        self.regularization = regularization
        self.iterations = iterations
        self.helper = BaseFunction()

    def run_fit(self):
        # Initialize the als model and fit it using the sparse item-user matrix
        model = implicit.als.AlternatingLeastSquares(factors=self.n_factors, regularization=self.regularization,
                                                     iterations=self.iterations)

        alpha_val = 24
        # Calculate the confidence by multiplying it by our alpha value.
        data_conf = (self.sparse_item_user * alpha_val).astype('double')

        # Fit the model
        model.fit(data_conf)

        # Get the user and item vectors from our trained model
        self.user_factors = model.user_factors
        self.item_factors = model.item_factors

    def fit(self, URM, tuning=False, user_path=USER_PATH, item_path=ITEM_PATH):
        self.URM = URM
        self.sparse_item_user = self.URM.T

        if tuning:
            if not os.path.exists(os.getcwd() + user_path) or not os.path.exists(os.getcwd() + item_path):
                self.run_fit()
                self.helper.export_nparr(user_path, self.user_factors)
                self.helper.export_nparr(item_path, self.item_factors)
            self.user_factors = self.helper.import_nparr(user_path)
            self.item_factors = self.helper.import_nparr(item_path)
        else:
            self.run_fit()

    def get_expected_ratings(self, user_id):
        scores = np.dot(self.user_factors[user_id], self.item_factors.T)
        return np.squeeze(scores)

    def recommend(self, user_id, at=10):

        expected_ratings = self.get_expected_ratings(user_id)

        recommended_items = np.flip(np.argsort(expected_ratings), 0)

        unseen_items_mask = np.in1d(recommended_items, self.URM[user_id].indices,
                                    assume_unique=True, invert=True)
        recommended_items = recommended_items[unseen_items_mask]
        return recommended_items[0:at]
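A minimal, self-contained sketch of the same confidence-weighted ALS idea, assuming the implicit package version used here expects an item-user matrix in fit (the toy URM and hyperparameter values are illustrative, not the project's):

import numpy as np
import scipy.sparse as sps
import implicit

# Toy user-item interaction matrix (4 users x 5 items)
URM = sps.csr_matrix(np.array([[1, 0, 0, 1, 0],
                               [0, 1, 1, 0, 0],
                               [1, 0, 1, 0, 1],
                               [0, 0, 0, 1, 1]], dtype=np.float32))

alpha_val = 24
model = implicit.als.AlternatingLeastSquares(factors=8, regularization=0.1, iterations=5)
model.fit((URM.T * alpha_val).astype('double'))        # item-user confidence matrix

# Score all items for user 0 and rank them, mirroring get_expected_ratings/recommend above
scores = model.user_factors[0].dot(model.item_factors.T)
print(np.argsort(scores)[::-1][:3])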
Example no. 4
    def __init__(self,
                 positive_threshold=1,
                 recompile_cython=False,
                 final_model_sparse_weights=True,
                 train_with_sparse_weights=False,
                 symmetric=True,
                 epochs=200,
                 batch_size=1,
                 lambda_i=0.01,
                 lambda_j=0.001,
                 learning_rate=0.01,
                 topK=10,
                 sgd_mode='adagrad',
                 gamma=0.995,
                 beta_1=0.9,
                 beta_2=0.999):

        #### Retrieving parameters for fitting #######
        self.epochs = epochs
        self.batch_size = batch_size
        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.learning_rate = learning_rate
        self.topK = topK
        self.sgd_mode = sgd_mode
        self.gamma = gamma
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.symmetric = symmetric
        #############################################

        self.normalize = False
        self.positive_threshold = positive_threshold

        self.train_with_sparse_weights = train_with_sparse_weights
        self.sparse_weights = final_model_sparse_weights

        self.helper = BaseFunction()

        if self.train_with_sparse_weights:
            self.sparse_weights = True

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")
Example no. 5
class SLIM_BPR_Cython(object):

    #######################################################################################
    #                                   INIT SLIM_BPR                                     #
    #######################################################################################

    def __init__(self,
                 positive_threshold=1,
                 recompile_cython=False,
                 final_model_sparse_weights=True,
                 train_with_sparse_weights=False,
                 symmetric=True,
                 epochs=200,
                 batch_size=1,
                 lambda_i=0.01,
                 lambda_j=0.001,
                 learning_rate=0.01,
                 topK=10,
                 sgd_mode='adagrad',
                 gamma=0.995,
                 beta_1=0.9,
                 beta_2=0.999):

        #### Retrieving parameters for fitting #######
        self.epochs = epochs
        self.batch_size = batch_size
        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.learning_rate = learning_rate
        self.topK = topK
        self.sgd_mode = sgd_mode
        self.gamma = gamma
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.symmetric = symmetric
        #############################################

        self.normalize = False
        self.positive_threshold = positive_threshold

        self.train_with_sparse_weights = train_with_sparse_weights
        self.sparse_weights = final_model_sparse_weights

        self.helper = BaseFunction()

        if self.train_with_sparse_weights:
            self.sparse_weights = True

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    #######################################################################################
    #                                     RUN FITTING                                     #
    #######################################################################################

    def fit(self, URM_train, tuning=False, similarity_path=SIMILARITY_PATH):
        self.URM = URM_train
        self.tuning = tuning
        self.n_users = URM_train.shape[0]
        self.n_items = URM_train.shape[1]

        # Select only positive interactions
        URM_train_positive = self.URM.copy()
        self.URM_mask = self.URM.copy()

        self.URM_mask.data = self.URM_mask.data >= self.positive_threshold
        self.URM_mask.eliminate_zeros()

        assert self.URM_mask.nnz > 0, "SLIM_BPR_Cython: URM_train_positive is empty, positive threshold is too high"

        # Start fitting
        URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
        URM_train_positive.eliminate_zeros()

        from Recommenders.Slim.SlimBPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch
        self.cythonEpoch = SLIM_BPR_Cython_Epoch(
            self.URM_mask,
            train_with_sparse_weights=self.train_with_sparse_weights,
            final_model_sparse_weights=self.sparse_weights,
            topK=self.topK,
            learning_rate=self.learning_rate,
            li_reg=self.lambda_i,
            lj_reg=self.lambda_j,
            batch_size=self.batch_size,
            symmetric=self.symmetric,
            sgd_mode=self.sgd_mode,
            gamma=self.gamma,
            beta_1=self.beta_1,
            beta_2=self.beta_2)

        self._initialize_incremental_model()
        self.epochs_best = 0
        currentEpoch = 0

        while currentEpoch < self.epochs:
            self._run_epoch()
            self._update_best_model()
            currentEpoch += 1

        self.get_S_incremental_and_set_W(similarity_path)
        self.cythonEpoch._dealloc()
        sys.stdout.flush()

        self.score = self.URM.dot(self.W_sparse)

    def _initialize_incremental_model(self):
        self.S_incremental = self.cythonEpoch.get_S()
        self.S_best = self.S_incremental.copy()

    def _update_incremental_model(self):
        self.get_S_incremental_and_set_W()

    def _update_best_model(self):
        self.S_best = self.S_incremental.copy()

    def _run_epoch(self):
        self.cythonEpoch.epochIteration_Cython()

    def get_S_incremental_and_set_W(self, similarity_path=SIMILARITY_PATH):

        self.S_incremental = self.cythonEpoch.get_S()

        if self.train_with_sparse_weights:

            if self.tuning:
                if not os.path.exists(os.getcwd() + similarity_path):
                    self.W_sparse = self.S_incremental
                    self.helper.export_similarity_matrix(os.getcwd() +
                                                         similarity_path,
                                                         self.W_sparse,
                                                         name=RECOMMENDER_NAME)
                self.W_sparse = self.helper.import_similarity_matrix(
                    os.getcwd() + similarity_path)
            else:
                self.W_sparse = self.S_incremental

        else:
            if self.tuning:
                if not os.path.exists(os.getcwd() + similarity_path):
                    self.W_sparse = similarityMatrixTopK(self.S_incremental,
                                                         k=self.topK)
                    self.helper.export_similarity_matrix(os.getcwd() +
                                                         similarity_path,
                                                         self.W_sparse,
                                                         name=RECOMMENDER_NAME)
                self.W_sparse = self.helper.import_similarity_matrix(
                    os.getcwd() + similarity_path)
            else:
                self.W_sparse = similarityMatrixTopK(self.S_incremental,
                                                     k=self.topK)

    def runCompilationScript(self):

        # Run compile script setting the working directory to ensure the compiled files are contained in the
        # appropriate subfolder and not the project root

        file_subfolder = "/Slim/Cython"
        file_to_compile_list = ['SLIM_BPR_Cython_Epoch.pyx']

        run_compile_subprocess(file_subfolder, file_to_compile_list)

        print("{}: Compiled module {} in subfolder: {}".format(
            RECOMMENDER_NAME, file_to_compile_list, file_subfolder))

        # Command to run compilation script
        # python compile_script.py SLIM_BPR_Cython_Epoch.pyx build_ext --inplace

        # Command to generate html report
        # cython -a SLIM_BPR_Cython_Epoch.pyx

    def get_expected_ratings(self, user_id):
        expected_ratings = self.score[user_id].todense()
        return np.squeeze(np.asarray(expected_ratings))

    def recommend(self, user_id, at=10):
        # compute the scores using the dot product
        scores = self.get_expected_ratings(user_id)
        ranking = scores.argsort()[::-1]
        unseen_items_mask = np.in1d(ranking,
                                    self.URM[user_id].indices,
                                    assume_unique=True,
                                    invert=True)
        ranking = ranking[unseen_items_mask]
        return ranking[:at]
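The actual epoch loop lives in the compiled SLIM_BPR_Cython_Epoch module, which is not shown here. As a rough reference only, one BPR-style SGD update on a dense item-item weight matrix S could look like the sketch below (plain NumPy, no adaptive learning rates, not the project's Cython code):

import numpy as np

def bpr_slim_step(URM_csr, S, user, pos_item, neg_item,
                  lr=0.01, lambda_i=0.01, lambda_j=0.001):
    """Apply one BPR SGD step in place on the dense item-item matrix S."""
    # Items the user interacted with (CSR row of the URM)
    seen = URM_csr.indices[URM_csr.indptr[user]:URM_csr.indptr[user + 1]]

    # Predicted preference difference between the positive and negative item
    x_ui = S[seen, pos_item].sum()
    x_uj = S[seen, neg_item].sum()
    gradient = 1.0 / (1.0 + np.exp(x_ui - x_uj))   # derivative of the BPR log-likelihood

    # Gradient ascent on the seen rows, with L2 regularization
    S[seen, pos_item] += lr * (gradient - lambda_i * S[seen, pos_item])
    S[seen, neg_item] -= lr * (gradient + lambda_j * S[seen, neg_item])

    # Keep the diagonal at zero so an item never recommends itself
    S[pos_item, pos_item] = 0.0
    S[neg_item, neg_item] = 0.0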
Example no. 6
    def __init__(self):
        self.helper = BaseFunction()
        self.URM = None
        self.URM_train = None
        self.URM_test = None
        self.URM = self.helper.get_URM()
        self.helper.split_80_20()
        self.URM_train, self.URM_test = self.helper.URM_train, self.helper.URM_test
        self.helper.get_ICM()
        self.helper.get_UCM()
        self.helper.get_target_users()
        self.ICM_all = self.helper.ICM_all
        self.UCM_all = self.helper.UCM_all
        self.initial_target_user = self.helper.userlist_unique

        MAP_ItemCF_per_group = []
        MAP_UserCF_per_group = []
        MAP_ItemCBF_per_group = []
        MAP_UserCBF_per_group = []
        MAP_ItemCBF_BM25_per_group = []
        MAP_UserCBF_BM25_per_group = []
        MAP_ItemCBF_TFIDF_per_group = []
        MAP_UserCBF_TFIDF_per_group = []
        MAP_Slim_per_group = []
        MAP_Elastic_per_group = []
        MAP_PureSVD_per_group = []
        MAP_P3Alpha_per_group = []
        MAP_RP3Beta_per_group = []
        MAP_ALS_per_group = []
        MAP_Hybrid2_per_group = []
        MAP_Hybrid6_per_group = []
        MAP_H6_bis_per_group = []
        MAP_Hybrid7_per_group = []
        MAP_Hybrid8_per_group = []
        MAP_HybridCB_per_group = []

        self.profile_length = np.ediff1d(self.URM_train.indptr)
        self.blocksize = int(len(self.profile_length) * 0.05)
        self.sortedusers = np.argsort(self.profile_length)

        self.ItemCF = ItemKNNCFRecommender()
        self.UserCF = UserKNNCFRecommender()
        self.ItemCBF = ItemCBFKNNRecommender()
        self.UserCBF = UserCBFKNNRecommender()
        self.Slim = SLIM_BPR_Cython()
        self.Elastic = SLIMElasticNetRecommender()
        self.PureSVD = PureSVDRecommender()
        self.P3Alpha = P3AlphaRecommender()
        self.RP3Beta = RP3BetaRecommender()
        self.ALS = AlternatingLeastSquare()
        self.H6_bis = Hybrid_Combo6_bis("Combo6_bis", UserCBFKNNRecommender())

        self.ItemCBF.fit(
            self.URM_train,
            self.ICM_all,
            tuning=True,
            similarity_path="/SimilarityProduct/ItemCBF_similarity.npz")
        self.UserCBF.fit(
            self.URM_train,
            self.UCM_all,
            tuning=True,
            similarity_path="/SimilarityProduct/UserCBF_similarity.npz")
        self.ItemCF.fit(
            self.URM_train,
            tuning=True,
            similarity_path="/SimilarityProduct/ItemCF_similarity.npz")
        self.UserCF.fit(
            self.URM_train,
            tuning=True,
            similarity_path="/SimilarityProduct/UserCF_similarity.npz")
        self.Slim.fit(self.URM_train,
                      tuning=True,
                      similarity_path="/SimilarityProduct/Slim_similarity.npz")
        self.Elastic.fit(
            self.URM_train,
            tuning=True,
            similarity_path="/SimilarityProduct/Elastic_similarity.npz")
        self.PureSVD.fit(self.URM_train)
        self.P3Alpha.fit(
            self.URM_train,
            tuning=True,
            similarity_path="/SimilarityProduct/P3Aplha_similarity.npz")
        self.RP3Beta.fit(
            self.URM_train,
            tuning=True,
            similarity_path="/SimilarityProduct/RP3Beta_similarity.npz")
        self.ALS.fit(self.URM_train)
        self.H6_bis.fit(self.URM_train,
                        self.ICM_all,
                        self.UCM_all,
                        tuning=True)

        for group_id in range(0, 20):
            start_pos = group_id * self.blocksize
            end_pos = min((group_id + 1) * self.blocksize,
                          len(self.profile_length))

            users_in_group = self.sortedusers[start_pos:end_pos]

            users_in_group_p_len = self.profile_length[users_in_group]

            print("Group {}, average p.len {:.2f}, min {}, max {}".format(
                group_id, users_in_group_p_len.mean(),
                users_in_group_p_len.min(), users_in_group_p_len.max()))
            users_not_in_group_flag = np.isin(self.sortedusers,
                                              users_in_group,
                                              invert=True)
            users_not_in_group = self.sortedusers[users_not_in_group_flag]

            users_in_group = list(
                set(self.initial_target_user) - set(list(users_not_in_group)))

            results = evaluate_algorithm_classes(self.URM_test,
                                                 users_in_group,
                                                 self.ItemCBF,
                                                 at=10)
            MAP_ItemCBF_per_group.append(results)

            results = evaluate_algorithm_classes(self.URM_test,
                                                 users_in_group,
                                                 self.ItemCF,
                                                 at=10)
            MAP_ItemCF_per_group.append(results)

            results = evaluate_algorithm_classes(self.URM_test,
                                                 users_in_group,
                                                 self.UserCF,
                                                 at=10)
            MAP_UserCF_per_group.append(results)

            results = evaluate_algorithm_classes(self.URM_test,
                                                 users_in_group,
                                                 self.Slim,
                                                 at=10)
            MAP_Slim_per_group.append(results)

            results = evaluate_algorithm_classes(self.URM_test,
                                                 users_in_group,
                                                 self.Elastic,
                                                 at=10)
            MAP_Elastic_per_group.append(results)

            results = evaluate_algorithm_classes(self.URM_test,
                                                 users_in_group,
                                                 self.PureSVD,
                                                 at=10)
            MAP_PureSVD_per_group.append(results)

            results = evaluate_algorithm_classes(self.URM_test,
                                                 users_in_group,
                                                 self.P3Alpha,
                                                 at=10)
            MAP_P3Alpha_per_group.append(results)

            results = evaluate_algorithm_classes(self.URM_test,
                                                 users_in_group,
                                                 self.RP3Beta,
                                                 at=10)
            MAP_RP3Beta_per_group.append(results)

            results = evaluate_algorithm_classes(self.URM_test,
                                                 users_in_group,
                                                 self.UserCBF,
                                                 at=10)
            MAP_UserCBF_per_group.append(results)

            results = evaluate_algorithm_classes(self.URM_test,
                                                 users_in_group,
                                                 self.ALS,
                                                 at=10)
            MAP_ALS_per_group.append(results)

            results = evaluate_algorithm_classes(self.URM_test,
                                                 users_in_group,
                                                 self.H6_bis,
                                                 at=10)
            MAP_H6_bis_per_group.append(results)

        pyplot.plot(MAP_UserCBF_per_group, label="UserCBF")
        pyplot.plot(MAP_ItemCBF_per_group, label="ItemCBF")
        pyplot.plot(MAP_ItemCF_per_group, label="ItemCF")
        pyplot.plot(MAP_UserCF_per_group, label="UserCF")
        pyplot.plot(MAP_Slim_per_group, label="Slim")
        pyplot.plot(MAP_Elastic_per_group, label="Elastic")
        pyplot.plot(MAP_P3Alpha_per_group, label="P3Alpha")
        pyplot.plot(MAP_RP3Beta_per_group, label="RP3Beta")
        pyplot.plot(MAP_PureSVD_per_group, label="PureSVD")
        pyplot.plot(MAP_ALS_per_group, label="ALS")
        pyplot.plot(MAP_H6_bis_per_group, label="H6_bis")

        pyplot.xlabel('User Group')
        pyplot.ylabel('MAP')
        pyplot.xticks(np.arange(0, 20, 1))
        pyplot.grid(b=True,
                    axis='both',
                    color='firebrick',
                    linestyle='--',
                    linewidth=0.5)
        pyplot.legend(loc='lower right')
        pyplot.show()
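The grouping above sorts users by profile length and slices them into twenty 5% blocks; a minimal sketch of that partition on randomly generated data (illustrative only):

import numpy as np
import scipy.sparse as sps

URM_train = sps.random(1000, 200, density=0.05, format='csr')
profile_length = np.ediff1d(URM_train.indptr)          # interactions per user
block_size = int(len(profile_length) * 0.05)
sorted_users = np.argsort(profile_length)              # coldest users first

group_id = 0                                           # first (coldest) group
users_in_group = sorted_users[group_id * block_size:(group_id + 1) * block_size]
print(profile_length[users_in_group].mean())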
Example no. 7
    def __init__(self, n_factors=400, regularization=0.1104, iterations=50):
        self.n_factors = n_factors
        self.regularization = regularization
        self.iterations = iterations
        self.helper = BaseFunction()
Example no. 8
import os
import re

import numpy as np
import pandas as pd
from collections import Counter
from Base.BaseFunction import BaseFunction

filename = os.path.join(os.getcwd(), "Results/Hybrid-2020-01-06_20.12.34.csv")


def load_sample():
    cols = ['user_id', 'item_list']
    sample_data = pd.read_csv(filename, names=cols, header=0)
    return sample_data


if __name__ == "__main__":

    h = BaseFunction()
    h.get_URM()
    s = load_sample()
    x = s.item_list.values
    it = []

    for i in x:
        it.append(re.findall(r'\d+', i))
    flattened = []

    for sublist in it:
        for val in sublist:
            flattened.append(int(val))

    item_pop = np.ediff1d(h.URM_all.tocsc().indptr)
    coldi = list(np.where(item_pop == 0)[0])
Example no. 9
class RP3BetaRecommender(object):

    #######################################################################################
    #                                       INIT CLASS                                    #
    #######################################################################################

    def __init__(self):
        self.verbose = True
        self.helper = BaseFunction()

    def __str__(self):
        return "RP3beta(alpha={}, beta={}, min_rating={}, topk={}, implicit={}, normalize_similarity={})".format(
            self.alpha, self.beta, self.min_rating, self.topK, self.implicit,
            self.normalize_similarity)

    def _print(self, string):
        if self.verbose:
            print("{}: {}".format(RECOMMENDER_NAME, string))

    def _check_format(self):

        if not self._URM_train_format_checked:

            if self.URM_train.getformat() != "csr":
                self._print(
                    "PERFORMANCE ALERT compute_item_score: {} is not {}, this will significantly slow down the computation."
                    .format("URM_train", "csr"))

            self._URM_train_format_checked = True

        if not self._W_sparse_format_checked:

            if self.W_sparse.getformat() != "csr":
                self._print(
                    "PERFORMANCE ALERT compute_item_score: {} is not {}, this will significantly slow down the computation."
                    .format("W_sparse", "csr"))

            self._W_sparse_format_checked = True

    #######################################################################################
    #                                     FIT RECOMMENDER                                 #
    #######################################################################################

    def run_fit(self):
        # if X.dtype != np.float32:
        #     print("RP3beta fit: For memory usage reasons, we suggest to use np.float32 as dtype for the dataset")

        if self.min_rating > 0:
            self.URM_train.data[self.URM_train.data < self.min_rating] = 0
            self.URM_train.eliminate_zeros()
            if self.implicit:
                self.URM_train.data = np.ones(self.URM_train.data.size,
                                              dtype=np.float32)

        # Pui is the row-normalized urm
        Pui = normalize(self.URM_train, norm='l1', axis=1)

        # Piu is the column-normalized, "boolean" urm transposed
        X_bool = self.URM_train.transpose(copy=True)
        X_bool.data = np.ones(X_bool.data.size, np.float32)

        # Taking the degree of each item to penalize top popular
        # Some rows might be zero, make sure their degree remains zero
        X_bool_sum = np.array(X_bool.sum(axis=1)).ravel()

        degree = np.zeros(self.URM_train.shape[1])

        nonZeroMask = X_bool_sum != 0.0

        degree[nonZeroMask] = np.power(X_bool_sum[nonZeroMask], -self.beta)

        # ATTENTION: axis is still 1 because I transposed before the normalization
        Piu = normalize(X_bool, norm='l1', axis=1)
        del (X_bool)

        # Alpha power
        if self.alpha != 1.:
            Pui = Pui.power(self.alpha)
            Piu = Piu.power(self.alpha)

        # Final matrix is computed as Pui * Piu * Pui
        # Multiplication unpacked for memory usage reasons
        block_dim = 200
        d_t = Piu

        # Use array as it reduces memory requirements compared to lists
        dataBlock = 10000000

        rows = np.zeros(dataBlock, dtype=np.int32)
        cols = np.zeros(dataBlock, dtype=np.int32)
        values = np.zeros(dataBlock, dtype=np.float32)

        numCells = 0

        start_time = time.time()
        start_time_printBatch = start_time

        for current_block_start_row in range(0, Pui.shape[1], block_dim):

            if current_block_start_row + block_dim > Pui.shape[1]:
                block_dim = Pui.shape[1] - current_block_start_row

            similarity_block = d_t[
                current_block_start_row:current_block_start_row +
                block_dim, :] * Pui
            similarity_block = similarity_block.toarray()

            for row_in_block in range(block_dim):
                row_data = np.multiply(similarity_block[row_in_block, :],
                                       degree)
                row_data[current_block_start_row + row_in_block] = 0

                best = row_data.argsort()[::-1][:self.topK]

                notZerosMask = row_data[best] != 0.0

                values_to_add = row_data[best][notZerosMask]
                cols_to_add = best[notZerosMask]

                for index in range(len(values_to_add)):

                    if numCells == len(rows):
                        rows = np.concatenate(
                            (rows, np.zeros(dataBlock, dtype=np.int32)))
                        cols = np.concatenate(
                            (cols, np.zeros(dataBlock, dtype=np.int32)))
                        values = np.concatenate(
                            (values, np.zeros(dataBlock, dtype=np.float32)))

                    rows[numCells] = current_block_start_row + row_in_block
                    cols[numCells] = cols_to_add[index]
                    values[numCells] = values_to_add[index]

                    numCells += 1

            if time.time() - start_time_printBatch > 60:
                self._print(
                    "Processed {} ( {:.2f}% ) in {:.2f} minutes. Rows per second: {:.0f}"
                    .format(
                        current_block_start_row,
                        100.0 * float(current_block_start_row) / Pui.shape[1],
                        (time.time() - start_time) / 60,
                        float(current_block_start_row) /
                        (time.time() - start_time)))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_printBatch = time.time()

        self.W_sparse = sps.csr_matrix(
            (values[:numCells], (rows[:numCells], cols[:numCells])),
            shape=(Pui.shape[1], Pui.shape[1]))

        if self.normalize_similarity:
            self.W_sparse = normalize(self.W_sparse, norm='l1', axis=1)

        if self.topK != False:
            self.W_sparse = similarityMatrixTopK(self.W_sparse, k=self.topK)

        self.W_sparse = check_matrix(self.W_sparse, format='csr')

    def fit(self,
            URM_train,
            alpha=0.41417,
            beta=0.04995,
            min_rating=0,
            topK=54,
            implicit=False,
            normalize_similarity=True,
            tuning=False,
            similarity_path=SIMILARITY_PATH):

        self.URM_train = check_matrix(URM_train.copy(),
                                      'csr',
                                      dtype=np.float32)
        self.URM_train.eliminate_zeros()
        self.n_users, self.n_items = self.URM_train.shape

        self.alpha = alpha
        self.beta = beta
        self.min_rating = min_rating
        self.topK = topK
        self.implicit = implicit
        self.normalize_similarity = normalize_similarity

        if tuning:
            if not os.path.exists(os.getcwd() + similarity_path):
                self.run_fit()
                self.helper.export_similarity_matrix(os.getcwd() +
                                                     similarity_path,
                                                     self.W_sparse,
                                                     name=RECOMMENDER_NAME)
            self.W_sparse = self.helper.import_similarity_matrix(
                os.getcwd() + similarity_path)

        else:
            self.run_fit()
        self.similarityProduct = self.URM_train.dot(self.W_sparse)

    #######################################################################################
    #                                  RUN RECOMMENDATION                                 #
    #######################################################################################

    def get_expected_ratings(self, user_id):
        expected_ratings = self.similarityProduct[user_id].toarray().ravel()
        return np.squeeze(np.asarray(expected_ratings))

    def recommend(self, user_id, at=10):
        # compute the scores using the dot product
        expected_ratings = self.get_expected_ratings(user_id)
        ranking = expected_ratings.argsort()[::-1]
        unseen_items_mask = np.in1d(ranking,
                                    self.URM_train[user_id].indices,
                                    assume_unique=True,
                                    invert=True)
        ranking = ranking[unseen_items_mask]
        return ranking[:at]
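run_fit above builds the RP3beta similarity block by block to limit memory use. For a small matrix the same quantity can be computed directly, which may make the algorithm easier to follow (toy data, alpha and beta taken from the fit defaults above):

import numpy as np
import scipy.sparse as sps
from sklearn.preprocessing import normalize

URM = sps.csr_matrix(np.array([[1, 0, 1, 0],
                               [0, 1, 1, 0],
                               [1, 1, 0, 1]], dtype=np.float32))
alpha, beta = 0.41417, 0.04995

Pui = normalize(URM, norm='l1', axis=1).power(alpha)       # user -> item transition probabilities
Piu = normalize(URM.T, norm='l1', axis=1).power(alpha)     # item -> user transition probabilities

# Popularity penalty: item degree raised to -beta, zero-degree items stay at zero
item_degree = np.asarray(URM.astype(bool).sum(axis=0)).ravel().astype(np.float32)
penalty = np.zeros_like(item_degree)
penalty[item_degree > 0] = np.power(item_degree[item_degree > 0], -beta)

W = (Piu @ Pui).toarray() * penalty                        # item-item similarity
np.fill_diagonal(W, 0.0)                                   # an item must not recommend itself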
Example no. 10
class BayesianSearch:

    #######################################################################################
    #                             INIT CLASS BAYESIAN SEARCH                              #
    #######################################################################################

    def __init__(self, recommender, name):
        self.recommender = recommender
        self.name = name
        self.helper = BaseFunction()
        self.helper.get_URM()
        self.helper.split_80_20()
        self.helper.get_target_users()
        self.helper.get_UCM()
        self.helper.get_ICM()
        self.optimazer = None

    def instanziate_optimazer(self, bayesian_method_call, pbounds):
        optimizer = BayesianOptimization(
            f=bayesian_method_call,
            pbounds=pbounds,
            verbose=2,  # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
        )

        optimizer.maximize(init_points=30, n_iter=1000, acq='ucb', kappa=0.1)

    #######################################################################################
    #                                  STEPS TO MAXIMIZE                                  #
    #######################################################################################

    def step_hybrid_three(self, weight1=0, weight2=0, weight3=0):
        start_time = time.time()
        UCM_all = self.helper.UCM_all
        ICM_all = self.helper.ICM_all
        self.recommender.fit(self.helper.URM_train,
                             ICM_all=ICM_all,
                             UCM_all=UCM_all,
                             weights=[weight1, weight2, weight3],
                             tuning=True)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_hybrid_four(self, weight1=0, weight2=0, weight3=0, weight4=0):
        start_time = time.time()
        UCM_all = self.helper.UCM_all
        ICM_all = self.helper.ICM_all
        self.recommender.fit(self.helper.URM_train,
                             ICM_all=ICM_all,
                             UCM_all=UCM_all,
                             weights=[weight1, weight2, weight3, weight4],
                             tuning=True)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_hybrid_six(self,
                        weight1=0,
                        weight2=0,
                        weight3=0,
                        weight4=0,
                        weight5=0,
                        weight6=0):
        start_time = time.time()
        UCM_all = self.helper.UCM_all
        ICM_all = self.helper.ICM_all
        self.recommender.fit(
            self.helper.URM_train,
            ICM_all=ICM_all,
            UCM_all=UCM_all,
            weights=[weight1, weight2, weight3, weight4, weight5, weight6],
            tuning=True)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_hybrid_seven(self,
                          weight1=0,
                          weight2=0,
                          weight3=0,
                          weight4=0,
                          weight5=0,
                          weight6=0,
                          weight7=0):
        start_time = time.time()
        UCM_all = self.helper.UCM_all
        ICM_all = self.helper.ICM_all
        self.recommender.fit(self.helper.URM_train,
                             ICM_all=ICM_all,
                             UCM_all=UCM_all,
                             weights=[
                                 weight1, weight2, weight3, weight4, weight5,
                                 weight6, weight7
                             ],
                             tuning=True)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_fallBack_Hybrid(self, weight1=0, weight2=0):
        start_time = time.time()
        UCM_all = self.helper.UCM_all
        ICM_all = self.helper.ICM_all
        self.recommender.fit(self.helper.URM_train,
                             ICM_all=ICM_all,
                             UCM_all=UCM_all,
                             weights_fallback=[int(weight1),
                                               int(weight2)],
                             tuning=True)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_slim(self, weight1=0, weight2=0, weight3=0):
        start_time = time.time()
        self.recommender = SLIM_BPR_Cython(lambda_i=weight1,
                                           lambda_j=weight2,
                                           learning_rate=weight3)
        self.recommender.fit(self.helper.URM_train)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_elastic(self, weight1=0, weight2=0, weight3=0):
        start_time = time.time()
        self.recommender.fit(self.helper.URM_train,
                             l1_ratio=weight1,
                             alpha=weight2,
                             topK=int(weight3))
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_ALS(self, weight1=0, weight2=0, weight3=0):
        start_time = time.time()
        self.recommender = AlternatingLeastSquare(n_factors=int(weight1),
                                                  regularization=weight2,
                                                  iterations=int(weight3))
        self.recommender.fit(self.helper.URM_train)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_Item_CB(self, weight1=0, weight2=0):
        start_time = time.time()
        ICM_all = self.helper.ICM_all
        self.recommender.fit(self.helper.URM_train,
                             ICM_all,
                             knn=int(weight1),
                             shrink=int(weight2),
                             tuning=False)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_User_CB(self, weight1=0, weight2=0):
        start_time = time.time()
        UCM_all = self.helper.UCM_all
        self.recommender.fit(self.helper.URM_train,
                             UCM_all,
                             knn=int(weight1),
                             shrink=int(weight2))
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_P3Alpha(self, weight1=0, weight2=0):
        start_time = time.time()
        self.recommender.fit(self.helper.URM_train,
                             topK=int(weight1),
                             alpha=weight2)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_RP3Beta(self, alpha=0, beta=0, min_rating=0, topK=0):
        start_time = time.time()
        self.recommender.fit(self.helper.URM_train,
                             alpha=alpha,
                             beta=beta,
                             min_rating=min_rating,
                             topK=int(topK))
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_PureSVD_randomSVD(self, n_components, n_iter):
        start_time = time.time()
        self.recommender.fit(self.helper.URM_train,
                             n_components=int(n_components),
                             n_iter=int(n_iter))
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_FunkSVD(self, epoch, num_factors, learning_rate, user_reg,
                     item_reg):
        start_time = time.time()
        self.recommender = MatrixFactorization_FunkSVD_Cython(
            int(epoch), int(num_factors), learning_rate, user_reg, item_reg)
        self.recommender.fit(self.helper.URM_train)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_TEST(self, t1, t2, t3, t4, t5):
        start_time = time.time()
        UCM_all = self.helper.UCM_all
        ICM_all = self.helper.ICM_all
        self.recommender = Hybrid_User_Wise("Hybrid User Wise",
                                            UserCBFKNNRecommender())
        self.recommender.fit(self.helper.URM_train,
                             ICM_all=ICM_all,
                             UCM_all=UCM_all,
                             thre1=t1,
                             thre2=t2,
                             thre3=t3,
                             thre4=t4,
                             thre5=t5,
                             tuning=True)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    def step_all(self,
                 H0_ICF_sh=0,
                 H0_ICF_tK=0,
                 H1_UCF_sh=0,
                 H1_UCF_tK=0,
                 H2_ICB_sh=0,
                 H2_ICB_tK=0,
                 H3_UCB_sh=0,
                 H3_UCB_tK=0,
                 H4_El_tK=0,
                 H5_RP3_a=0,
                 H5_RP3_b=0,
                 H5_RP3_tK=0,
                 H6_SL_bs=0,
                 H6_SL_ep=0,
                 H6_SL_l_i=0,
                 H6_SL_l_j=0,
                 H6_SL_l_r=0,
                 H6_SL_tK=0,
                 H7_ALS_i=0,
                 H7_ALS_nf=0,
                 H7_ALS_re=0,
                 weight1=0,
                 weight2=0,
                 weight3=0,
                 weight4=0,
                 weight5=0,
                 weight6=0,
                 weight7=0):

        start_time = time.time()
        UCM_all = self.helper.UCM_all
        ICM_all = self.helper.ICM_all

        ItemCF = ItemKNNCFRecommender()
        UserCF = UserKNNCFRecommender()
        ItemCB = ItemCBFKNNRecommender()
        UserCB = UserCBFKNNRecommender()
        ElasticNet = SLIMElasticNetRecommender()
        RP3Beta = RP3BetaRecommender()
        Slim = SLIM_BPR_Cython(batch_size=int(H6_SL_bs),
                               epochs=int(H6_SL_ep),
                               lambda_i=H6_SL_l_i,
                               lambda_j=H6_SL_l_j,
                               learning_rate=H6_SL_l_r,
                               topK=int(H6_SL_tK))
        ALS = AlternatingLeastSquare(iterations=int(H7_ALS_i),
                                     n_factors=int(H7_ALS_nf),
                                     regularization=H7_ALS_re)

        ItemCF.fit(self.helper.URM_train, knn=int(H0_ICF_tK), shrink=H0_ICF_sh)
        UserCF.fit(self.helper.URM_train, knn=int(H1_UCF_tK), shrink=H1_UCF_sh)
        ItemCB.fit(self.helper.URM_train,
                   ICM_all,
                   knn=int(H2_ICB_tK),
                   shrink=H2_ICB_sh)
        UserCB.fit(self.helper.URM_train,
                   UCM_all,
                   knn=int(H3_UCB_tK),
                   shrink=H3_UCB_sh)
        ElasticNet.fit(self.helper.URM_train, topK=int(H4_El_tK))
        RP3Beta.fit(self.helper.URM_train,
                    alpha=H5_RP3_a,
                    beta=H5_RP3_b,
                    topK=int(H5_RP3_tK))
        Slim.fit(self.helper.URM_train)
        ALS.fit(self.helper.URM_train)

        self.recommender = Hybrid_Achille_Tuning("Hybrid_Achille_Tuning_All",
                                                 UserCB)
        self.recommender.fit(self.helper.URM_train,
                             ICM_all=ICM_all,
                             UCM_all=UCM_all,
                             weights=[
                                 weight1, weight2, weight3, weight4, weight5,
                                 weight6, weight7
                             ],
                             ItemCF=ItemCF,
                             UserCF=UserCF,
                             ItemCB=ItemCB,
                             ElasticNet=ElasticNet,
                             RP3=RP3Beta,
                             Slim=Slim,
                             ALS=ALS)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative
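Each step_* method above is just an objective function for BayesianOptimization: it fits the recommender with the candidate hyperparameters and returns MAP@10 on the validation split. A self-contained sketch of that pattern with a dummy objective (the function and bounds are placeholders, not the project's):

from bayes_opt import BayesianOptimization

def dummy_step(weight1, weight2):
    # Stand-in for a step_* method: fit a recommender with (weight1, weight2)
    # and return the metric to maximize, e.g. MAP@10
    return -(weight1 - 0.3) ** 2 - (weight2 - 0.7) ** 2

optimizer = BayesianOptimization(f=dummy_step,
                                 pbounds={'weight1': (0, 1), 'weight2': (0, 1)},
                                 verbose=2)
optimizer.maximize(init_points=5, n_iter=20)
print(optimizer.max)     # best score and the weights that produced it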
Example no. 11
class Tuner_Singles():

    #######################################################################################
    #                                      INIT CLASS                                     #
    #######################################################################################

    def __init__(self, recommender, name):
        self.recommender = recommender
        self.name = name
        self.helper = BaseFunction()
        self.helper.get_URM()
        self.helper.get_ICM()
        self.helper.get_UCM()
        self.helper.split_80_20()
        self.helper.get_target_users()

    #######################################################################################
    #                                   STEP FOR TUNING                                   #
    #######################################################################################

    def step_weight(self, w1, w2):
        start_time = time.time()
        print("----------------------------------------")
        print("HybridCombination: " + self.name)
        print([w1, w2])
        print("----------------------------------------")
        list_UCM = [self.helper.UCM_age, self.helper.UCM_region]
        list_ICM = [
            self.helper.ICM, self.helper.ICM_price, self.helper.ICM_asset
        ]
        self.recommender.fit(self.helper.URM_train, [w1, w2],
                             list_ICM=list_ICM,
                             list_UCM=list_UCM,
                             tuning=False)
        cumulative = evaluation.evaluate_algorithm(self.helper.URM_test,
                                                   self.recommender,
                                                   at=10)
        elapsed_time = time.time() - start_time
        print("----------------" + str(elapsed_time) + "----------------")
        return cumulative

    #######################################################################################
    #                                  GENETIC ALGORITHM                                  #
    #######################################################################################

    def random_pop(self):
        weights = []

        for i in range(self.pop_size):
            w1 = random.randint(250, 600)  # epoch
            w2 = random.randint(100, 300)  # knn
            line = [w1, w2]
            weights.append(np.array(line))

        return weights

    def evaluate_pop(self):
        appo = []
        for chromosome in self.pop:
            res = self.evaluate_chromosome(chromosome)
            appo.append(res)
        return appo

    def evaluate_chromosome(self, chromosome):
        return self.step_weight(w1=chromosome[0], w2=chromosome[1])

    def my_index(self, l, item):
        for i in range(len(l)):
            if (item == l[i]).all():
                return i
        return -1

    def select_parents(self):
        sorted_pop_score = sorted(self.pop_scores, reverse=False)
        probs = []
        taken_pop = [False] * self.pop_size
        taken_score = [False] * self.pop_size

        l = (self.pop_size * (self.pop_size + 1)) / 2

        for i in self.pop:
            pos_of_i_in_pop = self.my_index(self.pop, i)
            while taken_pop[pos_of_i_in_pop]:
                pos_of_i_in_pop += self.my_index(
                    self.pop[pos_of_i_in_pop + 1:], i) + 1

            score_of_pos = self.pop_scores[pos_of_i_in_pop]
            ranking = self.my_index(sorted_pop_score, score_of_pos)

            while taken_score[ranking]:
                ranking += self.my_index(sorted_pop_score[ranking + 1:],
                                         score_of_pos) + 1

            taken_score[ranking] = True
            taken_pop[pos_of_i_in_pop] = True
            prob = (ranking + 1) / l
            probs.append(prob)

        parents = [
            self.pop[i] for i in np.random.choice(len(self.pop), 2, p=probs)
        ]

        return parents

    def generate_offspring(self, p1, p2):
        size = len(p1)
        offspring = np.empty((size), dtype='object')

        offspring[0] = p1[0]
        offspring[1] = p2[1]

        return offspring

    def crossover(self, parents):
        offspring1 = self.generate_offspring(parents[0], parents[1])
        offspring2 = self.generate_offspring(parents[1], parents[0])
        offspring1 = self.mutation(offspring1)
        offspring2 = self.mutation(offspring2)

        return offspring1, offspring2

    def mutation(self, offspring):
        if np.random.choice([True, False],
                            1,
                            p=[self.p_mutation, 1 - self.p_mutation]) == True:
            offspring += random.randint(0, 100)
        return offspring

    def elitism(self):
        els = self.pop[:]
        score_c = self.pop_scores[:]

        for _ in range(4):
            index = np.argmax(score_c)
            score_c.pop(index)
            self.new_pop.append(els.pop(index))

    #######################################################################################
    #                               RUN GENETIC ALGORITHM                                 #
    #######################################################################################

    def run(self, max=1000, pop_size=10, p_mutation=0.1):
        self.pop_size = pop_size
        self.p_mutation = p_mutation

        self.pop = self.random_pop()
        self.pop_scores = self.evaluate_pop()
        for i in range(max):
            self.new_pop = []
            self.elitism()
            while len(self.new_pop) < len(self.pop):
                parents = self.select_parents()
                off1, off2 = self.crossover(parents)
                self.new_pop.append(off1)
                self.new_pop.append(off2)
            self.pop = self.new_pop
            self.pop_scores = self.evaluate_pop()
            print("-----------------ENDED------------------")
            print(self.pop)
            print(np.argmax(self.pop_scores))
            print("----------------------------------------")
Example no. 12
    def __init__(self):
        self.verbose = True
        self.helper = BaseFunction()

        self._URM_train_format_checked = False
        self._W_sparse_format_checked = False
Example no. 13
class SLIMElasticNetRecommender(BaseRecommender):
    def run_fit(self):
        # Display ConvergenceWarning only once and not for every item it occurs
        warnings.simplefilter("once", category=ConvergenceWarning)

        # initialize the ElasticNet model
        self.model = ElasticNet(alpha=1e-4,
                                l1_ratio=self.l1_ratio,
                                positive=self.positive_only,
                                fit_intercept=False,
                                copy_X=False,
                                precompute=True,
                                selection='random',
                                max_iter=100,
                                tol=1e-4)

        URM_train = check_matrix(self.URM, 'csc', dtype=np.float32)

        n_items = URM_train.shape[1]

        # Use array as it reduces memory requirements compared to lists
        dataBlock = 10000000

        rows = np.zeros(dataBlock, dtype=np.int32)
        cols = np.zeros(dataBlock, dtype=np.int32)
        values = np.zeros(dataBlock, dtype=np.float32)

        numCells = 0

        start_time = time.time()
        start_time_printBatch = start_time

        # fit each item's factors sequentially (not in parallel)
        for currentItem in range(n_items):

            # get the target column
            y = URM_train[:, currentItem].toarray()

            if y.sum() == 0.0:
                continue

            # set the j-th column of X to zero
            start_pos = URM_train.indptr[currentItem]
            end_pos = URM_train.indptr[currentItem + 1]

            current_item_data_backup = URM_train.data[start_pos:end_pos].copy()
            URM_train.data[start_pos:end_pos] = 0.0

            # fit one ElasticNet model per column
            self.model.fit(URM_train, y)

            nonzero_model_coef_index = self.model.sparse_coef_.indices
            nonzero_model_coef_value = self.model.sparse_coef_.data

            local_topK = min(len(nonzero_model_coef_value) - 1, self.topK)

            relevant_items_partition = (
                -nonzero_model_coef_value
            ).argpartition(local_topK)[0:local_topK]
            relevant_items_partition_sorting = np.argsort(
                -nonzero_model_coef_value[relevant_items_partition])
            ranking = relevant_items_partition[
                relevant_items_partition_sorting]

            for index in range(len(ranking)):

                if numCells == len(rows):
                    rows = np.concatenate(
                        (rows, np.zeros(dataBlock, dtype=np.int32)))
                    cols = np.concatenate(
                        (cols, np.zeros(dataBlock, dtype=np.int32)))
                    values = np.concatenate(
                        (values, np.zeros(dataBlock, dtype=np.float32)))

                rows[numCells] = nonzero_model_coef_index[ranking[index]]
                cols[numCells] = currentItem
                values[numCells] = nonzero_model_coef_value[ranking[index]]

                numCells += 1

            # finally, replace the original values of the j-th column
            URM_train.data[start_pos:end_pos] = current_item_data_backup

            elapsed_time = time.time() - start_time
            new_time_value, new_time_unit = seconds_to_biggest_unit(
                elapsed_time)

            if time.time(
            ) - start_time_printBatch > 300 or currentItem == n_items - 1:
                print(
                    "Processed {} ( {:.2f}% ) in {:.2f} {}. Items per second: {:.2f}"
                    .format(currentItem + 1,
                            100.0 * float(currentItem + 1) / n_items,
                            new_time_value, new_time_unit,
                            float(currentItem) / elapsed_time))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_printBatch = time.time()

        # generate the sparse weight matrix

        self.W_sparse = sps.csr_matrix(
            (values[:numCells], (rows[:numCells], cols[:numCells])),
            shape=(n_items, n_items),
            dtype=np.float32)

    def fit(self,
            URM,
            verbose=True,
            l1_ratio=1.0,
            alpha=1.0,
            positive_only=True,
            topK=494,
            tuning=False,
            similarity_path=SIMILARITY_PATH):

        self.URM = URM
        self.l1_ratio = l1_ratio
        self.positive_only = positive_only
        self.topK = topK
        # NOTE: the alpha argument is currently unused; run_fit() hard-codes alpha=1e-4 in the ElasticNet model
        self.helper = BaseFunction()

        if tuning:
            if not os.path.exists(os.getcwd() + similarity_path):
                self.run_fit()
                self.helper.export_similarity_matrix(os.getcwd() +
                                                     similarity_path,
                                                     self.W_sparse,
                                                     name=RECOMMENDER_NAME)
            self.W_sparse = self.helper.import_similarity_matrix(
                os.getcwd() + similarity_path)
            self.similarityProduct = self.URM.dot(self.W_sparse)

        else:
            self.run_fit()
            self.similarityProduct = self.URM.dot(self.W_sparse)
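
A minimal usage sketch for the recommender above (a hypothetical illustration, not part of the original project): the URM below is synthetic and exists only to make the snippet self-contained, and the sketch assumes BaseRecommender can be instantiated without arguments. After fit(), similarityProduct holds URM.dot(W_sparse), so one row of that matrix gives the expected ratings for the corresponding user.

import numpy as np
import scipy.sparse as sps

# Synthetic URM used only to make this sketch self-contained (200 users x 50 items)
URM_train = sps.random(200, 50, density=0.1, format='csr', dtype=np.float32)

recommender = SLIMElasticNetRecommender()   # assumes BaseRecommender needs no constructor arguments
recommender.fit(URM_train, l1_ratio=1.0, topK=50, tuning=False)

# Row user_id of similarityProduct = URM.dot(W_sparse) contains the expected ratings for that user
user_id = 0
scores = recommender.similarityProduct[user_id].toarray().ravel()
top_10_items = np.argsort(-scores)[:10]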
Example n. 14
0
class Runner:

    #######################################################################################
    #                                 INSTANCE OF RUNNER                                  #
    #######################################################################################

    def __init__(self, recommender, name, evaluate=True):
        print("Evaluation: " + str(evaluate))
        self.recommender = recommender
        self.evaluate = evaluate
        self.name = name
        self.functionality = BaseFunction()

    #######################################################################################
    #                                     WRITE RESULT                                    #
    #######################################################################################

    def write_csv(self, rows, name):
        fields = FIELDS
        timestr = time.strftime("%Y-%m-%d_%H.%M.%S")
        file_path = "Results/" + name + "-" + timestr + ".csv"

        # newline='' avoids blank rows being inserted by the csv module on Windows
        with open(file_path, 'w', newline='') as csv_file:
            csv_write_head = csv.writer(csv_file, delimiter=',')
            csv_write_head.writerow(fields)
            csv_write_content = csv.writer(csv_file, delimiter=' ')
            csv_write_content.writerows(rows)

    #######################################################################################
    #                                     RUN FITTING                                     #
    #######################################################################################

    def fit_recommender(self,
                        requires_icm=False,
                        requires_ucm=False,
                        load_similarity=True):
        print("Fitting model...")
        ICM_all = self.functionality.ICM_all
        UCM_all = self.functionality.UCM_all

        if not self.evaluate:

            if requires_icm and requires_ucm:
                self.recommender.fit(self.functionality.URM_all, ICM_all,
                                     UCM_all)
            elif requires_icm:
                self.recommender.fit(self.functionality.URM_all, ICM_all)
            elif requires_ucm:
                self.recommender.fit(self.functionality.URM_all, UCM_all)
            else:
                self.recommender.fit(self.functionality.URM_all)
        else:
            self.functionality.split_80_20(0.8)
            if requires_icm and requires_ucm:
                self.recommender.fit(self.functionality.URM_train,
                                     ICM_all,
                                     UCM_all,
                                     tuning=True)
            elif requires_icm:
                self.recommender.fit(self.functionality.URM_train,
                                     ICM_all,
                                     tuning=True)
            elif requires_ucm:
                self.recommender.fit(self.functionality.URM_train,
                                     UCM_all,
                                     tuning=True)
            else:
                self.recommender.fit(self.functionality.URM_train, tuning=True)
        print("Model fitted")

    #######################################################################################
    #                                 RUN RECOMMENDATION                                  #
    #######################################################################################

    def run_recommendations(self):
        recommendations = []
        saved_tuple = []
        print("Computing recommendations...")
        for user in tqdm(self.functionality.userlist_unique):
            index = [str(user) + ","]
            recommendations.clear()

            for recommendation in self.recommender.recommend(user):
                recommendations.append(recommendation)
            saved_tuple.append(index + recommendations)
        print("Recommendations computed")
        if not self.evaluate:
            print("Printing csv...")
            self.write_csv(saved_tuple, self.name)
            print("Ended")
        return saved_tuple

    #######################################################################################
    #                                   RUN COMPUTATION                                   #
    #######################################################################################

    def run(self, requires_ucm=False, requires_icm=False):
        self.functionality.get_URM()

        if requires_icm:
            self.functionality.get_ICM()

        if requires_ucm:
            self.functionality.get_UCM()

        self.functionality.get_target_users()
        self.fit_recommender(requires_icm, requires_ucm)
        if not self.evaluate:
            # Recommendations on target users are only needed when writing the output CSV
            self.run_recommendations()
        if self.evaluate:
            evaluation.evaluate_algorithm(self.functionality.URM_test,
                                          self.recommender,
                                          at=10)
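
A hypothetical entry point tying the two classes above together (a sketch, assuming the dataset files expected by BaseFunction are available and that module-level constants such as FIELDS are defined): with evaluate=True the Runner splits the URM 80/20 and scores the model through evaluation.evaluate_algorithm, while evaluate=False writes a submission CSV under Results/.

if __name__ == '__main__':
    # "SLIMElasticNet" is just a placeholder name used for the output CSV file
    recommender = SLIMElasticNetRecommender()
    runner = Runner(recommender, "SLIMElasticNet", evaluate=True)
    runner.run(requires_icm=False, requires_ucm=False)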
Example n. 17
0
class Compute_Similarity_Python:


    def __init__(self, dataMatrix, topK=100, shrink = 0, normalize = True,
                 asymmetric_alpha = 0.5, tversky_alpha = 1.0, tversky_beta = 1.0,
                 similarity = "cosine", row_weights = None):
        """
        Computes the cosine similarity on the columns of dataMatrix
        If it is computed on URM=|users|x|items|, pass the URM as is.
        If it is computed on ICM=|items|x|features|, pass the ICM transposed.
        :param dataMatrix:
        :param topK:
        :param shrink:
        :param normalize:           If True divide the dot product by the product of the norms
        :param row_weights:         Multiply the values in each row by a specified value. Array
        :param asymmetric_alpha     Coefficient alpha for the asymmetric cosine
        :param similarity:  "cosine"        computes Cosine similarity
                            "adjusted"      computes Adjusted Cosine, removing the average of the users
                            "asymmetric"    computes Asymmetric Cosine
                            "pearson"       computes Pearson Correlation, removing the average of the items
                            "jaccard"       computes Jaccard similarity for binary interactions using Tanimoto
                            "dice"          computes Dice similarity for binary interactions
                            "tversky"       computes Tversky similarity for binary interactions
                            "tanimoto"      computes Tanimoto coefficient for binary interactions

        """
        """
        Asymmetric Cosine as described in: 
        Aiolli, F. (2013, October). Efficient top-n recommendation for very large scale binary rated datasets. In Proceedings of the 7th ACM conference on Recommender systems (pp. 273-280). ACM.
        
        """

        super(Compute_Similarity_Python, self).__init__()

        self.helper = BaseFunction()
        self.shrink = shrink
        self.normalize = normalize

        self.n_rows, self.n_columns = dataMatrix.shape
        self.TopK = min(topK, self.n_columns)

        self.asymmetric_alpha = asymmetric_alpha
        self.tversky_alpha = tversky_alpha
        self.tversky_beta = tversky_beta

        self.dataMatrix = dataMatrix.copy()

        self.adjusted_cosine = False
        self.asymmetric_cosine = False
        self.pearson_correlation = False
        self.tanimoto_coefficient = False
        self.dice_coefficient = False
        self.tversky_coefficient = False

        if similarity == "adjusted":
            self.adjusted_cosine = True
        elif similarity == "asymmetric":
            self.asymmetric_cosine = True
        elif similarity == "pearson":
            self.pearson_correlation = True
        elif similarity == "jaccard" or similarity == "tanimoto":
            self.tanimoto_coefficient = True
            # Tanimoto has a specific kind of normalization
            self.normalize = False

        elif similarity == "dice":
            self.dice_coefficient = True
            self.normalize = False

        elif similarity == "tversky":
            self.tversky_coefficient = True
            self.normalize = False

        elif similarity == "cosine":
            pass
        else:
            raise ValueError("Cosine_Similarity: value for parameter 'mode' not recognized."
                             " Allowed values are: 'cosine', 'pearson', 'adjusted', 'asymmetric', 'jaccard', 'tanimoto',"
                             "dice, tversky."
                             " Passed value was '{}'".format(similarity))


        self.use_row_weights = False

        if row_weights is not None:

            if dataMatrix.shape[0] != len(row_weights):
                raise ValueError("Cosine_Similarity: provided row_weights and dataMatrix have different number of rows."
                                 "Col_weights has {} columns, dataMatrix has {}.".format(len(row_weights), dataMatrix.shape[0]))

            self.use_row_weights = True
            self.row_weights = row_weights.copy()
            self.row_weights_diag = sps.diags(self.row_weights)

            self.dataMatrix_weighted = self.dataMatrix.T.dot(self.row_weights_diag).T






    def applyAdjustedCosine(self):
        """
        Remove from every Data point the average for the corresponding row
        :return:
        """

        self.dataMatrix = self.helper.check_matrix(self.dataMatrix, 'csr')


        interactionsPerRow = np.diff(self.dataMatrix.indptr)

        nonzeroRows = interactionsPerRow > 0
        sumPerRow = np.asarray(self.dataMatrix.sum(axis=1)).ravel()

        rowAverage = np.zeros_like(sumPerRow)
        rowAverage[nonzeroRows] = sumPerRow[nonzeroRows] / interactionsPerRow[nonzeroRows]


        # Split in blocks to avoid duplicating the whole Data structure
        start_row = 0
        end_row= 0

        blockSize = 1000


        while end_row < self.n_rows:

            end_row = min(self.n_rows, end_row + blockSize)

            self.dataMatrix.data[self.dataMatrix.indptr[start_row]:self.dataMatrix.indptr[end_row]] -= \
                np.repeat(rowAverage[start_row:end_row], interactionsPerRow[start_row:end_row])

            start_row += blockSize




    def applyPearsonCorrelation(self):
        """
        Remove from every Data point the average for the corresponding column
        :return:
        """

        self.dataMatrix = self.helper.check_matrix(self.dataMatrix, 'csc')


        interactionsPerCol = np.diff(self.dataMatrix.indptr)

        nonzeroCols = interactionsPerCol > 0
        sumPerCol = np.asarray(self.dataMatrix.sum(axis=0)).ravel()

        colAverage = np.zeros_like(sumPerCol)
        colAverage[nonzeroCols] = sumPerCol[nonzeroCols] / interactionsPerCol[nonzeroCols]


        # Split in blocks to avoid duplicating the whole Data structure
        start_col = 0
        end_col= 0

        blockSize = 1000


        while end_col < self.n_columns:

            end_col = min(self.n_columns, end_col + blockSize)

            self.dataMatrix.data[self.dataMatrix.indptr[start_col]:self.dataMatrix.indptr[end_col]] -= \
                np.repeat(colAverage[start_col:end_col], interactionsPerCol[start_col:end_col])

            start_col += blockSize


    def useOnlyBooleanInteractions(self):

        # Split in blocks to avoid duplicating the whole Data structure
        start_pos = 0
        end_pos= 0

        blockSize = 1000


        while end_pos < len(self.dataMatrix.data):

            end_pos = min(len(self.dataMatrix.data), end_pos + blockSize)

            self.dataMatrix.data[start_pos:end_pos] = np.ones(end_pos-start_pos)

            start_pos += blockSize




    def compute_similarity(self, start_col=None, end_col=None, block_size = 100):
        """
        Compute the similarity for the given dataset
        :param self:
        :param start_col: column to begin with
        :param end_col: column to stop before, end_col is excluded
        :return:
        """

        values = []
        rows = []
        cols = []

        start_time = time.time()
        start_time_print_batch = start_time
        processedItems = 0


        if self.adjusted_cosine:
            self.applyAdjustedCosine()

        elif self.pearson_correlation:
            self.applyPearsonCorrelation()

        elif self.tanimoto_coefficient or self.dice_coefficient or self.tversky_coefficient:
            self.useOnlyBooleanInteractions()


        # We explore the matrix column-wise
        self.dataMatrix = self.helper.check_matrix(self.dataMatrix, 'csc')


        # Compute sum of squared values to be used in normalization
        sumOfSquared = np.array(self.dataMatrix.power(2).sum(axis=0)).ravel()

        # Tanimoto does not require the square root to be applied
        if not (self.tanimoto_coefficient or self.dice_coefficient or self.tversky_coefficient):
            sumOfSquared = np.sqrt(sumOfSquared)

        if self.asymmetric_cosine:
            sumOfSquared_to_1_minus_alpha = np.power(sumOfSquared, 2 * (1 - self.asymmetric_alpha))
            sumOfSquared_to_alpha = np.power(sumOfSquared, 2 * self.asymmetric_alpha)


        self.dataMatrix = self.helper.check_matrix(self.dataMatrix, 'csc')

        start_col_local = 0
        end_col_local = self.n_columns

        if start_col is not None and start_col>0 and start_col<self.n_columns:
            start_col_local = start_col

        if end_col is not None and end_col>start_col_local and end_col<self.n_columns:
            end_col_local = end_col




        start_col_block = start_col_local

        this_block_size = 0

        # Compute all similarities for each item using vectorization
        while start_col_block < end_col_local:


            end_col_block = min(start_col_block + block_size, end_col_local)
            this_block_size = end_col_block-start_col_block



            # All Data points for a given item
            item_data = self.dataMatrix[:, start_col_block:end_col_block]
            item_data = item_data.toarray().squeeze()

            # If only 1 feature avoid last dimension to disappear
            if item_data.ndim == 1:
                item_data = np.atleast_2d(item_data)

            if self.use_row_weights:
                this_block_weights = self.dataMatrix_weighted.T.dot(item_data)

            else:
                # Compute item similarities
                this_block_weights = self.dataMatrix.T.dot(item_data)



            for col_index_in_block in range(this_block_size):

                if this_block_size == 1:
                    this_column_weights = this_block_weights
                else:
                    this_column_weights = this_block_weights[:,col_index_in_block]


                columnIndex = col_index_in_block + start_col_block
                this_column_weights[columnIndex] = 0.0

                # Apply normalization and shrinkage, ensure denominator != 0
                if self.normalize:

                    if self.asymmetric_cosine:
                        denominator = sumOfSquared_to_alpha[columnIndex] * sumOfSquared_to_1_minus_alpha + self.shrink + 1e-6
                    else:
                        denominator = sumOfSquared[columnIndex] * sumOfSquared + self.shrink + 1e-6

                    this_column_weights = np.multiply(this_column_weights, 1 / denominator)


                # Apply the specific denominator for Tanimoto
                elif self.tanimoto_coefficient:
                    denominator = sumOfSquared[columnIndex] + sumOfSquared - this_column_weights + self.shrink + 1e-6
                    this_column_weights = np.multiply(this_column_weights, 1 / denominator)

                elif self.dice_coefficient:
                    denominator = sumOfSquared[columnIndex] + sumOfSquared + self.shrink + 1e-6
                    this_column_weights = np.multiply(this_column_weights, 1 / denominator)

                elif self.tversky_coefficient:
                    denominator = this_column_weights + \
                                  (sumOfSquared[columnIndex] - this_column_weights)*self.tversky_alpha + \
                                  (sumOfSquared - this_column_weights)*self.tversky_beta + self.shrink + 1e-6
                    this_column_weights = np.multiply(this_column_weights, 1 / denominator)

                # If no normalization or tanimoto is selected, apply only shrink
                elif self.shrink != 0:
                    this_column_weights = this_column_weights/self.shrink


                #this_column_weights = this_column_weights.toarray().ravel()

                # Sort indices and select TopK
                # Sorting is done in three steps. Faster than plain np.argsort for a higher number of items
                # - Partition the Data to extract the set of relevant items
                # - Sort only the relevant items
                # - Get the original item index
                relevant_items_partition = (-this_column_weights).argpartition(self.TopK-1)[0:self.TopK]
                relevant_items_partition_sorting = np.argsort(-this_column_weights[relevant_items_partition])
                top_k_idx = relevant_items_partition[relevant_items_partition_sorting]

                # Incrementally build sparse matrix, do not add zeros
                notZerosMask = this_column_weights[top_k_idx] != 0.0
                numNotZeros = np.sum(notZerosMask)

                values.extend(this_column_weights[top_k_idx][notZerosMask])
                rows.extend(top_k_idx[notZerosMask])
                cols.extend(np.ones(numNotZeros) * columnIndex)


            # Add previous block size
            processedItems += this_block_size


            if time.time() - start_time_print_batch >= 30 or end_col_block==end_col_local:
                columnPerSec = processedItems / (time.time() - start_time + 1e-9)

                print("Similarity column {} ( {:2.0f} % ), {:.2f} column/sec, elapsed time {:.2f} min".format(
                    processedItems, processedItems / (end_col_local - start_col_local) * 100, columnPerSec, (time.time() - start_time)/ 60))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_print_batch = time.time()


            start_col_block += block_size

        # End while on columns

        W_sparse = sps.csr_matrix((values, (rows, cols)),
                                  shape=(self.n_columns, self.n_columns),
                                  dtype=np.float32)


        return W_sparse
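
A short, hypothetical usage sketch for Compute_Similarity_Python (not part of the original file): the URM below is synthetic and exists only to make the snippet self-contained. Passing the URM as-is yields an item-item similarity, and multiplying the URM by the resulting W_sparse gives the expected ratings, mirroring how the recommenders above use their similarity matrices.

import numpy as np
import scipy.sparse as sps

# Synthetic URM used only to make this sketch self-contained (100 users x 30 items)
URM_train = sps.random(100, 30, density=0.2, format='csr', dtype=np.float32)

# Item-item cosine similarity with shrinkage: the columns of the URM are the items
similarity_object = Compute_Similarity_Python(URM_train, topK=10, shrink=10,
                                              normalize=True, similarity="cosine")
W_sparse = similarity_object.compute_similarity()   # |items| x |items| sparse matrix
item_scores = URM_train.dot(W_sparse)                # expected ratings for every user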
Example n. 18
0
 def __init__(self):
     self.verbose = True
     self.helper = BaseFunction()
Example n. 19
0
 def __init__(self):
     self.helper = BaseFunction()
     self.similarityProduct = None
     self.URM = None
     self.UserCBF = None