Code example #1
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    dataReader = Movielens10MReader()

    URM_train = dataReader.get_URM_train()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()

    output_folder_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        Random, TopPop, P3alphaRecommender, RP3betaRecommender,
        ItemKNNCFRecommender, UserKNNCFRecommender,
        MatrixFactorization_BPR_Cython, MatrixFactorization_FunkSVD_Cython,
        PureSVDRecommender, SLIM_BPR_Cython, SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator_validation = SequentialEvaluator(URM_validation, cutoff_list=[5])
    evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[5, 10])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize="MAP",
        n_cases=20,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path)

    # pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    # resultList = pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)

    for recommender_class in collaborative_algorithm_list:

        try:

            runParameterSearch_Collaborative_partial(recommender_class)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()
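The docstring above lists the artifacts the search writes to disk. A minimal reload sketch, assuming the _best_parameter file is a pickled dictionary and that file names use the recommender name as a prefix (both naming assumptions, not verified against the framework):

import pickle

# Hypothetical file name; the exact naming depends on the framework version
best_parameters_path = output_folder_path + "ItemKNNCFRecommender_best_parameters"

with open(best_parameters_path, "rb") as handle:
    best_parameters = pickle.load(handle)

# Re-train the winning configuration, as the docstring describes
recommender = ItemKNNCFRecommender(URM_train)
recommender.fit(**best_parameters)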
Code example #2
    def evaluateRecommendations(self,
                                URM_test,
                                at=5,
                                minRatingsPerUser=1,
                                exclude_seen=True,
                                filterCustomItems=np.array([], dtype=int),
                                filterCustomUsers=np.array([], dtype=int)):
        """
        Speed info:
        - Sparse weights: batch mode is 2x faster than sequential
        - Dense weights: batch and sequential speed are equivalent


        :param URM_test:            URM to be used for testing
        :param at: 5                    Length of the recommended item list
        :param minRatingsPerUser: 1     Users with fewer than this number of interactions will not be evaluated
        :param exclude_seen: True       Whether to remove already seen items from the recommended items

        :param filterCustomItems: Array, default empty           Item IDs NOT to take into account when recommending
        :param filterCustomUsers: Array, default empty           User IDs NOT to take into account when recommending
        :return:
        """

        import warnings

        warnings.warn(
            "DEPRECATED! Use Base.Evaluation.SequentialEvaluator.evaluateRecommendations()",
            DeprecationWarning)

        from Base.Evaluation.Evaluator import SequentialEvaluator

        evaluator = SequentialEvaluator(URM_test, [at],
                                        exclude_seen=exclude_seen,
                                        minRatingsPerUser=minRatingsPerUser,
                                        ignore_items=filterCustomItems,
                                        ignore_users=filterCustomUsers)

        results_run, results_run_string = evaluator.evaluateRecommender(self)

        results_run = results_run[at]

        results_run_lowercase = {}

        for key in results_run.keys():
            results_run_lowercase[key.lower()] = results_run[key]

        return results_run_lowercase
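Since the method above only wraps the new API, the direct replacement looks like this. A short sketch with the same cutoff and filtering options; the metric key is assumed to be the uppercase name (the wrapper lowercases it):

from Base.Evaluation.Evaluator import SequentialEvaluator

evaluator = SequentialEvaluator(URM_test, [5],
                                exclude_seen=True,
                                minRatingsPerUser=1)

results_run, results_run_string = evaluator.evaluateRecommender(recommender)

# Results are keyed by cutoff first, then by metric name
print(results_run[5]["MAP"])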
Code example #3
    def fit(self, alpha):
        evaluator_MF = SequentialEvaluator(URM_test_list=self._URM_test, cutoff_list=[10])
        #bprmf = MatrixFactorization_Cython(self._URM_train,
        #                                   positive_threshold=0,
        #                                   algorithm="MF_BPR",
        #                                   )
        # self.MF_BPRW, self.MF_BPRH = bprmf.fit(epochs=200,
        #                                       num_factors=5,
        #                                       batch_size=1,
        #                                       sgd_mode='adagrad'
        #                                       )
        #print(evaluator_MF.evaluateRecommender(bprmf))


        self.bpr_WII = SLIM_BPR_Cython(
            self._URM_train, positive_threshold=0, symmetric=True).fit(
                epochs=10, topK=200, batch_size=200,
                sgd_mode='adagrad', learning_rate=1e-2)

        self.bpr_WUU = SLIM_BPR_Cython(
            self._URM_train.T, positive_threshold=0).fit(
                epochs=10, topK=200, batch_size=200,
                sgd_mode='adagrad', learning_rate=1e-2)

        print(self.bpr_WII)
        print("\n \n max bprII: {0}".format(self.bpr_WII.max()))
        print(self.bpr_WUU)
        print("\n \n max bprUU: {0}".format(self.bpr_WUU.max()))
        self._similarity_matrixUU = Cosine_Similarity(self._URM_train.T,
                                                      topK=200,
                                                      shrink=15,
                                                      normalize=True,
                                                      mode='cosine').compute_similarity()
        print("\n \n max uu: {0}".format(self._similarity_matrixUU.max()))

        # self._similarity_matrixII = Cosine_Similarity(self._URM_train.tocsc(),
        #                                              topK=200,
        #                                              shrink=10,
        #                                              normalize=True,
        #                                              mode='cosine').compute_similarity()

        # print("\n \n max II: {0}".format(self._similarity_matrixII.max()))

        self._similarity_matrixCBF = Cosine_Similarity(self._ICM.T,
                                                       topK=10,
                                                       shrink=10,
                                                       normalize=True,
                                                       mode='cosine').compute_similarity()
        # print(self._similarity_matrixII)
        self.latent_x, self.latent_y = (IALS_numpy()).fit(self._URM_train)
        print(self.latent_x.dot(self.latent_y.T))
        print("\n \n max IALS: {0}".format(self.latent_x.dot(self.latent_y.T).max()))
Code example #4
    def fit(self,
            epochs=300,
            batch_size=1000,
            num_factors=10,
            learning_rate=0.01,
            sgd_mode='sgd',
            user_reg=0.0,
            positive_reg=0.0,
            negative_reg=0.0,
            stop_on_validation=False,
            lower_validatons_allowed=5,
            validation_metric="MAP",
            evaluator_object=None,
            validation_every_n=5):

        self.num_factors = num_factors
        self.sgd_mode = sgd_mode
        self.batch_size = batch_size
        self.learning_rate = learning_rate

        if evaluator_object is None and stop_on_validation:
            evaluator_object = SequentialEvaluator(self.URM_validation, [5])

        # Import compiled module
        from MatrixFactorization.Cython.MatrixFactorization_Cython_Epoch import MatrixFactorization_Cython_Epoch

        # FUNK_SVD and ASY_SVD share the same epoch configuration
        if self.algorithm in ("FUNK_SVD", "ASY_SVD"):

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                self.URM_train,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=1,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=0.0)

        elif self.algorithm == "MF_BPR":

            # Select only positive interactions
            URM_train_positive = self.URM_train.copy()

            URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
            URM_train_positive.eliminate_zeros()

            assert URM_train_positive.nnz > 0, "MatrixFactorization_Cython: URM_train_positive is empty, positive threshold is too high"

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                URM_train_positive,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=1,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=negative_reg)

        self._train_with_early_stopping(epochs,
                                        validation_every_n,
                                        stop_on_validation,
                                        validation_metric,
                                        lower_validatons_allowed,
                                        evaluator_object,
                                        algorithm_name=self.algorithm)

        self.W = self.W_best
        self.H = self.H_best

        sys.stdout.flush()
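Early stopping in this fit is driven by stop_on_validation together with evaluator_object; when no evaluator is passed, one is built from self.URM_validation. A hedged usage sketch (the constructor call is assumed):

from Base.Evaluation.Evaluator import SequentialEvaluator

evaluator_earlystopping = SequentialEvaluator(URM_validation, [5])

recommender = MatrixFactorization_BPR_Cython(URM_train)  # assumed constructor
recommender.fit(epochs=300,
                num_factors=10,
                sgd_mode='adagrad',
                stop_on_validation=True,
                validation_metric="MAP",
                lower_validatons_allowed=5,  # stop after 5 validations without improvement
                validation_every_n=5,
                evaluator_object=evaluator_earlystopping)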
Code example #5
URM_test = data.get_URM_test()

print("URM_train shape : {}".format(URM_train.shape))
print("URM_test shape : {}".format(URM_test.shape))
print("ICM shape : {}".format(ICM.shape))

print("Dimensions")

print(URM_train.shape)
print(URM_test.shape)

# ------------------------
# Instantiating Evaluators
# ------------------------

evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[10])

evaluator_test = EvaluatorWrapper(evaluator_test)

# ------------------------
# Recommender class definition
# ------------------------

# ------------------------
# Generating lists of weights to evaluate
# ------------------------

#alpha_list = np.arange(0.1, 1.0, 0.1)
alpha_list = np.arange(0.0, 1.05, 0.05)
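alpha_list is presumably swept over the blending weight of a two-model hybrid. A minimal sketch of such a sweep, using a hypothetical HybridRecommender whose fit() takes the weight alpha, and the unwrapped SequentialEvaluator (the EvaluatorWrapper return format is not shown in this snippet):

evaluator_hybrid = SequentialEvaluator(URM_test, cutoff_list=[10])

best_alpha, best_map = None, -1.0

for alpha in alpha_list:
    # Hypothetical hybrid: blends two models with weights alpha and (1 - alpha)
    hybrid = HybridRecommender(URM_train)
    hybrid.fit(alpha=alpha)

    results_run, _ = evaluator_hybrid.evaluateRecommender(hybrid)
    current_map = results_run[10]["MAP"]

    if current_map > best_map:
        best_alpha, best_map = alpha, current_map

print("Best alpha: {} with MAP@10 = {}".format(best_alpha, best_map))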
Code example #6
from Base.Evaluation.Evaluator import SequentialEvaluator
from SLIM_BPR.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
from refactored_code.utils import load_random_urms, load_icm

if __name__ == '__main__':
    urm_train, urm_test = load_random_urms()
    icm = load_icm(0.7, 0.3, 0.5)
    slim = SLIM_BPR_Cython(urm_train, positive_threshold=0,
                           symmetric=True).fit(epochs=10,
                                               topK=300,
                                               batch_size=500,
                                               sgd_mode='adagrad',
                                               learning_rate=1e-4)
    evaluator_MF = SequentialEvaluator(urm_test, cutoff_list=[10])
    print(evaluator_MF.evaluateRecommender(slim))
Code example #7
recommenderDictionary = {
    DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [train_data],
    DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
        'URM_validation': buildURMMatrix(test_data)
    },
    DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
    DictionaryKeys.FIT_KEYWORD_ARGS: {
        "playlist_ids": target_data['playlist_id'],
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validatons_allowed": 5
    },
    DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary
}

evaluator_validation = SequentialEvaluator(buildURMMatrix(test_data),
                                           cutoff_list=[10])

evaluator_validation = EvaluatorWrapper(evaluator_validation)

parameterSearch = BayesianSearch(SLIM_BPR_Cython, evaluator_validation)

n_cases = 2
metric_to_optimize = "MAP"
output_root_path = "output/"

best_parameters = parameterSearch.search(recommenderDictionary,
                                         n_cases=n_cases,
                                         output_root_path=output_root_path,
                                         metric=metric_to_optimize)
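best_parameters holds the fit keyword arguments of the winning case (see the docstring in code example #1), so the best configuration can be replayed directly. A sketch; whether the fixed FIT_KEYWORD_ARGS are already merged into best_parameters depends on the framework version, so they are passed explicitly here:

recommender = SLIM_BPR_Cython(train_data,
                              URM_validation=buildURMMatrix(test_data))
recommender.fit(playlist_ids=target_data['playlist_id'],
                **best_parameters)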
Code example #8
    def fit(self,
            epochs=300,
            logFile=None,
            batch_size=1000,
            lambda_i=0.0,
            lambda_j=0.0,
            learning_rate=1e-4,
            topK=200,
            sgd_mode='adagrad',
            gamma=0.995,
            beta_1=0.9,
            beta_2=0.999,
            stop_on_validation=False,
            lower_validatons_allowed=5,
            validation_metric="MAP",
            evaluator_object=None,
            validation_every_n=1):

        # Import compiled module
        from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

        # Select only positive interactions
        URM_train_positive = self.URM_train.copy()

        URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
        URM_train_positive.eliminate_zeros()

        self.sgd_mode = sgd_mode
        self.epochs = epochs

        # Validate topK before building the Cython epoch object that uses it
        if topK is not False and topK < 1:
            raise ValueError(
                "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
                .format(topK))
        self.topK = topK

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(
            self.URM_mask,
            train_with_sparse_weights=self.train_with_sparse_weights,
            final_model_sparse_weights=self.sparse_weights,
            topK=topK,
            learning_rate=learning_rate,
            li_reg=lambda_i,
            lj_reg=lambda_j,
            batch_size=1,
            symmetric=self.symmetric,
            sgd_mode=sgd_mode,
            gamma=gamma,
            beta_1=beta_1,
            beta_2=beta_2)

        if validation_every_n is not None:
            self.validation_every_n = validation_every_n
        else:
            self.validation_every_n = np.inf

        if evaluator_object is None and stop_on_validation:
            evaluator_object = SequentialEvaluator(self.URM_validation, [5])

        self.batch_size = batch_size
        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.learning_rate = learning_rate

        self._train_with_early_stopping(epochs,
                                        validation_every_n,
                                        stop_on_validation,
                                        validation_metric,
                                        lower_validatons_allowed,
                                        evaluator_object,
                                        algorithm_name=self.RECOMMENDER_NAME)

        self.get_S_incremental_and_set_W()

        sys.stdout.flush()
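As the validation above states, topK is either False (keep the learned similarity dense) or a positive integer (keep only the top-K weights per item). A short sketch of the two call styles, with placeholder hyperparameters:

slim = SLIM_BPR_Cython(URM_train)

# Sparse final model: keep the 200 strongest weights per item
slim.fit(epochs=10, topK=200, sgd_mode='adagrad', learning_rate=1e-4)

# Dense final model: topK=False skips the sparsification step
slim.fit(epochs=10, topK=False, sgd_mode='adagrad', learning_rate=1e-4)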
Code example #9
ICM = data.get_ICM()

URM_train = data.get_URM_train()
URM_test = data.get_URM_test()

print("URM_train shape : {}".format(URM_train.shape))
print("URM_test shape : {}".format(URM_test.shape))
print("ICM shape : {}".format(ICM.shape))

#URM_train, URM_validation, URM_test = split_train_validation_test(URM, [0.8, 0.1, 0.1])

# ------------------------
# Instantiating Evaluators
# ------------------------

evaluator_validation = SequentialEvaluator(URM_test, cutoff_list=[10])
#evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[10])

evaluator_validation = EvaluatorWrapper(evaluator_validation)
#evaluator_test = EvaluatorWrapper(evaluator_test)

# ------------------------
# Recommender class definition
# ------------------------

recommender_class = ItemKNNCBFRecommender

# ------------------------
# Instantiating BayesianSearch
# ------------------------
Code example #10
    def fit(self,
            epochs=300,
            logFile=None,
            batch_size=1000,
            lambda_i=0.1,
            lambda_j=0.1,
            learning_rate=1e-3,
            topK=200,
            sgd_mode='adagrad',
            gamma=0.995,
            beta_1=0.9,
            beta_2=0.999,
            stop_on_validation=False,
            lower_validatons_allowed=2,
            validation_metric="MAP",
            evaluator_object=None,
            validation_every_n=50,
            old_similarity_matrix=None,
            force_compute_sim=True):
        '''

        :param epochs: max number of epochs
        :param logFile:
        :param batch_size:
        :param lambda_i: first regularizer
        :param lambda_j: second regularizer
        :param learning_rate:
        :param topK:
        :param sgd_mode:
        :param gamma:
        :param beta_1:
        :param beta_2:
        :param stop_on_validation: whether to stop training early based on validation results
        :param lower_validatons_allowed: stop after this many validation steps worse than the previous one
        :param validation_metric:
        :param evaluator_object:
        :param validation_every_n: how often to run validation
        :param old_similarity_matrix: if you want to start from a fixed similarity matrix
        :param force_compute_sim:
        :return:
        '''

        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.learning_rate = learning_rate
        self.topK = topK
        self.epochs = epochs

        if not force_compute_sim:
            found = True
            try:
                with open(
                        os.path.join(
                            "IntermediateComputations", "SLIM_BPR",
                            "totURM={}_topK={}_lambdai={}_lambdaj={}_lr={}_epochs={}.pkl"
                            .format(str(len(self.URM_train.data)),
                                    str(self.topK), str(self.lambda_i),
                                    str(self.lambda_j),
                                    str(self.learning_rate),
                                    str(self.epochs))), 'rb') as handle:
                    W_sparse_new = pickle.load(handle)
            except FileNotFoundError:
                found = False

            if found:
                self.W_sparse = W_sparse_new
                print("Saved SLIM Matrix Used!")
                return

        if evaluator_object is None and stop_on_validation:
            print("Creating evaluator object for SLIM BPR")
            evaluator_object = SequentialEvaluator(self.URM_validation,
                                                   self.URM_train)

        # Import compiled module
        from SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

        # Select only positive interactions
        URM_train_positive = self.URM_train.copy()

        URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
        URM_train_positive.eliminate_zeros()

        self.sgd_mode = sgd_mode
        self.epochs = epochs

        # Validate topK before building the Cython epoch object that uses it
        if topK is not False and topK < 1:
            raise ValueError(
                "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
                .format(topK))
        self.topK = topK

        self.cythonEpoch = SLIM_BPR_Cython_Epoch(
            self.URM_mask,
            old_similarity=old_similarity_matrix,
            train_with_sparse_weights=self.train_with_sparse_weights,
            final_model_sparse_weights=self.sparse_weights,
            topK=topK,
            learning_rate=learning_rate,
            li_reg=lambda_i,
            lj_reg=lambda_j,
            batch_size=1,
            symmetric=self.symmetric,  # symmetric by default
            sgd_mode=sgd_mode,
            gamma=gamma,
            beta_1=beta_1,
            beta_2=beta_2)

        if validation_every_n is not None:
            self.validation_every_n = validation_every_n
        else:
            self.validation_every_n = np.inf

        self.batch_size = batch_size
        self.lambda_i = lambda_i
        self.lambda_j = lambda_j
        self.learning_rate = learning_rate

        self._train_with_early_stopping(epochs,
                                        validation_every_n,
                                        stop_on_validation,
                                        validation_metric,
                                        lower_validatons_allowed,
                                        evaluator_object,
                                        algorithm_name=self.RECOMMENDER_NAME)

        self.get_S_incremental_and_set_W()
        # self.normalized_SLIM()
        with open(
                os.path.join(
                    "IntermediateComputations", "SLIM_BPR",
                    "totURM={}_topK={}_lambdai={}_lambdaj={}_lr={}_epochs={}.pkl"
                    .format(str(len(self.URM_train.data)), str(self.topK),
                            str(self.lambda_i), str(self.lambda_j),
                            str(self.learning_rate), str(self.epochs))),
                'wb') as handle:
            pickle.dump(self.W_sparse,
                        handle,
                        protocol=pickle.HIGHEST_PROTOCOL)

        sys.stdout.flush()
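The force_compute_sim branch gives this fit a simple cache: with False, a pickle written by a previous run with the same URM size and hyperparameters is reloaded and training is skipped entirely. A usage sketch:

slim = SLIM_BPR_Cython(URM_train, positive_threshold=0)

# First run trains and pickles W_sparse under IntermediateComputations/SLIM_BPR/
slim.fit(epochs=150, topK=200, lambda_i=0.1, lambda_j=0.1,
         learning_rate=1e-3, force_compute_sim=True)

# A later run with identical hyperparameters reuses the saved matrix
slim.fit(epochs=150, topK=200, lambda_i=0.1, lambda_j=0.1,
         learning_rate=1e-3, force_compute_sim=False)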
Code example #11
    def fit(self, epochs=30, batch_size=128, num_factors=10,
            learning_rate=0.001,
            stop_on_validation=False, lower_validatons_allowed=5, validation_metric="MAP",
            evaluator_object=None, validation_every_n=1, use_cuda=True):



        if evaluator_object is None and self.URM_validation is not None:
            from Base.Evaluation.Evaluator import SequentialEvaluator

            evaluator_object = SequentialEvaluator(self.URM_validation, [10])



        self.n_factors = num_factors


        # Select only positive interactions
        URM_train_positive = self.URM_train.copy()

        URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
        URM_train_positive.eliminate_zeros()


        self.batch_size = batch_size
        self.learning_rate = learning_rate


        ########################################################################################################
        #
        #                                SETUP PYTORCH MODEL AND DATA READER
        #
        ########################################################################################################

        if use_cuda and torch.cuda.is_available():
            self.device = torch.device('cuda')
            print("MF_MSE_PyTorch: Using CUDA")
        else:
            self.device = torch.device('cpu')
            print("MF_MSE_PyTorch: Using CPU")

        from MatrixFactorization.PyTorch.MF_MSE_PyTorch_model import MF_MSE_PyTorch_model, DatasetIterator_URM

        n_users, n_items = self.URM_train.shape

        self.pyTorchModel = MF_MSE_PyTorch_model(n_users, n_items, self.n_factors).to(self.device)

        # Choose loss; size_average=False in older PyTorch corresponds to reduction='sum'
        self.lossFunction = torch.nn.MSELoss(reduction='sum')
        # self.lossFunction = torch.nn.BCELoss(reduction='sum')
        self.optimizer = torch.optim.Adagrad(self.pyTorchModel.parameters(), lr=self.learning_rate)


        dataset_iterator = DatasetIterator_URM(self.URM_train)

        self.train_data_loader = DataLoader(dataset=dataset_iterator,
                                            batch_size=self.batch_size,
                                            shuffle=True,
                                            # num_workers=2,
                                            )


        ########################################################################################################


        self._train_with_early_stopping(epochs, validation_every_n, stop_on_validation,
                                        validation_metric, lower_validatons_allowed, evaluator_object,
                                        algorithm_name="MF_MSE_PyTorch")


        self.W = self.W_best.copy()
        self.H = self.H_best.copy()


        sys.stdout.flush()
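A hedged usage sketch for the PyTorch variant above; the constructor signature is assumed (URM_train plus a URM_validation used to build the default evaluator):

recommender = MF_MSE_PyTorch(URM_train, URM_validation=URM_validation)  # assumed constructor
recommender.fit(epochs=30,
                batch_size=128,
                num_factors=10,
                learning_rate=1e-3,
                use_cuda=True)  # falls back to CPU automatically if CUDA is unavailable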
Code example #12
    dataReader = dataReader()

    #URM_train = dataReader.get_URM_train()
    URM_train = dataReader.get_URM_complete()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()

    ICM_Art = dataReader.get_ICM_Art()
    ICM_Alb = dataReader.get_ICM_Alb()

    recommender_list = [HybridRecommender]

    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator = SequentialEvaluator(URM_test, [10], exclude_seen=True)
    evaluatorValid = SequentialEvaluator(URM_validation, [10],
                                         exclude_seen=True)

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    logFile = open(output_root_path + "result_all_algorithms.txt", "a")

    for recommender_class in recommender_list:

        try:
Code example #13
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    data = dataReader()

    URM_train = data.get_URM_train()
    URM_validation = data.get_URM_validation()
    URM_test = data.get_URM_test()

    ICM_alb = data.ICM_Alb
    ICM_art = data.ICM_Art

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    collaborative_algorithm_list = [HybridRecommender]

    from ParameterTuning.AbstractClassSearch import EvaluatorWrapper
    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator_validation_earlystopping = SequentialEvaluator(URM_validation,
                                                             cutoff_list=[10])
    evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[10])

    evaluator_validation = EvaluatorWrapper(evaluator_validation_earlystopping)
    evaluator_test = EvaluatorWrapper(evaluator_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        ICM_1=ICM_art,
        ICM_2=ICM_alb,
        metric_to_optimize="MAP",
        evaluator_validation_earlystopping=evaluator_validation_earlystopping,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_root_path=output_root_path)

    # pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    # resultList = pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)

    for recommender_class in collaborative_algorithm_list:

        try:

            runParameterSearch_Collaborative_partial(recommender_class)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()
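The commented-out pool lines above show the intended parallel version of this loop. A sketch of that variant, assuming each search run is independent and the partial is picklable:

import multiprocessing

pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()),
                            maxtasksperchild=1)
resultList = pool.map(runParameterSearch_Collaborative_partial,
                      collaborative_algorithm_list)
pool.close()
pool.join()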
Code example #14
    dataReader = Movielens10MReader()

    URM_train = dataReader.get_URM_train()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()

    recommender_list = [
        Random, TopPop, P3alphaRecommender, RP3betaRecommender,
        ItemKNNCFRecommender, UserKNNCFRecommender,
        MatrixFactorization_BPR_Cython, MatrixFactorization_FunkSVD_Cython,
        PureSVDRecommender, SLIM_BPR_Cython, SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator = SequentialEvaluator(URM_test, [5, 20], exclude_seen=True)

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    logFile = open(output_root_path + "result_all_algorithms.txt", "a")

    for recommender_class in recommender_list:

        try:

            print("Algorithm: {}".format(recommender_class))
Code example #15
    # algo = SlopeOne()
    # trainset = data.build_full_trainset()
    # algo.fit(trainset)

    URM_test = dataReader.get_URM_test()

    URM_validation = dataReader.get_URM_validation()

    ICM_Art = dataReader.get_ICM_Art()
    ICM_Alb = dataReader.get_ICM_Alb()

    recommender_list = [HybridRecommender]

    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator = SequentialEvaluator(URM_test, [10], exclude_seen=True)
    evaluatorValid = SequentialEvaluator(URM_validation, [10],
                                         exclude_seen=True)

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    logFile = open(output_root_path + "result_all_algorithms.txt", "a")

    for recommender_class in recommender_list:

        try:
Code example #16
def hybrid_repo(is_test):
    b = Builder()
    ev = Evaluator()
    ev.split()
    ICM = b.build_ICM()

    URM_train, URM_test = train_test_holdout(b.get_URM(), train_perc=0.8)
    URM_train, URM_validation = train_test_holdout(URM_train, train_perc=0.9)

    from ParameterTuning.AbstractClassSearch import EvaluatorWrapper
    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator_validation = SequentialEvaluator(URM_validation, cutoff_list=[5])
    evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[5, 10])

    evaluator_validation = EvaluatorWrapper(evaluator_validation)
    evaluator_test = EvaluatorWrapper(evaluator_test)

    from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
    from ParameterTuning.BayesianSearch import BayesianSearch

    recommender_class = ItemKNNCFRecommender

    parameterSearch = BayesianSearch(recommender_class,
                                     evaluator_validation=evaluator_validation,
                                     evaluator_test=evaluator_test)

    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    hyperparamethers_range_dictionary = {}
    hyperparamethers_range_dictionary["topK"] = [
        5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
    ]
    hyperparamethers_range_dictionary["shrink"] = [
        0, 10, 50, 100, 200, 300, 500, 1000
    ]
    hyperparamethers_range_dictionary["similarity"] = ["cosine"]
    hyperparamethers_range_dictionary["normalize"] = [True, False]

    recommenderDictionary = {
        DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
        DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
        DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
        DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
        DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
        hyperparamethers_range_dictionary
    }

    output_root_path = "result_experiments/"

    import os

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    output_root_path += recommender_class.RECOMMENDER_NAME

    n_cases = 2
    metric_to_optimize = "MAP"

    best_parameters = parameterSearch.search(recommenderDictionary,
                                             n_cases=n_cases,
                                             output_root_path=output_root_path,
                                             metric=metric_to_optimize)

    itemKNNCF = ItemKNNCFRecommender(URM_train)
    itemKNNCF.fit(**best_parameters)

    from FW_Similarity.CFW_D_Similarity_Linalg import CFW_D_Similarity_Linalg

    n_cases = 2
    metric_to_optimize = "MAP"

    best_parameters_ItemKNNCBF = parameterSearch.search(
        recommenderDictionary,
        n_cases=n_cases,
        output_root_path=output_root_path,
        metric=metric_to_optimize)

    itemKNNCBF = ItemKNNCBFRecommender(ICM, URM_train)
    itemKNNCBF.fit(**best_parameters_ItemKNNCBF)
    """
    #_____________________________________________________________________
    from ParameterTuning.BayesianSearch import BayesianSearch
    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    from ParameterTuning.AbstractClassSearch import EvaluatorWrapper

    evaluator_validation_tuning = EvaluatorWrapper(evaluator_validation)
    evaluator_test_tuning = EvaluatorWrapper(evaluator_test)

    recommender_class = CFW_D_Similarity_Linalg

    parameterSearch = BayesianSearch(recommender_class,
                                     evaluator_validation=evaluator_validation_tuning,
                                     evaluator_test=evaluator_test_tuning)

    hyperparamethers_range_dictionary = {}
    hyperparamethers_range_dictionary["topK"] = [5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800]
    hyperparamethers_range_dictionary["add_zeros_quota"] = range(0, 1)
    hyperparamethers_range_dictionary["normalize_similarity"] = [True, False]

    recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train, ICM, itemKNNCF.W_sparse],
                             DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                             DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                             DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
                             DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

    output_root_path = "result_experiments/"

    import os

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    output_root_path += recommender_class.RECOMMENDER_NAME

    n_cases = 2
    metric_to_optimize = "MAP"

    best_parameters_CFW_D = parameterSearch.search(recommenderDictionary,
                                                   n_cases=n_cases,
                                                   output_root_path=output_root_path,
                                                   metric=metric_to_optimize)

    CFW_weithing = CFW_D_Similarity_Linalg(URM_train, ICM, itemKNNCF.W_sparse)
    CFW_weithing.fit(**best_parameters_CFW_D)
    #___________________________________________________________________________________________-

    """

    from GraphBased.P3alphaRecommender import P3alphaRecommender

    P3alpha = P3alphaRecommender(URM_train)
    P3alpha.fit()

    from MatrixFactorization.PureSVD import PureSVDRecommender

    #pureSVD = PureSVDRecommender(URM_train)
    #pureSVD.fit()

    rec = HybridRec.HybridRec()

    S_UCM = b.get_S_UCM_KNN(b.get_UCM(ev.get_URM_train()), 600)
    S_ICM = b.build_S_ICM_knn(b.build_ICM(), 250)

    rec.fit(ev.get_URM_train(),
            ev.get_target_playlists(),
            ev.get_target_tracks(),
            ev.num_playlists_to_test,
            itemKNNCBF.W_sparse,
            itemKNNCF.W_sparse,
            P3alpha.W_sparse,
            is_test=True,
            alfa=0.7,
            avg=0.3)

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        print('Hybrid MAP@5:', map5)
        return map5
    else:
        print('Prediction saved!')
        train_df.to_csv(os.path.dirname(os.path.realpath(__file__))[:-19] +
                        "/all/sub.csv",
                        sep=',',
                        index=False)
        return 0

    #hybridrecommender = ItemKNNSimilarityHybridRecommender(URM_train, itemKNNCF.W_sparse, P3alpha.W_sparse)
    #hybridrecommender.fit(alpha=0.5)

    #print(evaluator_validation.evaluateRecommender(hybridrecommender))
Code example #17
def run():
    evaluate_algorithm = True
    delete_old_computations = False
    slim_after_hybrid = False

    # delete_previous_intermediate_computations()
    # if not evaluate_algorithm:
    #     delete_previous_intermediate_computations()
    # else:
    #     print("ATTENTION: old intermediate computations kept, pay attention if running with all_train")
    # delete_previous_intermediate_computations()
    filename = "hybrid_ICB_ICF_UCF_IALS_SLIM_ELASTIC_local_081962.csv"

    dataReader = RS_Data_Loader(all_train=not evaluate_algorithm)

    URM_train = dataReader.get_URM_train()
    URM_PageRank_train = dataReader.get_page_rank_URM()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()
    ICM = dataReader.get_ICM()
    UCM_tfidf = dataReader.get_tfidf_artists()
    # _ = dataReader.get_tfidf_album()

    recommender_list1 = [
        # Random,
        # TopPop,
        ItemKNNCBFRecommender,
        # UserKNNCBRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        IALS_numpy,
        SLIM_BPR_Cython,
        # ItemKNNCFRecommenderFAKESLIM,
        # PureSVDRecommender,
        SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator = SequentialEvaluator(URM_test, URM_train, exclude_seen=True)

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    logFile = open(output_root_path + "result_all_algorithms.txt", "a")

    try:
        recommender_class = HybridRecommender
        print("Algorithm: {}".format(recommender_class))
        '''
        Our optimal run
        '''
        recommender_list = recommender_list1  # + recommender_list2  # + recommender_list3

        onPop = False

        # onPop selects whether dynamic weights are chosen by item popularity rather than profile length
        recommender = recommender_class(URM_train,
                                        ICM,
                                        recommender_list,
                                        URM_PageRank_train=URM_PageRank_train,
                                        dynamic=False,
                                        UCM_train=UCM_tfidf,
                                        URM_validation=URM_validation,
                                        onPop=onPop)

        recommender.fit(**{
            "topK": [10, 181, 82, -1, 761, 490],
            "shrink": [8, 0, 3, -1, -1, -1],
            "pop": [280],
            "weights": [0.47412263345597117, 1.3864620551711606,
                        0.6224999770898935, 1.5498327677561246,
                        0.1993692779443738, 2.113324096784624],
            "final_weights": [1, 1],
            "force_compute_sim": False,  # not evaluate_algorithm
            "feature_weighting_index": 0,
            "epochs": 150,
            'lambda_i': [0.0],
            'lambda_j': [1.0153577332223556e-08],
            'SLIM_lr': [0.1],
            'alphaP3': [0.4121720883248633],
            'alphaRP3': [0.8582865731462926],
            'betaRP': [0.2814208416833668],
            'l1_ratio': 3.020408163265306e-06,
            'alpha': 0.0014681984611695231,
            'tfidf': [True],
            "weights_to_dweights": -1,
            "IALS_num_factors": 290,
            "IALS_reg": 0.001,
            "IALS_iters": 6,
            "IALS_scaling": 'log',
            "IALS_alpha": 40,
            "filter_top_pop_len": 0
        })

        print("TEST")

        print("Starting Evaluations...")
        # indicates whether evaluation plots are split by profile length or by item popularity

        results_run, results_run_string, target_recommendations = evaluator.evaluateRecommender(
            recommender, plot_stats=False, onPop=onPop)

        print("Algorithm: {}, results: \n{}".format(
            [rec.RECOMMENDER_NAME for rec in recommender.recommender_list],
            results_run_string))
        logFile.write("Algorithm: {}, results: \n{} time: {}".format(
            [rec.RECOMMENDER_NAME for rec in recommender.recommender_list],
            results_run_string, time.time()))
        logFile.flush()

        if not evaluate_algorithm:
            target_playlist = dataReader.get_target_playlist()
            md.assign_recomendations_to_correct_playlist(
                target_playlist, target_recommendations)
            md.make_CSV_file(target_playlist, filename)
            print('File {} created!'.format(filename))

    except Exception as e:
        traceback.print_exc()
        logFile.write("Algorithm: {} - Exception: {}\n".format(
            recommender_class, str(e)))
        logFile.flush()
Code example #18
File: run_example.py  Project: mindis/CFeCBF
# https://mmprj.github.io/mtrm_dataset/index
dataSplitter = DataSplitter_Warm_k_fold(dataReader)
dataSplitter.load_data()

# Each URM is a scipy.sparse matrix of shape |users|x|items|
URM_train, URM_validation, URM_test = dataSplitter.get_holdout_split()

# The ICM is a scipy.sparse matrix of shape |items|x|features|
ICM = dataSplitter.get_ICM_from_name("ICM_genre")

# This contains the items to be ignored during the evaluation step
# In a cold items setting this should contain the indices of the warm items
ignore_items = []

evaluator_validation = SequentialEvaluator(URM_validation,
                                           cutoff_list=[5],
                                           ignore_items=ignore_items)
evaluator_test = SequentialEvaluator(URM_test,
                                     cutoff_list=[5],
                                     ignore_items=ignore_items)

# This is used by the ML model of CFeCBF to perform early stopping and may be omitted.
# ICM_target allows setting a different ICM for this validation step, providing flexibility in including
# features present in either validation or test but not in train
evaluator_validation_earlystopping = EvaluatorCFW_D_wrapper(
    evaluator_validation, ICM_target=ICM, model_to_use="incremental")

# We compute the similarity matrix resulting from a RP3beta recommender
# Note that we have not included the code for parameter tuning, which should be done beforehand

cf_parameters = {
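The snippet is truncated at cf_parameters. A hypothetical completion, assuming RP3betaRecommender exposes the fit signature used elsewhere in these examples (topK, alpha, beta, normalize_similarity); the values are untuned placeholders, since the original ones are not shown:

cf_parameters = {
    'topK': 100,
    'alpha': 0.7,
    'beta': 0.3,
    'normalize_similarity': True,
}

recommender = RP3betaRecommender(URM_train)
recommender.fit(**cf_parameters)

# The learned similarity is then the matrix CFeCBF feature-weights
W_sparse_CF = recommender.W_sparse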
Code example #19
File: main.py  Project: BlancNicolas/RecSys_challenge
from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
from KNN.UserKNNCFRecommender import UserKNNCFRecommender
from ParameterTuning.AbstractClassSearch import writeLog
from data_splitter import train_test_holdout
import itertools


data = Data()
URM = data.get_URM()
ICM = data.get_ICM()

URM_train, URM_test = train_test_holdout(URM, train_perc=0.8)

print("Shape : {}".format(URM_train.shape))

evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[10])
evaluator_test = EvaluatorWrapper(evaluator_test)

output_root_path = "result_experiments/"

filename = P3alphaRecommender.RECOMMENDER_NAME \
           + ItemKNNCBFRecommender.RECOMMENDER_NAME \
           + ItemKNNCFRecommender.RECOMMENDER_NAME \
           + "hybrid_opt"

output_root_path += filename
output_file = open(output_root_path, "a")

P3alpha = P3alphaRecommender(URM_train)
P3alpha.fit(topK=100, alpha=0.7905462550621185, implicit=True, normalize_similarity=True)
# print("-------------------")
Code example #20
    def fit(self,
            epochs=5000,
            batch_size=1000,
            num_factors=80,
            learning_rate=0.001,
            sgd_mode='adagrad',
            user_reg=0.0,
            positive_reg=0.01,
            negative_reg=0.01,
            stop_on_validation=False,
            lower_validatons_allowed=5,
            validation_metric="MAP",
            evaluator_object=None,
            validation_every_n=5,
            force_compute_sim=True):

        if not force_compute_sim:
            found = True
            try:
                with open(
                        os.path.join("IntermediateComputations",
                                     "MFMatrix.pkl"), 'rb') as handle:
                    (W_new, H_new) = pickle.load(handle)
            except FileNotFoundError:
                found = False

            if found:
                self.W = W_new
                self.H = H_new
                print("Saved MF Matrix Used!")
                return

        self.num_factors = num_factors
        self.sgd_mode = sgd_mode
        self.batch_size = batch_size
        self.learning_rate = learning_rate

        if evaluator_object is None and stop_on_validation:
            evaluator_object = SequentialEvaluator(self.URM_validation, [10])

        # Import compiled module
        from MatrixFactorization.Cython.MatrixFactorization_Cython_Epoch import MatrixFactorization_Cython_Epoch

        if self.algorithm == "FUNK_SVD":

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                self.URM_train,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=1,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=0.0)

        elif self.algorithm == "ASY_SVD":

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                self.URM_train,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=32,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=0.0)

        elif self.algorithm == "MF_BPR":

            # Select only positive interactions
            URM_train_positive = self.URM_train.copy()

            URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
            URM_train_positive.eliminate_zeros()

            assert URM_train_positive.nnz > 0, "MatrixFactorization_Cython: URM_train_positive is empty, positive threshold is too high"

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                URM_train_positive,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=batch_size,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=negative_reg)

        self._train_with_early_stopping(epochs,
                                        validation_every_n,
                                        stop_on_validation,
                                        validation_metric,
                                        lower_validatons_allowed,
                                        evaluator_object,
                                        algorithm_name=self.algorithm)

        self.W = self.W_best
        self.H = self.H_best

        with open(os.path.join("IntermediateComputations", "MFMatrix.pkl"),
                  'wb') as handle:
            pickle.dump((self.W, self.H),
                        handle,
                        protocol=pickle.HIGHEST_PROTOCOL)

        sys.stdout.flush()
Code example #21
    #
    # d_best = [[0.4, 0.03863232277574469, 0.008527738266632112, 0.2560912624445676, 0.7851755932819731,
    #            0.4112843940329439],
    #           [0.2, 0.012499871230102988, 0.020242981888115352, 0.9969708006657074, 0.9999132876156388,
    #            0.6888103295594851],
    #           [0.2, 0.10389111810225915, 0.14839466129917822, 0.866992903043857, 0.07010619211847613,
    #            0.5873532658846817]]

    # BEST RESULT : d_weights = [[0.5, 0.5, 0], [0.4, 0.4, 0.2], [0, 0.8, 0.2], [0, 0.5, 0.5]]

    # Dynamic weights for Hybrid with Top_N, useful for testing where each recommender works better
    # d_weights = [[2, 4, 0], [1, 4, 5], [0, 2, 8]]

    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator = SequentialEvaluator(URM_test, URM_train, exclude_seen=True)

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    logFile = open(output_root_path + "result_all_algorithms.txt", "a")

    transfer_learning = False
    if transfer_learning:
        recommender_IB = ItemKNNCFRecommender(URM_train)
        recommender_IB.fit(200, 15)
        transfer_matrix = recommender_IB.W_sparse
    else:
Code example #22
def run():
    evaluate_algorithm = False
    delete_old_computations = False
    slim_after_hybrid = False

    # delete_previous_intermediate_computations()
    # if not evaluate_algorithm:
    #     delete_previous_intermediate_computations()
    # else:
    #     print("ATTENTION: old intermediate computations kept, pay attention if running with all_train")
    # delete_previous_intermediate_computations()
    filename = "hybrid_ICB_ICF_UCF_SLIM_ELASTIC_local_08052.csv"

    dataReader = RS_Data_Loader(all_train=not evaluate_algorithm)

    URM_train = dataReader.get_URM_train()
    URM_PageRank_train = dataReader.get_page_rank_URM()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()
    ICM = dataReader.get_ICM()
    UCM_tfidf = dataReader.get_tfidf_artists()
    # _ = dataReader.get_tfidf_album()

    recommender_list1 = [
        # Random,
        # TopPop,
        ItemKNNCBFRecommender,
        # UserKNNCBRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        SLIM_BPR_Cython,
        # ItemKNNCFRecommenderFAKESLIM,
        # PureSVDRecommender,
        SLIMElasticNetRecommender
    ]

    # ITEM CB, ITEM CF, USER CF, RP3BETA, PURE SVD
    recommender_list2 = [
        # Random,
        # TopPop,
        ItemKNNCBFRecommender,
        # UserKNNCBRecommender,
        # ItemKNNCFPageRankRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender
        # PureSVDRecommender
    ]

    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator = SequentialEvaluator(URM_test, URM_train, exclude_seen=True)

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    logFile = open(output_root_path + "result_all_algorithms.txt", "a")

    try:
        recommender_class = HybridRecommender
        print("Algorithm: {}".format(recommender_class))

        '''
        Our optimal run
        '''
        recommender_list = recommender_list1  # + recommender_list2  # + recommender_list3

        onPop = False

        # onPop selects whether dynamic weights are chosen by item popularity rather than profile length
        recommender = recommender_class(URM_train, ICM, recommender_list, URM_PageRank_train=URM_PageRank_train,
                                        dynamic=False, UCM_train=UCM_tfidf,
                                        URM_validation=URM_validation, onPop=onPop)

        lambda_i = 0.1
        lambda_j = 0.05
        old_similrity_matrix = None
        num_factors = 395
        l1_ratio = 1e-06

        # Variabili secondo intervallo
        alphaRP3_2 = 0.9223827655310622
        betaRP3_2 = 0.2213306613226453
        num_factors_2 = 391

        recommender.fit(**
                        {
                            "topK": [10, 33, 160, 761, 490],
                            "shrink": [8, 26, 2, -1, -1],
                            "pop": [280],
                            "weights": [0.33804686720093335, 1.3092081994688194, 0.642288869881126, 0.18883962446529368,
                                        1.9317211019160747],
                            "final_weights": [1, 1],
                            "force_compute_sim": False,  # not evaluate_algorithm,
                            "feature_weighting_index": 0,
                            "epochs": 150,
                            'lambda_i': [0.0], 'lambda_j': [1.0153577332223556e-08], 'SLIM_lr': [0.1],
                            'alphaP3': [0.4121720883248633],
                            'alphaRP3': [0.8582865731462926],
                            'betaRP': [0.2814208416833668],
                            'l1_ratio': 3.020408163265306e-06,
                            'alpha': 0.0014681984611695231,
                            'tfidf': [True],
                            "weights_to_dweights": -1,
                            "filter_top_pop_len": 0})

        print("TEST")

        print("Starting Evaluations...")
        # indicates whether evaluation plots are split by profile length or by item popularity

        results_run, results_run_string, target_recommendations = evaluator.evaluateRecommender(recommender,
                                                                                                plot_stats=False,
                                                                                                onPop=onPop)

        print("Algorithm: {}, results: \n{}".format([rec.RECOMMENDER_NAME for rec in recommender.recommender_list],
                                                    results_run_string))
        logFile.write("Algorithm: {}, results: \n{} time: {}".format(
            [rec.RECOMMENDER_NAME for rec in recommender.recommender_list], results_run_string, time.time()))
        logFile.flush()

        if not evaluate_algorithm:
            target_playlist = dataReader.get_target_playlist()
            md.assign_recomendations_to_correct_playlist(target_playlist, target_recommendations)
            md.make_CSV_file(target_playlist, filename)
            print('File {} created!'.format(filename))


    except Exception as e:
        traceback.print_exc()
        logFile.write("Algorithm: {} - Exception: {}\n".format(recommender_class, str(e)))
        logFile.flush()
Code example #23
    evaluate_algorithm = True
    slim_after_hybrid = False

    # delete_previous_intermediate_computations()

    filename = "hybrid_UserContentMatrix"

    dataReader = RS_Data_Loader(top10k=True, all_train=not evaluate_algorithm)

    URM_train = dataReader.get_URM_train()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()

    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator = SequentialEvaluator(URM_test, URM_train, exclude_seen=True)

    with open(
            os.path.join("IntermediateComputations",
                         "Clusterization_Kmeans_3.pkl"), 'rb') as handle:
        clusters = pickle.load(handle)

    cl_0_ind = []
    cl_1_ind = []
    cl_2_ind = []
    # Collect the indices of the users in each cluster
    for i, elem in enumerate(clusters):
        if elem == 0:
            cl_0_ind.append(i)
        elif elem == 1:
            cl_1_ind.append(i)
        else:
            cl_2_ind.append(i)
def run():
    evaluate_algorithm = True
    delete_old_computations = False
    slim_after_hybrid = False

    # delete_previous_intermediate_computations()
    # if not evaluate_algorithm:
    #     delete_previous_intermediate_computations()
    # else:
    #     print("ATTENTION: old intermediate computations kept, pay attention if running with all_train")
    # delete_previous_intermediate_computations()
    filename = "hybrid_ICB_ICF_UCF_SLIM_ELASTIC_local_08052.csv"

    dataReader = RS_Data_Loader(all_train=not evaluate_algorithm)

    URM_train = dataReader.get_URM_train()
    URM_PageRank_train = dataReader.get_page_rank_URM()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()
    ICM = dataReader.get_ICM()
    UCM_tfidf = dataReader.get_tfidf_artists()
    # _ = dataReader.get_tfidf_album()

    recommender_list1 = [
        # Random,
        # TopPop,
        ItemKNNCBFRecommender,
        # UserKNNCBRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        SLIM_BPR_Cython,
        # ItemKNNCFRecommenderFAKESLIM,
        # PureSVDRecommender,
        SLIMElasticNetRecommender
    ]

    # ITEM CB, ITEM CF, USER CF, RP3BETA, PURE SVD
    recommender_list2 = [
        # Random,
        # TopPop,
        ItemKNNCBFRecommender,
        # UserKNNCBRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        SLIM_BPR_Cython,
        # ItemKNNCFRecommenderFAKESLIM,
        # PureSVDRecommender,
        SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator = SequentialEvaluator(URM_test, URM_train, exclude_seen=True)

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    logFile = open(output_root_path + "result_all_algorithms.txt", "a")

    try:
        recommender_class = HybridRecommender
        print("Algorithm: {}".format(recommender_class))

        '''
        Our optimal run
        '''
        recommender_list = recommender_list1 + recommender_list2  # + recommender_list3

        d_weights = [
            [0.5469789514168496, 1.5598358421050373, 1.1505851198615593, 0.2540023047558251, 0.9403502151872645] + [
                0] * len(recommender_list2),
            [0] * len(recommender_list1) + [0.5205017325111618, 1.6831295912149837, 1.6560707664775454,
                                            0.3144197724407203, 1.9912784665282535]
        ]

        onPop = False

        # onPop selects whether dynamic weights are chosen by item popularity rather than profile length
        recommender = recommender_class(URM_train, ICM, recommender_list, URM_PageRank_train=URM_PageRank_train,
                                        dynamic=True, UCM_train=UCM_tfidf, d_weights=d_weights,
                                        URM_validation=URM_validation, onPop=onPop)

        recommender.fit(**
                        {
                            "topK": [10, 33, 160, 761, 490] + [10, 33, 160, 761, 490],
                            "shrink": [8, 26, 2, -1, -1] + [8, 26, 2, -1, -1],
                            "pop": [30],
                            "weights": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                            "final_weights": [1, 1],
                            "force_compute_sim": False,  # not evaluate_algorithm,
                            "feature_weighting_index": [0, 0],
                            "epochs": 150,
                            'lambda_i': [0.0, 0.0], 'lambda_j': [1.0153577332223556e-08, 1.0153577332223556e-08],
                            'SLIM_lr': [0.1, 0.1],
                            'alphaP3': [0.4121720883248633],
                            'alphaRP3': [0.8582865731462926],
                            'betaRP': [0.2814208416833668],
                            'l1_ratio': [3.020408163265306e-06, 3.020408163265306e-06],
                            'alpha': [0.0014681984611695231, 0.0014681984611695231],
                            'tfidf': [True, True],
                            "weights_to_dweights": -1,
                            "filter_top_pop_len": 0})

        print("TEST")

        print("Starting Evaluations...")
        # indicates whether evaluation plots are split by profile length or by item popularity

        results_run, results_run_string, target_recommendations = evaluator.evaluateRecommender(recommender,
                                                                                                plot_stats=True,
                                                                                                onPop=onPop)

        print("Algorithm: {}, results: \n{}".format([rec.RECOMMENDER_NAME for rec in recommender.recommender_list],
                                                    results_run_string))
        logFile.write("Algorithm: {}, results: \n{} time: {} \n".format(
            [rec.RECOMMENDER_NAME for rec in recommender.recommender_list], results_run_string, time.time()))
        logFile.flush()

        if not evaluate_algorithm:
            target_playlist = dataReader.get_target_playlist()
            md.assign_recomendations_to_correct_playlist(target_playlist, target_recommendations)
            md.make_CSV_file(target_playlist, filename)
            print('File {} created!'.format(filename))


    except Exception as e:
        traceback.print_exc()
        logFile.write("Algorithm: {} - Exception: {}\n".format(recommender_class, str(e)))
        logFile.flush()