Example #1
    def _split_data_from_original_dataset(self, save_folder_path):

        self.dataReader_object.load_data()
        self._load_from_DataReader_ICM_and_mappers()

        URM = self.dataReader_object.get_URM_all()
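        # CSR format gives cheap row slicing and per-user interaction counts
        # via the indptr array (used below with np.ediff1d)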
        URM = sps.csr_matrix(URM)

        split_number = 2
        if self.use_validation_set:
            split_number += 1

        # Each user needs at least self.k_out_value interactions per held-out
        # split (test, and validation if requested), plus one left for train
        min_user_interactions = (split_number - 1) * self.k_out_value + 1
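        # e.g. with k_out=1 and a validation set: split_number = 3, so each
        # user needs >= 3 interactions (1 test + 1 validation + 1 train)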

        if not self.allow_cold_users:
            user_interactions = np.ediff1d(URM.indptr)
            user_to_preserve = user_interactions >= min_user_interactions
            user_to_remove = np.logical_not(user_to_preserve)

            self._print(
                "Removing {} ({:.2f} %) of {} users because they have less than the {} interactions required for {} splits ({} for test [and validation if requested] +1 for train)"
                .format(URM.shape[0] - user_to_preserve.sum(),
                        (1 - user_to_preserve.sum() / URM.shape[0]) * 100,
                        URM.shape[0], min_user_interactions, split_number,
                        self.k_out_value))

            URM = URM[user_to_preserve, :]

            # Keep the original-user-ID -> row-index mapper consistent with
            # the rows just removed from the URM
            self.SPLIT_GLOBAL_MAPPER_DICT["user_original_ID_to_index"] = \
                reconcile_mapper_with_removed_tokens(
                    self.SPLIT_GLOBAL_MAPPER_DICT["user_original_ID_to_index"],
                    np.arange(0, len(user_to_remove), dtype=int)[user_to_remove])

        splitted_data = split_train_leave_k_out_user_wise(
            URM,
            k_out=self.k_out_value,
            use_validation_set=self.use_validation_set,
            leave_random_out=self.leave_random_out)

        if self.use_validation_set:
            URM_train, URM_validation, URM_test = splitted_data

        else:
            URM_train, URM_test = splitted_data

        self.SPLIT_URM_DICT = {
            "URM_train": URM_train,
            "URM_test": URM_test,
        }

        if self.use_validation_set:
            self.SPLIT_URM_DICT["URM_validation"] = URM_validation

        self._save_split(save_folder_path)

        self._print("Split complete")

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        # Predicted scores are the dot products between the user's and the
        # items' latent factors learned by ALS
        scores_list = []

        for user_id in user_id_array:
            scores = np.dot(self.user_factors[user_id], self.item_factors.T)
            scores = np.squeeze(scores)
            scores_list.append(scores)

        return np.asarray(scores_list, dtype=np.float32)

    def save_model(self, folder_path, file_name=None):
        print("Saving not implemented...")


if __name__ == '__main__':

    ALS_args = {
        'n_factors': 433,
        'iterations': 29,
        'regularization': 1.707545716729426e-05,
        'alpha_val': 5
    }

    train, test = split_train_leave_k_out_user_wise(get_data()['URM_all'], k_out=1)
    evaluator = EvaluatorHoldout(test, [10], target_users=get_data()['target_users'])

    als = ALSRecommender(train)
    als.fit(n_factors=ALS_args['n_factors'],
            regularization=ALS_args['regularization'],
            iterations=ALS_args['iterations'],
            alpha_val=ALS_args['alpha_val'])

    result, result_string = evaluator.evaluateRecommender(als)
    print(f"MAP: {result[10]['MAP']:.5f}")
Example #3
import os
import traceback
from functools import partial

def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters; it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    URM_train, URM_test = split_train_leave_k_out_user_wise(
        get_data()['URM_all'], k_out=1)
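    # Second split: carve the validation set out of the training data, so
    # that test and validation interactions stay disjoint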
    URM_train, URM_validation = split_train_leave_k_out_user_wise(URM_train,
                                                                  k_out=1)

    output_folder_path = "result_experiments/SKOPT_prova/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # HybridRecommender
        ALSRecommender
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # IALSRecommender,
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
    ]

    from Algorithms.Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(
        URM_validation,
        cutoff_list=[10],
        target_users=get_data()['target_users'])
    evaluator_test = EvaluatorHoldout(URM_test,
                                      cutoff_list=[10],
                                      target_users=get_data()['target_users'])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize="MAP",
        # TODO: change the number of search cases (n_cases) here
        n_cases=100,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path)

    from Algorithms.Utils.PoolWithSubprocess import PoolWithSubprocess
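    # Optional parallel mode: the commented lines below run one parameter
    # search per recommender class using a subprocess pool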

    # pool = PoolWithSubprocess(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    # resultList = pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
    # pool.close()
    # pool.join()

    for recommender_class in collaborative_algorithm_list:

        try:

            runParameterSearch_Collaborative_partial(recommender_class)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()
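
The function above only defines the search driver; a minimal entry point to run it is sketched below. The follow-up load of the _best_parameter file is an assumption about how the search serializes its output: the file name and the pickle format are not guaranteed by this snippet.

if __name__ == '__main__':

    read_data_split_and_search()

    # Assumed file name and format; adjust to whatever your version of the
    # parameter search actually writes into the output folder.
    import pickle

    with open("result_experiments/SKOPT_prova/ALSRecommender_best_parameter", "rb") as f:
        best_parameters = pickle.load(f)

    print("Best hyperparameters found:", best_parameters)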