def write_submission(self, users):
        """
        Fit a WeightedHybrid on the training URM and write the recommendations
        of every requested user through ``self.writer``.

        The hybrid is weighted with ``WeightConstants.NO_WEIGHTS[0]`` and every
        user is queried with ``at=20``.

        :param users: iterable of user ids to produce recommendations for
        :return: None
        """
        # Positional arguments of the hybrid, listed in the order its
        # constructor receives them.
        hybrid_args = (
            self.urm_train,
            self.icm,
            self.p_icfknn,
            self.p_ucfknn,
            self.p_cbfknn,
            self.p_slimbpr,
            self.p_puresvd,
            self.p_als,
            self.p_cfw,
            self.p_p3a,
            self.p_rp3b,
            self.p_slimen,
            WeightConstants.NO_WEIGHTS[0],
        )
        hybrid = WeightedHybrid(*hybrid_args, seen_items=self.urm_train)
        hybrid.fit()

        from tqdm import tqdm

        for user_id in tqdm(users):
            top_items = hybrid.recommend(user_id, at=20)
            self.writer.write(self.writer, user_id, top_items,
                              sub_counter=submission_counter)

        print("Submission file written")
    def write_submission(self, users):
        """
        Write the submission header, fit the submission hybrid and write one
        recommendation row per user in ``users``.

        Before the hybrid is built, a SLIM BPR model is fitted on the current
        ``self.icm`` and ``self.icm`` is replaced by a CSR copy of that
        model's ``recs`` matrix.

        :param users: iterable of user ids to produce recommendations for
        :return: None
        """
        self.writer.write_header(self.writer, sub_counter=submission_counter)

        from SLIM.SLIM_BPR_Cython import SLIM_BPR_Cython
        bpr_model = SLIM_BPR_Cython(self.icm)
        bpr_model.fit(**WeightConstants.SLIM_BPR)

        # From here on self.icm holds the fitted model's output, not the
        # matrix it was trained on.
        self.icm = bpr_model.recs.copy().tocsr()

        hybrid_args = (
            self.urm_train,
            self.icm,
            self.p_icfknn,
            self.p_ucfknn,
            self.p_cbfknn,
            self.p_slimbpr,
            self.p_puresvd,
            self.p_als,
            self.p_cfw,
            self.p_p3a,
            self.p_rp3b,
            self.p_slimen,
            WeightConstants.SUBM_WEIGHTS,
        )
        hybrid = WeightedHybrid(*hybrid_args)
        hybrid.fit()

        from tqdm import tqdm

        for user_id in tqdm(users):
            top_items = hybrid.recommend(user_id, at=10)
            self.writer.write(self.writer, user_id, top_items,
                              sub_counter=submission_counter)

        print("Submission file written")
    def post_validation(self, hyp):
        """
        Rebuild the hybrid from the flat vector ``hyp``, fit it on
        ``self.urm_post_validation`` and append its score on ``self.urm_test``
        to the report.

        :param hyp: flat sequence of hyper-parameter values; indices 0-35 hold
            the per-algorithm parameters and 36 onwards the weights
        :return: None
        """
        # Decode each slice of the flat vector into its parameter dict.
        icfknn_params = self.rebuild_single_KNN(hyp[:7])
        ucfknn_params = self.rebuild_single_KNN(hyp[7:14])
        cbfknn_params = self.rebuild_single_KNN(hyp[14:21])
        slim_params = self.rebuild_slim(hyp[21:28])
        puresvd_params = self.rebuild_puresvd(hyp[28:29])
        p3a_params = self.rebuild_p3a(hyp[29:32])
        rp3b_params = self.rebuild_rp3beta(hyp[32:36])
        weights = self.rebuild_weights(hyp[36:])

        # The two None arguments sit between the PureSVD and P3alpha settings.
        self.recommender = WeightedHybrid(
            self.urm_post_validation, self.icm,
            icfknn_params, ucfknn_params, cbfknn_params,
            slim_params, puresvd_params,
            None, None,
            p3a_params, rp3b_params,
            weights)
        self.recommender.fit()

        outcome = evaluate_algorithm(self.urm_test, self.recommender, at=10)
        report_text = "\n\n" + str(outcome)
        self.writer.write_report(report_text, self.report_counter)
    def evaluate_single(self, hyp):
        """
        Score a hybrid in which only ALS is configured.

        :param hyp: flat sequence whose first four values are the ALS
            parameters, decoded by ``self.rebuild_als``
        :return: the MAP on ``self.urm_test`` multiplied by -1, as a float
        """
        # Every algorithm except ALS receives None as its parameter set.
        hybrid_kwargs = {
            "p_icfknn": None,
            "p_ucfknn": None,
            "p_cbfknn": None,
            "p_slimbpr": None,
            "p_puresvd": None,
            "p_als": self.rebuild_als(hyp[:]),
            "p_cfw": None,
            "p_p3a": None,
            "p_rp3b": None,
            "weights": {"als": 1},
        }
        self.recommender = WeightedHybrid(self.urm_train, self.icm,
                                          **hybrid_kwargs)
        self.recommender.fit()

        outcome = evaluate_algorithm(self.urm_test, self.recommender, at=10)
        negated_map = outcome["MAP"] * (-1)
        return float(negated_map)
예제 #5
0
    def evaluate(self, index: int, target_users_profile):
        """
        Fit an equally weighted hybrid and collect its recommendations for
        every user in ``self.target_users``.

        :param index: number of the current cross-validation iteration (not
            read by this method)
        :param target_users_profile: passed to the hybrid as ``seen_items``
        :return: two parallel lists ``(user_ids, item_ids)`` with one entry
            per recommended item
        """
        algorithm_names = ('icfknn', 'ucfknn', 'cbfknn', 'slimbpr', 'puresvd',
                           'als', 'cfw', 'p3a', 'rp3b', 'slimen')
        # Every algorithm contributes with the same weight.
        uniform_weights = {name: 1 for name in algorithm_names}

        hybrid_args = (
            self.urm_train,
            self.icm,
            self.p_icfknn,
            self.p_ucfknn,
            self.p_cbfknn,
            self.p_slimbpr,
            self.p_puresvd,
            self.p_als,
            self.p_cfw,
            self.p_p3a,
            self.p_rp3b,
            self.p_slimen,
            uniform_weights,
        )
        hybrid = WeightedHybrid(*hybrid_args, seen_items=target_users_profile)
        hybrid.fit()

        user_ids, item_ids = [], []

        # One (user, item) pair per recommended item, kept in two columns.
        for target in self.target_users:
            picked = hybrid.recommend(target, at=self.cutoff)
            user_ids.extend([target] * len(picked))
            item_ids.extend(picked)

        return user_ids, item_ids
    def evaluate(self, hyp):
        """
        Rebuild the hybrid from the flat vector ``hyp``, fit it on
        ``self.urm_train`` and score it on ``self.urm_validation``.

        :param hyp: flat sequence of hyper-parameter values; indices 0-35 hold
            the per-algorithm parameters and 36 onwards the weights
        :return: the MAP at 10 multiplied by -1, as a float
        """
        # Decode each slice of the flat vector into its parameter dict.
        icfknn_params = self.rebuild_single_KNN(hyp[:7])
        ucfknn_params = self.rebuild_single_KNN(hyp[7:14])
        cbfknn_params = self.rebuild_single_KNN(hyp[14:21])
        slim_params = self.rebuild_slim(hyp[21:28])
        puresvd_params = self.rebuild_puresvd(hyp[28:29])
        p3a_params = self.rebuild_p3a(hyp[29:32])
        rp3b_params = self.rebuild_rp3beta(hyp[32:36])
        weights = self.rebuild_weights(hyp[36:])

        # The two None arguments sit between the PureSVD and P3alpha settings.
        self.recommender = WeightedHybrid(
            self.urm_train, self.icm,
            icfknn_params, ucfknn_params, cbfknn_params,
            slim_params, puresvd_params,
            None, None,
            p3a_params, rp3b_params,
            weights)
        self.recommender.fit()

        outcome = evaluate_algorithm(self.urm_validation, self.recommender,
                                     at=10)
        negated_map = outcome["MAP"] * (-1)
        return float(negated_map)
    def evaluate(self, index: int, target_users_profile):
        """
        Validate every candidate weight set on the current fold.

        For each weight set returned by ``self.get_test_weights`` a hybrid is
        fitted on ``self.urm_train`` and scored on ``self.urm_validation``; its
        MAP is appended to ``self.results`` and the full result is reported.

        :param index: fold number, written in the report heading
        :param target_users_profile: passed to the hybrid as ``seen_items``
        :return: None
        """
        separator = "--------------------------------------"
        tried_weights = []

        heading = "VALIDATION " + str(index)
        self.writer.write_report(self.writer, heading, report_counter)

        for candidate in self.get_test_weights(add_random=False):

            tried_weights.append(candidate)
            print(separator)

            hybrid_args = (
                self.urm_train,
                self.icm,
                self.p_icfknn,
                self.p_ucfknn,
                self.p_cbfknn,
                self.p_slimbpr,
                self.p_puresvd,
                self.p_als,
                self.p_cfw,
                self.p_p3a,
                self.p_rp3b,
                self.p_slimen,
                candidate,
            )
            recommender = WeightedHybrid(*hybrid_args,
                                         seen_items=target_users_profile)
            recommender.fit()

            scores = evaluate_algorithm_crossvalidation(
                self.urm_validation, recommender, self.target_users)
            self.results.append(float(scores["MAP"]))

            # Drop the reference to the fitted model before the next candidate.
            del recommender

            self.writer.write_report(self.writer, str(scores), report_counter)
            self.writer.write_report(self.writer, separator, report_counter)
    def evaluate(self):
        """
        Pick the best weight set on the validation split and test it.

        Every weight set from ``self.get_test_weights`` is fitted on
        ``self.urm_train`` and scored on ``self.urm_validation``. The set with
        the highest MAP (the first one on ties) is then refitted on
        ``self.urm_post_validation`` and scored on ``self.urm_test``. Weights
        and results are written to the report along the way.

        :return: None
        """
        separator = "--------------------------------------"
        candidates = []
        map_scores = []

        for candidate in self.get_test_weights(add_random=False):
            candidates.append(candidate)
            print(separator)

            validation_args = (
                self.urm_train, self.icm,
                self.p_icfknn, self.p_ucfknn, self.p_cbfknn,
                self.p_slimbpr, self.p_puresvd, self.p_als,
                self.p_cfw, self.p_p3a, self.p_rp3b,
                self.p_slimen, candidate,
            )
            recommender = WeightedHybrid(*validation_args)
            recommender.fit()

            scores = evaluate_algorithm(self.urm_validation, recommender)
            map_scores.append(float(scores["MAP"]))

            self.writer.write_report(self.writer, str(candidate),
                                     report_counter)
            if self.is_SSLIM:
                self.writer.write_report(self.writer, str(self.sslim_pars),
                                         report_counter)
            self.writer.write_report(self.writer, str(scores), report_counter)

        # Position of the first maximum selects the winning weight set.
        best_position = map_scores.index(max(map_scores))
        weight = candidates[best_position]

        for banner_line in (separator, "TESTING", separator):
            self.writer.write_report(self.writer, banner_line, report_counter)

        test_args = (
            self.urm_post_validation, self.icm,
            self.p_icfknn, self.p_ucfknn, self.p_cbfknn,
            self.p_slimbpr, self.p_puresvd, self.p_als,
            self.p_cfw, self.p_p3a, self.p_rp3b,
            self.p_slimen, weight,
        )
        recommender = WeightedHybrid(*test_args)
        recommender.fit()
        test_scores = evaluate_algorithm(self.urm_test, recommender)

        self.writer.write_report(self.writer, str(weight), report_counter)
        self.writer.write_report(self.writer, str(test_scores),
                                 report_counter)
class Optimizer(object):
    """
    Hyper-parameter search for the WeightedHybrid recommender driven by
    ``gp_minimize``.

    The ``optimize_*`` methods describe the search space as dicts of
    ``Real`` / ``Integer`` / ``Categorical`` dimensions. The matching
    ``rebuild_*`` methods turn a slice of the flat vector handed to the
    objective back into a parameter dict, so the key order of each
    ``optimize_*`` dict must match the indices of its ``rebuild_*`` method.
    """

    # Length of the flat vector searched by run(): three KNN blocks of 7,
    # SLIM 7, PureSVD 1, P3alpha 3, RP3beta 4 and 7 weights.
    _HYBRID_VECTOR_LENGTH = 43

    def __init__(self):
        """Load the matrices and split them into train/validation/test."""
        self.HYP = {}
        self.report_counter = 60
        self.writer = Writer()

        # State filled by iterator_to_create_dimension()
        self.hyperparams = dict()
        self.hyperparams_names = list()
        self.hyperparams_values = list()
        self.hyperparams_single_value = dict()

        # Extractor for matrices
        extractor = Extractor()
        urm = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()

        # Splitting into post-validation & testing in case of parameter tuning
        matrices = loo.split_train_leave_k_out_user_wise(urm, 1, False, True)

        self.urm_post_validation = matrices[0]
        self.urm_test = matrices[1]

        # Splitting the post-validation matrix in train & validation
        # (Problem of merging train and validation again at the end => loo twice)
        matrices_for_validation = loo.split_train_leave_k_out_user_wise(
            self.urm_post_validation, 1, False, True)
        self.urm_train = matrices_for_validation[0]
        self.urm_validation = matrices_for_validation[1]

    def optimeze_weights(self):
        """
        Search space of the hybrid weights.

        :return: dict mapping each algorithm name to a uniform ``Real``
            dimension on [0, 5]; the key order matches ``rebuild_weights``
        """
        names = ("icfknn", "ucfknn", "cbfknn", "slimbpr", "puresvd", "p3a",
                 "rp3b")
        return {name: Real(low=0, high=5, prior='uniform') for name in names}

    def rebuild_weights(self, array):
        """
        Decode the first seven values of ``array`` into the weights dict.

        :param array: indexable sequence with at least 7 values
        :return: dict of algorithm name to weight
        """
        return {
            "icfknn": array[0],
            "ucfknn": array[1],
            "cbfknn": array[2],
            "slimbpr": array[3],
            "puresvd": array[4],
            "p3a": array[5],
            "rp3b": array[6]
        }

    def optimize_single_KNN(self):
        """
        Search space of one KNN recommender.

        :return: dict of 7 dimensions; the key order matches
            ``rebuild_single_KNN``
        """
        # "normalize" stays a free dimension: the similarity is only known
        # once a point is sampled, so comparing the Categorical dimension with
        # a similarity name here could never be true.
        return {
            "topK": Integer(5, 800),
            "shrink": Integer(0, 1000),
            "similarity": Categorical(similarity_type),
            "normalize": Categorical([True, False]),
            "asymmetric_alpha": Real(low=0, high=2, prior='uniform'),
            "tversky_alpha": Real(low=0, high=2, prior='uniform'),
            "tversky_beta": Real(low=0, high=2, prior='uniform')
        }

    def rebuild_single_KNN(self, array):
        """
        Decode the first seven values of ``array`` into KNN parameters.

        :param array: indexable sequence with at least 7 values
        :return: dict of KNN parameter name to value
        """
        return {
            "topK": array[0],
            "shrink": array[1],
            "similarity": array[2],
            "normalize": array[3],
            "asymmetric_alpha": array[4],
            "tversky_alpha": array[5],
            "tversky_beta": array[6]
        }

    def optimize_all_KNN(self):
        """
        :return: tuple of three independent KNN search spaces, in the order
            (item CF, user CF, content based)
        """
        ICFKNN = self.optimize_single_KNN()
        UCFKNN = self.optimize_single_KNN()
        CBFKNN = self.optimize_single_KNN()

        return (ICFKNN, UCFKNN, CBFKNN)

    def optimize_slim(self):
        """
        :return: search space of SLIM BPR; the key order matches
            ``rebuild_slim``
        """
        return {
            "topK": Integer(5, 1000),
            "epochs": Integer(20, 1500),
            "symmetric": Categorical([True, False]),
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "lambda_i": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "lambda_j": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform')
        }

    def rebuild_slim(self, array):
        """
        Decode the first seven values of ``array`` into SLIM BPR parameters.

        :param array: indexable sequence with at least 7 values
        :return: dict of SLIM BPR parameter name to value
        """
        return {
            "topK": array[0],
            "epochs": array[1],
            "symmetric": array[2],
            "sgd_mode": array[3],
            "lambda_i": array[4],
            "lambda_j": array[5],
            "learning_rate": array[6]
        }

    def optimize_puresvd(self):
        """:return: search space of PureSVD (a single dimension)"""
        return {"num_factors": Integer(5, 1000)}

    def rebuild_puresvd(self, array):
        """
        :param array: indexable sequence with at least 1 value
        :return: dict holding the PureSVD ``num_factors``
        """
        return {"num_factors": array[0]}

    def optimize_als(self):
        """
        :return: search space of ALS; the key order matches ``rebuild_als``
        """
        return {
            "alpha_val": Real(low=0, high=2, prior='uniform'),
            "n_factors": Integer(5, 1000),
            "regularization": Real(low=1e-4, high=10, prior='log-uniform'),
            "iterations": Integer(5, 50)
        }

    def rebuild_als(self, array):
        """
        Decode the first four values of ``array`` into ALS parameters.

        :param array: indexable sequence with at least 4 values
        :return: dict of ALS parameter name to value
        """
        return {
            "alpha_val": array[0],
            "n_factors": array[1],
            "regularization": array[2],
            "iterations": array[3]
        }

    def optimize_p3a(self):
        """
        :return: search space of P3alpha; the key order matches
            ``rebuild_p3a``
        """
        return {
            "topK": Integer(5, 800),
            "alpha": Real(low=0, high=2, prior='uniform'),
            "normalize_similarity": Categorical([True, False])
        }

    def rebuild_p3a(self, array):
        """
        Decode the first three values of ``array`` into P3alpha parameters.

        :param array: indexable sequence with at least 3 values
        :return: dict of P3alpha parameter name to value
        """
        return {
            "topK": array[0],
            "alpha": array[1],
            "normalize_similarity": array[2]
        }

    def optimize_rp3beta(self):
        """
        :return: search space of RP3beta; the key order matches
            ``rebuild_rp3beta``
        """
        return {
            "topK": Integer(5, 800),
            "alpha": Real(low=0, high=2, prior='uniform'),
            "beta": Real(low=0, high=2, prior='uniform'),
            "normalize_similarity": Categorical([True, False])
        }

    def rebuild_rp3beta(self, array):
        """
        Decode the first four values of ``array`` into RP3beta parameters.

        :param array: indexable sequence with at least 4 values
        :return: dict of RP3beta parameter name to value
        """
        return {
            "topK": array[0],
            "alpha": array[1],
            "beta": array[2],
            "normalize_similarity": array[3]
        }

    def _build_hybrid(self, urm, hyp):
        """Decode the full-hybrid vector ``hyp`` into a WeightedHybrid on ``urm``."""
        # The two None arguments sit between the PureSVD and P3alpha settings.
        return WeightedHybrid(urm, self.icm,
                              self.rebuild_single_KNN(hyp[0:7]),
                              self.rebuild_single_KNN(hyp[7:14]),
                              self.rebuild_single_KNN(hyp[14:21]),
                              self.rebuild_slim(hyp[21:28]),
                              self.rebuild_puresvd(hyp[28:29]),
                              None, None,
                              self.rebuild_p3a(hyp[29:32]),
                              self.rebuild_rp3beta(hyp[32:36]),
                              self.rebuild_weights(hyp[36:]))

    def evaluate(self, hyp):
        """
        Objective of the full-hybrid search: fit on ``self.urm_train`` and
        score on ``self.urm_validation``.

        :param hyp: flat vector laid out as described on
            ``_HYBRID_VECTOR_LENGTH``
        :return: the MAP at 10 multiplied by -1 (``gp_minimize`` minimises)
        """
        self.recommender = self._build_hybrid(self.urm_train, hyp)
        self.recommender.fit()
        result = evaluate_algorithm(self.urm_validation,
                                    self.recommender,
                                    at=10)

        return float(result["MAP"] * (-1))

    def post_validation(self, hyp):
        """
        Refit the hybrid described by ``hyp`` on ``self.urm_post_validation``
        and append its score on ``self.urm_test`` to the report.

        :param hyp: flat vector laid out as described on
            ``_HYBRID_VECTOR_LENGTH``
        :return: None
        """
        self.recommender = self._build_hybrid(self.urm_post_validation, hyp)
        self.recommender.fit()
        result = evaluate_algorithm(self.urm_test, self.recommender, at=10)
        self.writer.write_report("\n\n" + str(result), self.report_counter)

    def evaluate_single(self, hyp):
        """
        Objective of the ALS-only search.

        :param hyp: flat vector whose first four values are the ALS
            parameters
        :return: the MAP at 10 multiplied by -1 (``gp_minimize`` minimises)
        """
        self.recommender = WeightedHybrid(self.urm_train,
                                          self.icm,
                                          p_icfknn=None,
                                          p_ucfknn=None,
                                          p_cbfknn=None,
                                          p_slimbpr=None,
                                          p_puresvd=None,
                                          p_als=self.rebuild_als(hyp[0:]),
                                          p_cfw=None,
                                          p_p3a=None,
                                          p_rp3b=None,
                                          weights={"als": 1})
        self.recommender.fit()
        # NOTE(review): this objective scores on urm_test while evaluate()
        # scores on urm_validation - confirm tuning on the test split is
        # intended.
        result = evaluate_algorithm(self.urm_test, self.recommender, at=10)

        return float(result["MAP"] * (-1))

    def _reset_search_space(self):
        """Forget the dimensions collected by a previous run."""
        self.HYP = {}
        self.hyperparams = dict()
        self.hyperparams_names = list()
        self.hyperparams_values = list()
        self.hyperparams_single_value = dict()

    def _minimize(self, objective):
        """Run ``gp_minimize`` on ``objective`` over the collected dimensions."""
        return gp_minimize(
            objective,
            self.hyperparams_values,
            n_calls=70,
            n_random_starts=20,
            n_points=10000,
            noise=1e-5,
            acq_func='gp_hedge',
            acq_optimizer='auto',
            random_state=None,
            verbose=True,
            n_restarts_optimizer=10,
            xi=0.01,
            kappa=1.96,
            x0=None,
            y0=None,
            n_jobs=-1)

    def run(self):
        """
        Search the parameters and weights of the full hybrid, report the
        result and score the best point on the test split.

        :return: None
        """
        # Start from an empty space so a repeated run does not hand
        # gp_minimize the dimensions of the previous one as well.
        self._reset_search_space()

        # Insertion order defines the layout of the flat vector decoded by
        # _build_hybrid().
        self.HYP["p_icfknn"], self.HYP["p_ucfknn"], self.HYP[
            "p_cbfknn"] = self.optimize_all_KNN()
        self.HYP["p_slimbpr"] = self.optimize_slim()
        self.HYP["p_puresvd"] = self.optimize_puresvd()
        self.HYP["p_p3a"] = self.optimize_p3a()
        self.HYP["p_rp3b"] = self.optimize_rp3beta()

        self.HYP["weight"] = self.optimeze_weights()

        self.iterator_to_create_dimension(self.HYP)

        res = self._minimize(self.evaluate)

        self.writer.write_report(str(res), self.report_counter)
        self.create_parameters(res["x"])
        self.post_validation(res["x"])

    def run_single(self):
        """
        Search the ALS parameters only and report the result.

        :return: None
        """
        # evaluate_single() reads the ALS values from the start of the
        # vector, so nothing may precede them in the search space.
        self._reset_search_space()
        self.HYP["p_als"] = self.optimize_als()

        self.iterator_to_create_dimension(self.HYP)

        res = self._minimize(self.evaluate_single)

        self.writer.write_report(str(res), self.report_counter)
        self.create_parameters(res["x"])

    def iterator_to_create_dimension(self, to_iterate):
        """
        Walk ``to_iterate`` recursively and collect every search dimension.

        Dimensions are appended to ``self.hyperparams_names`` /
        ``self.hyperparams_values`` in traversal order; plain ``str``,
        ``int`` and ``bool`` values are stored in
        ``self.hyperparams_single_value``.

        :param to_iterate: dict of names to dimensions, constants or
            nested dicts
        :raises ValueError: on a value of any other type
        """
        skopt_types = (Real, Integer, Categorical)
        for name, hyperparam in to_iterate.items():
            if isinstance(hyperparam, skopt_types):
                self.hyperparams_names.append(name)
                self.hyperparams_values.append(hyperparam)
                # Keyed by the bare name, so equally named dimensions of
                # different algorithms overwrite each other in this dict.
                self.hyperparams[name] = hyperparam

            elif isinstance(hyperparam, (str, int, bool)):
                self.hyperparams_single_value[name] = hyperparam
            elif isinstance(hyperparam, dict):
                self.iterator_to_create_dimension(hyperparam)
            else:
                raise ValueError("Unexpected parameter type: {} - {}".format(
                    str(name), str(hyperparam)))

    def create_parameters(self, hyp):
        """
        Advance ``self.report_counter`` and write the decoded parameters.

        A vector of ``_HYBRID_VECTOR_LENGTH`` values is reported section by
        section as the full hybrid; any other vector is reported as ALS
        parameters.

        :param hyp: flat vector of values, e.g. ``res["x"]``
        :return: None
        """
        self.report_counter = self.report_counter + 1

        if len(hyp) == self._HYBRID_VECTOR_LENGTH:
            sections = [
                ("p_icfknn", self.rebuild_single_KNN(hyp[0:7])),
                ("p_ucfknn", self.rebuild_single_KNN(hyp[7:14])),
                ("p_cbfknn", self.rebuild_single_KNN(hyp[14:21])),
                ("p_slimbpr", self.rebuild_slim(hyp[21:28])),
                ("p_puresvd", self.rebuild_puresvd(hyp[28:29])),
                ("p_p3a", self.rebuild_p3a(hyp[29:32])),
                ("p_rp3b", self.rebuild_rp3beta(hyp[32:36])),
                ("weight", self.rebuild_weights(hyp[36:])),
            ]
        else:
            sections = [("p_als", self.rebuild_als(hyp[0:]))]

        for label, params in sections:
            self.writer.write_report(label + " :" + str(params),
                                     self.report_counter)
예제 #10
0
    def evaluate(self):
        """
        Build a candidate dataframe from item-CF KNN recommendations, add the
        item features and train an XGBoost model on a random 80/20 split.

        The recommended (user, item) pairs are appended to
        ``self.user_recommendations_user_id`` and
        ``self.user_recommendations_items``; the dataframe is stored in
        ``self.train_dataframe``. The predictions on the held-out part are
        printed.

        :return: None
        """
        weight = {"icfknn": 1}

        recommender = WeightedHybrid(self.urm_train, self.icm, self.p_icfknn,
                                     None, None, None, None, None, None, None,
                                     None, weight)
        recommender.fit()

        # Collect the recommendations (at ``self.cutoff``) of every user index
        # NOTE(review): the two lists are extended, never cleared here, so a
        # second call would append to the pairs of the first one - confirm.
        for n_user in range(0, self.urm_test.shape[0]):
            recommendations = recommender.recommend(n_user, at=self.cutoff)

            self.user_recommendations_user_id.extend([n_user] *
                                                     len(recommendations))
            self.user_recommendations_items.extend(recommendations)

        # CREATING THE DATAFRAME FOR XGBOOST
        self.train_dataframe = pd.DataFrame({
            "user_id":
            self.user_recommendations_user_id,
            "item_id":
            self.user_recommendations_items
        })

        ############################
        ###### ADDING FEATURES #####
        ############################

        # BUILDING POPULARITY ITEMS
        #self.add_top_pop_items()

        # BUILDING USER PROFILE LENGTH
        #self.add_user_profile_length()

        # BUILDING ITEM ASSETS
        self.add_item_asset()

        # BUILDING ITEM PRICE
        self.add_item_price()

        # BUILDING ITEM SUBCLASS
        self.add_item_subclass()

        print(self.train_dataframe.head())

        # NOTE(review): 'multi:softprob' normally needs 'num_class' and a
        # labelled DMatrix; neither is set here - confirm before relying on
        # the trained model.
        params = {
            'max_depth': 3,  # the maximum depth of each tree
            'eta': 0.3,  # step for each iteration
            'silent': 1,  # keep it quiet
            'objective':
            'multi:softprob',  # error evaluation for multiclass training
            #'num_class': 3,  # the number of classes
            'eval_metric': 'merror'
        }  # evaluation metric

        num_round = 20  # the number of training iterations (number of trees)

        # Random row mask: roughly 80% of the rows train, the rest evaluate
        msk = np.random.rand(len(self.train_dataframe)) < 0.8
        dtrain = self.train_dataframe[msk]
        dtest = self.train_dataframe[~msk]

        dtrain = xgb.DMatrix(dtrain, missing=-999.0)
        dtest = xgb.DMatrix(dtest, missing=-999.0)

        evallist = [(dtest, 'eval'), (dtrain, 'train')]

        model = xgb.train(params, dtrain, num_round, evallist)

        # Booster.predict() needs the data to score; use the held-out rows
        # and compute the predictions once for both prints.
        predictions = model.predict(dtest)
        print(predictions)
        print(" prediction array:" + str(len(predictions)))