def write_submission(self, users):
    """
    This method is used to write the submission, selecting only the chosen algorithms.
    :return:
    """
    # Build the hybrid with the configured components and the first "no weights" profile
    recommender = WeightedHybrid(self.urm_train, self.icm,
                                 self.p_icfknn, self.p_ucfknn, self.p_cbfknn,
                                 self.p_slimbpr, self.p_puresvd, self.p_als,
                                 self.p_cfw, self.p_p3a, self.p_rp3b, self.p_slimen,
                                 WeightConstants.NO_WEIGHTS[0],
                                 seen_items=self.urm_train)
    recommender.fit()

    from tqdm import tqdm
    # Write one submission row per target user with its top-20 recommendations
    for user_id in tqdm(users):
        recs = recommender.recommend(user_id, at=20)
        self.writer.write(self.writer, user_id, recs, sub_counter=submission_counter)

    print("Submission file written")
def write_submission(self, users):
    """
    This method is used to write the submission, selecting only the chosen algorithms.
    :return:
    """
    self.writer.write_header(self.writer, sub_counter=submission_counter)

    # Train SLIM BPR on the ICM and use the resulting matrix in place of the original ICM
    from SLIM.SLIM_BPR_Cython import SLIM_BPR_Cython
    slim_bpr = SLIM_BPR_Cython(self.icm)
    slim_bpr.fit(**WeightConstants.SLIM_BPR)
    self.icm = slim_bpr.recs.copy().tocsr()

    recommender = WeightedHybrid(self.urm_train, self.icm,
                                 self.p_icfknn, self.p_ucfknn, self.p_cbfknn,
                                 self.p_slimbpr, self.p_puresvd, self.p_als,
                                 self.p_cfw, self.p_p3a, self.p_rp3b, self.p_slimen,
                                 WeightConstants.SUBM_WEIGHTS)
    recommender.fit()

    from tqdm import tqdm
    # Write one submission row per target user with its top-10 recommendations
    for user_id in tqdm(users):
        recs = recommender.recommend(user_id, at=10)
        self.writer.write(self.writer, user_id, recs, sub_counter=submission_counter)

    print("Submission file written")
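# The actual WeightConstants module is not part of this snippet. Below is a minimal,
# hypothetical sketch of what it is assumed to contain: dictionaries keyed by the same
# algorithm names used by the hybrid. The class name and all numeric values here are
# placeholders, not the tuned constants from the project.
class WeightConstantsSketch:
    # NO_WEIGHTS: a list of neutral weight profiles (every component weighted equally)
    NO_WEIGHTS = [{'icfknn': 1, 'ucfknn': 1, 'cbfknn': 1, 'slimbpr': 1, 'puresvd': 1,
                   'als': 1, 'cfw': 1, 'p3a': 1, 'rp3b': 1, 'slimen': 1}]
    # SUBM_WEIGHTS: the weight profile used when writing the submission
    SUBM_WEIGHTS = {'icfknn': 1, 'ucfknn': 1, 'cbfknn': 1, 'slimbpr': 1, 'puresvd': 1,
                    'als': 1, 'cfw': 1, 'p3a': 1, 'rp3b': 1, 'slimen': 1}
    # SLIM_BPR: keyword arguments forwarded to SLIM_BPR_Cython.fit(**...)
    SLIM_BPR = {'topK': 200, 'epochs': 300, 'lambda_i': 0.001, 'lambda_j': 0.001,
                'learning_rate': 0.01}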
def evaluate(self, index: int, target_users_profile):
    """
    This method captures the predictions of the CrossValidation run of the Hybrid.
    :param index: number of the iteration (from 1 to 4), depending on the current sub-URM
    :param target_users_profile: profile of the users we want to predict for
    :return:
    """
    weight = {'icfknn': 1, 'ucfknn': 1, 'cbfknn': 1, 'slimbpr': 1, 'puresvd': 1,
              'als': 1, 'cfw': 1, 'p3a': 1, 'rp3b': 1, 'slimen': 1}

    recommender = WeightedHybrid(self.urm_train, self.icm,
                                 self.p_icfknn, self.p_ucfknn, self.p_cbfknn,
                                 self.p_slimbpr, self.p_puresvd, self.p_als,
                                 self.p_cfw, self.p_p3a, self.p_rp3b, self.p_slimen,
                                 weight,
                                 seen_items=target_users_profile)
    recommender.fit()

    user_ids = []
    item_ids = []

    # SELECTING THE BEST RECOMMENDATIONS
    for n_user in self.target_users:
        recommendations = recommender.recommend(n_user, at=self.cutoff)
        user_ids.extend([n_user] * len(recommendations))
        item_ids.extend(recommendations)

    return user_ids, item_ids
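# A minimal sketch of how the (user_ids, item_ids) lists returned above can be
# collected into a DataFrame, mirroring the "user_id"/"item_id" columns used later
# for the XGBoost re-ranking step. The values below are placeholders standing in for
# the output of evaluate(); they are not real predictions.
import pandas as pd

user_ids = [0, 0, 1]      # placeholder output of evaluate()
item_ids = [10, 42, 7]    # placeholder output of evaluate()
predictions = pd.DataFrame({"user_id": user_ids, "item_id": item_ids})
print(predictions.head())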
def evaluate(self, index: int, target_users_profile):
    """
    Method used for the validation and the calculation of the weights
    """
    generated_weights = []
    self.writer.write_report(self.writer, "VALIDATION " + str(index), report_counter)

    for weight in self.get_test_weights(add_random=False):
        generated_weights.append(weight)
        print("--------------------------------------")

        recommender = WeightedHybrid(self.urm_train, self.icm,
                                     self.p_icfknn, self.p_ucfknn, self.p_cbfknn,
                                     self.p_slimbpr, self.p_puresvd, self.p_als,
                                     self.p_cfw, self.p_p3a, self.p_rp3b, self.p_slimen,
                                     weight,
                                     seen_items=target_users_profile)
        recommender.fit()

        result_dict = evaluate_algorithm_crossvalidation(self.urm_validation, recommender, self.target_users)
        self.results.append(float(result_dict["MAP"]))
        del recommender

        # self.writer.write_report(self.writer, str(weight), report_counter)
        self.writer.write_report(self.writer, str(result_dict), report_counter)
        self.writer.write_report(self.writer, "--------------------------------------", report_counter)
def evaluate(self):
    """
    Method used for the validation and the calculation of the weights
    """
    generated_weights = []
    results = []

    for weight in self.get_test_weights(add_random=False):
        generated_weights.append(weight)
        print("--------------------------------------")

        recommender = WeightedHybrid(self.urm_train, self.icm,
                                     self.p_icfknn, self.p_ucfknn, self.p_cbfknn,
                                     self.p_slimbpr, self.p_puresvd, self.p_als,
                                     self.p_cfw, self.p_p3a, self.p_rp3b, self.p_slimen,
                                     weight)
        recommender.fit()

        result_dict = evaluate_algorithm(self.urm_validation, recommender)
        results.append(float(result_dict["MAP"]))

        self.writer.write_report(self.writer, str(weight), report_counter)
        if self.is_SSLIM:
            self.writer.write_report(self.writer, str(self.sslim_pars), report_counter)
        self.writer.write_report(self.writer, str(result_dict), report_counter)

    # Retrieving the weight combination with the best validation MAP
    # results.sort()
    weight = generated_weights[int(results.index(max(results)))]

    self.writer.write_report(self.writer, "--------------------------------------", report_counter)
    self.writer.write_report(self.writer, "TESTING", report_counter)
    self.writer.write_report(self.writer, "--------------------------------------", report_counter)

    recommender = WeightedHybrid(self.urm_post_validation, self.icm,
                                 self.p_icfknn, self.p_ucfknn, self.p_cbfknn,
                                 self.p_slimbpr, self.p_puresvd, self.p_als,
                                 self.p_cfw, self.p_p3a, self.p_rp3b, self.p_slimen,
                                 weight)
    recommender.fit()

    result_dict = evaluate_algorithm(self.urm_test, recommender)
    self.writer.write_report(self.writer, str(weight), report_counter)
    self.writer.write_report(self.writer, str(result_dict), report_counter)
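# get_test_weights() is not included in this snippet. Below is a minimal, hypothetical
# sketch of a generator playing the same role: yielding one weight dictionary per
# candidate combination. The function name, the grid values and the choice of which
# components to vary are all placeholders, not the project's real search space.
import itertools

def get_test_weights_sketch(add_random=False):
    # add_random is accepted only for signature parity and is ignored in this sketch
    candidate_values = [0.5, 1, 2]
    for icfknn_w, slimbpr_w in itertools.product(candidate_values, repeat=2):
        yield {'icfknn': icfknn_w, 'ucfknn': 1, 'cbfknn': 1, 'slimbpr': slimbpr_w,
               'puresvd': 1, 'als': 1, 'cfw': 1, 'p3a': 1, 'rp3b': 1, 'slimen': 1}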
class Optimizer(object):

    def __init__(self):
        self.HYP = {}
        self.report_counter = 60
        self.writer = Writer()

        # Some parameters
        self.hyperparams = dict()
        self.hyperparams_names = list()
        self.hyperparams_values = list()
        self.hyperparams_single_value = dict()

        # Extractor for matrices
        extractor = Extractor()
        urm = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()

        # Splitting into post-validation & testing in case of parameter tuning
        matrices = loo.split_train_leave_k_out_user_wise(urm, 1, False, True)
        self.urm_post_validation = matrices[0]
        self.urm_test = matrices[1]

        # Splitting the post-validation matrix into train & validation
        # (Problem of merging train and validation again at the end => loo twice)
        matrices_for_validation = loo.split_train_leave_k_out_user_wise(self.urm_post_validation, 1, False, True)
        self.urm_train = matrices_for_validation[0]
        self.urm_validation = matrices_for_validation[1]

    def optimeze_weights(self):
        # weights = {'icfknn': 2, 'ucfknn': 0.2, 'cbfknn': 0.5, 'slimbpr': 1, 'puresvd': 1.5, 'als': 1, 'cfw': 3, 'p3a': 2, 'rp3b': 3}
        weights = {}
        weights["icfknn"] = Real(low=0, high=5, prior='uniform')  # high=100000, prior='log-uniform')
        weights["ucfknn"] = Real(low=0, high=5, prior='uniform')
        weights["cbfknn"] = Real(low=0, high=5, prior='uniform')
        weights["slimbpr"] = Real(low=0, high=5, prior='uniform')
        weights["puresvd"] = Real(low=0, high=5, prior='uniform')
        # weights["als"] = Real(low=0, high=5, prior='uniform')
        weights["p3a"] = Real(low=0, high=5, prior='uniform')
        weights["rp3b"] = Real(low=0, high=5, prior='uniform')

        return weights

    def rebuild_weights(self, array):
        return {
            "icfknn": array[0],
            "ucfknn": array[1],
            "cbfknn": array[2],
            "slimbpr": array[3],
            "puresvd": array[4],
            "p3a": array[5],
            "rp3b": array[6]
        }

    def optimize_single_KNN(self):
        parameters = {
            "topK": Integer(5, 800),
            "shrink": Integer(0, 1000),
            "similarity": Categorical(similarity_type),
            "normalize": Categorical([True, False])
        }

        # Note: at this point parameters["similarity"] is a skopt Categorical dimension,
        # not a string, so the two branches below are never taken while the search space is built.
        if parameters["similarity"] == "asymmetric":
            parameters["normalize"] = Categorical([True])
        elif parameters["similarity"] == "tversky":
            parameters["normalize"] = Categorical([True])

        parameters["asymmetric_alpha"] = Real(low=0, high=2, prior='uniform')
        parameters["tversky_alpha"] = Real(low=0, high=2, prior='uniform')
        parameters["tversky_beta"] = Real(low=0, high=2, prior='uniform')

        return parameters

    def rebuild_single_KNN(self, array):
        return {
            "topK": array[0],
            "shrink": array[1],
            "similarity": array[2],
            "normalize": array[3],
            "asymmetric_alpha": array[4],
            "tversky_alpha": array[5],
            "tversky_beta": array[6]
        }

    def optimize_all_KNN(self):
        ICFKNN = self.optimize_single_KNN()
        UCFKNN = self.optimize_single_KNN()
        CBFKNN = self.optimize_single_KNN()
        return (ICFKNN, UCFKNN, CBFKNN)

    def optimize_slim(self):
        return {
            "topK": Integer(5, 1000),
            "epochs": Integer(20, 1500),
            "symmetric": Categorical([True, False]),
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "lambda_i": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "lambda_j": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform')
        }

    def rebuild_slim(self, array):
        return {
            "topK": array[0],
            "epochs": array[1],
            "symmetric": array[2],
            "sgd_mode": array[3],
            "lambda_i": array[4],
            "lambda_j": array[5],
            "learning_rate": array[6]
        }

    def optimize_puresvd(self):
        return {"num_factors": Integer(5, 1000)}

    def rebuild_puresvd(self, array):
        return {"num_factors": array[0]}

    def optimize_als(self):
        return {
            "alpha_val": Real(low=0, high=2, prior='uniform'),
            "n_factors": Integer(5, 1000),
"regularization": Real(low=1e-4, high=10, prior='log-uniform'), "iterations": Integer(5, 50) } def rebuild_als(self, array): return { "alpha_val": array[0], "n_factors": array[1], "regularization": array[2], "iterations": array[3] } def optimize_p3a(self): return { "topK": Integer(5, 800), "alpha": Real(low=0, high=2, prior='uniform'), "normalize_similarity": Categorical([True, False]) } def rebuild_p3a(self, array): return { "topK": array[0], "alpha": array[1], "normalize_similarity": array[2] } def optimize_rp3beta(self): return { "topK": Integer(5, 800), "alpha": Real(low=0, high=2, prior='uniform'), "beta": Real(low=0, high=2, prior='uniform'), "normalize_similarity": Categorical([True, False]) } def rebuild_rp3beta(self, array): return { "topK": array[0], "alpha": array[1], "beta": array[2], "normalize_similarity": array[3] } def evaluate(self, hyp): # print("NUMBER OF PARAMETERS ON evaluate():" + str(len(hyp))) self.recommender = WeightedHybrid(self.urm_train, self.icm, self.rebuild_single_KNN(hyp[0:7]), self.rebuild_single_KNN(hyp[7:14]), self.rebuild_single_KNN(hyp[14:21]), self.rebuild_slim(hyp[21:28]), self.rebuild_puresvd(hyp[28:29]), None, None, self.rebuild_p3a(hyp[29:32]), self.rebuild_rp3beta(hyp[32:36]), self.rebuild_weights(hyp[36:])) self.recommender.fit() result = evaluate_algorithm(self.urm_validation, self.recommender, at=10) return float(result["MAP"] * (-1)) def post_validation(self, hyp): self.recommender = WeightedHybrid(self.urm_post_validation, self.icm, self.rebuild_single_KNN(hyp[0:7]), self.rebuild_single_KNN(hyp[7:14]), self.rebuild_single_KNN(hyp[14:21]), self.rebuild_slim(hyp[21:28]), self.rebuild_puresvd(hyp[28:29]), None, None, self.rebuild_p3a(hyp[29:32]), self.rebuild_rp3beta(hyp[32:36]), self.rebuild_weights(hyp[36:])) self.recommender.fit() result = evaluate_algorithm(self.urm_test, self.recommender, at=10) self.writer.write_report("\n\n" + str(result), self.report_counter) def evaluate_single(self, hyp): self.recommender = WeightedHybrid(self.urm_train, self.icm, p_icfknn=None, p_ucfknn=None, p_cbfknn=None, p_slimbpr=None, p_puresvd=None, p_als=self.rebuild_als(hyp[0:]), p_cfw=None, p_p3a=None, p_rp3b=None, weights={"als": 1}) self.recommender.fit() result = evaluate_algorithm(self.urm_test, self.recommender, at=10) return float(result["MAP"] * (-1)) def run(self): self.HYP = {} self.HYP["p_icfknn"], self.HYP["p_ucfknn"], self.HYP[ "p_cbfknn"] = self.optimize_all_KNN() self.HYP["p_slimbpr"] = self.optimize_slim() self.HYP["p_puresvd"] = self.optimize_puresvd() # self.HYP["p_als"] = self.optimize_als() self.HYP["p_p3a"] = self.optimize_p3a() self.HYP["p_rp3b"] = self.optimize_rp3beta() self.HYP["weight"] = self.optimeze_weights() self.iterator_to_create_dimension(self.HYP) res = gp_minimize( self.evaluate, self.hyperparams_values, n_calls=70, n_random_starts=20, n_points=10000, # noise = 'gaussian', noise=1e-5, acq_func='gp_hedge', acq_optimizer='auto', random_state=None, verbose=True, n_restarts_optimizer=10, xi=0.01, kappa=1.96, x0=None, y0=None, n_jobs=-1) self.writer.write_report(str(res), self.report_counter) self.create_parameters(res["x"]) self.post_validation(res["x"]) def run_single(self): self.HYP["p_als"] = self.optimize_als() self.iterator_to_create_dimension(self.HYP) res = gp_minimize( self.evaluate_single, self.hyperparams_values, n_calls=70, n_random_starts=20, n_points=10000, # noise = 'gaussian', noise=1e-5, acq_func='gp_hedge', acq_optimizer='auto', random_state=None, verbose=True, n_restarts_optimizer=10, xi=0.01, kappa=1.96, 
                          x0=None,
                          y0=None,
                          n_jobs=-1)

        self.writer.write_report(str(res), self.report_counter)
        self.create_parameters(res["x"])

    def iterator_to_create_dimension(self, to_iterate):
        skopt_types = [Real, Integer, Categorical]

        for name, hyperparam in to_iterate.items():
            if any(isinstance(hyperparam, sko_type) for sko_type in skopt_types):
                self.hyperparams_names.append(name)
                self.hyperparams_values.append(hyperparam)
                self.hyperparams[name] = hyperparam
            elif isinstance(hyperparam, str) or isinstance(hyperparam, int) or isinstance(hyperparam, bool):
                self.hyperparams_single_value[name] = hyperparam
            elif isinstance(hyperparam, dict):
                self.iterator_to_create_dimension(to_iterate[name])
            else:
                raise ValueError("Unexpected parameter type: {} - {}".format(str(name), str(hyperparam)))

    def create_parameters(self, hyp):
        self.report_counter = self.report_counter + 1

        # self.writer.write_report("p_icfknn :" + str(self.rebuild_single_KNN(hyp[0:7])), self.report_counter)
        # self.writer.write_report("p_ucfknn :" + str(self.rebuild_single_KNN(hyp[7:14])), self.report_counter)
        # self.writer.write_report("p_cbfknn :" + str(self.rebuild_single_KNN(hyp[14:21])), self.report_counter)
        # self.writer.write_report("p_cbfknn :" + str(self.rebuild_single_KNN(hyp[0:7])), self.report_counter)
        # self.writer.write_report("p_slimbpr :" + str(self.rebuild_slim(hyp[21:28])), self.report_counter)
        # self.writer.write_report("p_puresvd :" + str(self.rebuild_puresvd(hyp[28:29])), self.report_counter)
        # self.writer.write_report("p_p3a :" + str(self.rebuild_p3a(hyp[29:32])), self.report_counter)
        # self.writer.write_report("p_rp3b :" + str(self.rebuild_rp3beta(hyp[32:36])), self.report_counter)
        # self.writer.write_report("weight :" + str(self.rebuild_weights(hyp[36:])), self.report_counter)
        self.writer.write_report("p_als :" + str(self.rebuild_als(hyp[0:])), self.report_counter)
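# A minimal, hypothetical usage sketch for the Optimizer class above (the __main__
# guard is an assumption; it is not part of the original file). run() searches the
# full hybrid space with gp_minimize, while run_single() tunes only the ALS component.
if __name__ == '__main__':
    optimizer = Optimizer()
    optimizer.run()            # full search over the KNNs, SLIM BPR, PureSVD, P3alpha, RP3beta and the weights
    # optimizer.run_single()   # alternative: search only the ALS hyperparameters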
def evaluate(self):
    weight = {"icfknn": 1}
    recommender = WeightedHybrid(self.urm_train, self.icm,
                                 self.p_icfknn, None, None, None, None, None, None, None, None,
                                 weight)
    recommender.fit()

    # SELECTING THE BEST RECOMMENDATIONS FOR EACH USER
    for n_user in range(0, self.urm_test.shape[0]):
        recommendations = recommender.recommend(n_user, at=self.cutoff)
        self.user_recommendations_user_id.extend([n_user] * len(recommendations))
        self.user_recommendations_items.extend(recommendations)

    # CREATING THE DATAFRAME FOR XGBOOST
    self.train_dataframe = pd.DataFrame({
        "user_id": self.user_recommendations_user_id,
        "item_id": self.user_recommendations_items
    })

    ############################
    ###### ADDING FEATURES #####
    ############################

    # BUILDING POPULARITY ITEMS
    # self.add_top_pop_items()

    # BUILDING USER PROFILE LENGTH
    # self.add_user_profile_length()

    # BUILDING ITEM ASSETS
    self.add_item_asset()

    # BUILDING ITEM PRICE
    self.add_item_price()

    # BUILDING ITEM SUBCLASS
    self.add_item_subclass()

    print(self.train_dataframe.head())
    # return

    params = {
        'max_depth': 3,                  # the maximum depth of each tree
        'eta': 0.3,                      # step size for each iteration
        'silent': 1,                     # keep it quiet
        'objective': 'multi:softprob',   # multiclass objective with per-class probabilities
        # 'num_class': 3,                # the number of classes
        'eval_metric': 'merror'          # evaluation metric
    }
    num_round = 20  # the number of training iterations (number of trees)

    # Random 80/20 split of the dataframe into train and evaluation parts
    msk = np.random.rand(len(self.train_dataframe)) < 0.8
    dtrain = self.train_dataframe[msk]
    dtest = self.train_dataframe[~msk]

    dtrain = xgb.DMatrix(dtrain, missing=-999.0)
    dtest = xgb.DMatrix(dtest, missing=-999.0)

    evallist = [(dtest, 'eval'), (dtrain, 'train')]
    model = xgb.train(params, dtrain, num_round, evallist)

    predictions = model.predict(dtest)
    print(predictions)
    # print("user array:" + str(len()))
    print("prediction array:" + str(len(predictions)))
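# The method above builds its DMatrix objects without a label column, while a
# supervised objective expects targets. Below is a minimal, hypothetical sketch of how
# a relevance label could be attached before training; it is not the project's actual
# pipeline. The feature values and labels are synthetic placeholders, and a binary
# objective ('binary:logistic') is used here instead of 'multi:softprob' because the
# sketched label is a 0/1 relevance flag.
import numpy as np
import pandas as pd
import xgboost as xgb

features = pd.DataFrame({
    "user_id": [0, 0, 1, 1],
    "item_id": [10, 42, 7, 3],
    "item_price": [9.9, 19.9, 4.5, 7.0],
})
labels = np.array([1, 0, 0, 1])  # 1 = recommended item actually interacted with, 0 = not

dtrain = xgb.DMatrix(features, label=labels, missing=-999.0)
params = {'max_depth': 3, 'eta': 0.3, 'objective': 'binary:logistic', 'eval_metric': 'error'}
model = xgb.train(params, dtrain, num_boost_round=20)
print(model.predict(dtrain))  # per-row predicted relevance scores, usable for re-ranking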