def __init__(self):
    """Set up reporting state and the train/validation/test URM splits."""
    self.HYP = {}
    self.report_counter = 60
    self.writer = Writer()

    # Hyperparameter bookkeeping used by the tuning loop.
    self.hyperparams = dict()
    self.hyperparams_names = list()
    self.hyperparams_values = list()
    self.hyperparams_single_value = dict()

    # Pull the full interaction and content matrices from the extractor.
    extractor = Extractor()
    full_urm = extractor.get_urm_all()
    self.icm = extractor.get_icm_all()

    # First leave-one-out split: everything-but-test vs. test.
    outer_split = loo.split_train_leave_k_out_user_wise(full_urm, 1, False, True)
    self.urm_post_validation = outer_split[0]
    self.urm_test = outer_split[1]

    # Second leave-one-out split on the remainder: train vs. validation.
    # (Merging train and validation back together at the end is awkward,
    # hence applying leave-one-out twice.)
    inner_split = loo.split_train_leave_k_out_user_wise(
        self.urm_post_validation, 1, False, True)
    self.urm_train = inner_split[0]
    self.urm_validation = inner_split[1]
def create_validation_test_files(self, write_userf, write_itemf):
    """Split the URM twice (leave-one-out) and write each part to a file.

    Produces four files under ``xL_data/``: post_validation, test, train
    and validation. Each write goes through ``create_general_file``, which
    reads ``self.my_path`` and ``self.urm``.

    :param write_userf: forwarded to ``create_general_file`` (user-feature flag)
    :param write_itemf: forwarded to ``create_general_file`` (item-feature flag)
    """
    import Utils.Split.split_train_validation_leave_k_out as loo

    # Splitting into post-validation & testing in case of parameter tuning.
    matrices = loo.split_train_leave_k_out_user_wise(self.urm, 1, False, True)

    # Splitting the post-validation matrix in train & validation.
    # (Problem of merging train and validation again at the end => loo twice.)
    matrices_for_validation = loo.split_train_leave_k_out_user_wise(
        matrices[0], 1, False, True)

    # The four blocks below were previously duplicated verbatim; a single
    # data-driven loop writes every split in the same order as before.
    splits = [
        ("post_validation.txt", matrices[0]),
        ("test.txt", matrices[1]),
        ("train.txt", matrices_for_validation[0]),
        ("validation.txt", matrices_for_validation[1]),
    ]
    for filename, matrix in splits:
        self.my_path = self.extractor.DATA_FILE_PATH + "xL_data/" + filename
        self.urm = matrix
        self.create_general_file(write_userf, write_itemf)
def run(self, is_test, is_SSLIM):
    """
    From here we start each algorithm.

    :param is_test: specifies if we want to write a report or a submission
    :param is_SSLIM: if True, replace the ICM with a SLIM-BPR learned
        item-item similarity matrix before evaluating
    """
    self.is_test = is_test
    self.is_SSLIM = is_SSLIM

    if self.is_test:
        extractor = Extractor()
        urm = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()

        # Splitting into post-validation & testing in case of parameter tuning
        matrices = loo.split_train_leave_k_out_user_wise(urm, 1, False, True)
        self.urm_post_validation = matrices[0]
        self.urm_test = matrices[1]

        # Splitting the post-validation matrix in train & validation
        # (Problem of merging train and validation again at the end => loo twice)
        matrices_for_validation = loo.split_train_leave_k_out_user_wise(
            self.urm_post_validation, 1, False, True)
        self.urm_train = matrices_for_validation[0]
        self.urm_validation = matrices_for_validation[1]

        self.urm_train = extractor.preprocess_csr_matrix(self.urm_train)

        self.write_report()

        if self.is_SSLIM:
            # Fit SLIM-BPR on a copy of the ICM and swap in the learned
            # similarity matrix (as CSR) before evaluation.
            self.sslim_pars = WeightConstants.SLIM_BPR_ICM
            slim_bpr = SLIM_BPR_Cython(self.icm.copy())
            slim_bpr.fit(**self.sslim_pars)
            self.icm = slim_bpr.recs.copy().tocsr()

        # evaluate() runs in both cases; hoisted out of the duplicated
        # if/else branches (dead commented-out tuning loops also removed).
        self.evaluate()
    else:
        extractor = Extractor()
        users = extractor.get_target_users_of_recs()
        self.urm_train = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()
        self.write_submission(users)
def run(self, is_test):
    """Run the experiment: evaluate on a split (test mode) or write a submission.

    :param is_test: specifies if we want to write a report or a submission
    """

    def build_feature_urms(builder):
        # Per-feature URM lists; this was duplicated verbatim in both
        # branches below, so it is extracted into one local helper.
        if self.users_per_region:
            self.urm_per_region_list = builder.build_per_region_urm_train(
                self.urm_train)
        if self.users_per_age:
            self.urm_per_age_list = builder.build_per_age_urm_train(
                self.urm_train)

    self.is_test = is_test

    if self.is_test:
        extractor = Extractor()
        builder = Builder()
        urm = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()

        # Splitting into post-validation & testing in case of parameter tuning
        matrices = loo.split_train_leave_k_out_user_wise(urm, 1, False, True)
        self.urm_post_validation = matrices[0]
        self.urm_test = matrices[1]

        # Splitting the post-validation matrix in train & validation
        # (Problem of merging train and validation again at the end => loo twice)
        matrices_for_validation = loo.split_train_leave_k_out_user_wise(
            self.urm_post_validation, 1, False, True)
        self.urm_train = matrices_for_validation[0]
        self.urm_validation = matrices_for_validation[1]

        build_feature_urms(builder)
        self.write_report()
        self.evaluate()
    else:
        extractor = Extractor()
        builder = Builder()
        users = extractor.get_target_users_of_recs()
        self.urm_train = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()

        build_feature_urms(builder)
        self.write_submission(users)
def run(self, is_test):
    """
    From here we start each algorithm.

    :param is_test: specifies if we want to write a report or a submission

    NOTE(review): when ``is_test`` is falsy this method silently does
    nothing — there is no submission branch here; confirm that is intended.
    """
    if is_test:
        # CREATION OF THE VALIDATIONS FOR EACH PART OF THE TRAIN.
        # For each of the 4 URM parts: hold one interaction per user out
        # as validation, keep the rest as that user's profile, and stack
        # the other parts as the training URM.
        vals = []
        urms = []
        target_profiles = []
        for i in range(1, 5):
            urm_to_predict = self.extractor.get_single_urm(i)
            matrices = loo.split_train_leave_k_out_user_wise(
                urm_to_predict, 1, False, True)
            target_profiles.append(matrices[0])
            vals.append(matrices[1])
            urms.append(self.extractor.get_others_urm_vstack(i))

        # Fixed best parameters for each enabled recommender.
        if self.icfknn:
            self.p_icfknn = ParametersTuning.ICFKNN_BEST
        if self.cbfknn:
            self.p_cbfknn = ParametersTuning.CBFKNN_BEST
        if self.rp3b:
            self.p_rp3b = ParametersTuning.RP3B_BEST
        if self.slim_en:
            self.p_slimen = ParametersTuning.SLIM_ELASTIC_NET_BEST

        # URM splitted in 4 smaller URMs for cross-validation.
        for i in range(0, 4):
            self.urm_validation = vals[i].copy()
            self.urm_train = urms[i].copy()
            self.target_users = self.extractor.get_target_users_of_specific_part(i + 1)

            # GETTING THE RECOMMENDATIONS FOR THE TRAIN DATAFRAME
            user_ids, item_ids = self.evaluate(i + 1, target_profiles[i])
            self.df_user_id_col.extend(user_ids)
            self.df_item_id_col.extend(item_ids)

        # (Commented-out debug prints of the accumulated columns removed.)
        self.score_ranking()
def run(self, is_test):
    """
    From here we start each algorithm.

    :param is_test: specifies if we want to write a report or a submission
    """
    self.is_test = is_test

    if self.is_test:
        # BUG FIX: the original bound the class itself (``extractor =
        # Extractor``) and then passed it explicitly as ``self`` to every
        # call (``extractor.get_urm_all(extractor)``). Instantiate once
        # and use normal bound-method calls instead.
        extractor = Extractor()
        urm = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()

        # Splitting into post-validation & testing in case of parameter tuning
        matrices = loo.split_train_leave_k_out_user_wise(urm, 1, False, True)
        self.urm_post_validation = matrices[0]
        self.urm_test = matrices[1]

        # Splitting the post-validation matrix in train & validation
        # (Problem of merging train and validation again at the end => loo twice)
        matrices_for_validation = loo.split_train_leave_k_out_user_wise(
            self.urm_post_validation, 1, False, True)
        self.urm_train = matrices_for_validation[0]
        self.urm_validation = matrices_for_validation[1]

        self.write_report()
        self.evaluate()
    else:
        extractor = Extractor()
        users = extractor.get_target_users_of_recs()
        self.urm_train = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()
        self.write_submission(users)
def run(self, is_test):
    """
    From here we start each algorithm.

    :param is_test: specifies if we want to write a report or a submission
    """
    self.is_test = is_test
    self.icm = self.extractor.get_icm_all()

    if self.is_test:
        # CREATION OF THE VALIDATIONS FOR EACH PART OF THE TRAIN
        vals = []
        urms = []
        target_profiles = []
        for i in range(1, 5):
            urm_to_predict = self.extractor.get_single_urm(i)
            matrices = loo.split_train_leave_k_out_user_wise(
                urm_to_predict, 1, False, True)
            # matrices[0]: user profiles with one interaction held out;
            # matrices[1]: the held-out interactions used as validation.
            target_users_profile = matrices[0]
            target_profiles.append(target_users_profile)
            val = matrices[1]
            vals.append(val)
            urm = self.extractor.get_others_urm_vstack(i)
            urms.append(urm)

        # Fixed "best" parameters for the recommenders that are not being
        # tuned in this pass.
        if self.icfknn:
            self.p_icfknn = ParametersTuning.ICFKNN_BEST
        if self.cbfknn:
            self.p_cbfknn = ParametersTuning.CBFKNN_BEST
        if self.rp3b:
            self.p_rp3b = ParametersTuning.RP3B_BEST
        if self.slim_en:
            self.p_slimen = ParametersTuning.SLIM_ELASTIC_NET_BEST

        # TUNING WITH THE DIFFERENT PARAMS — only UCFKNN is swept here;
        # each candidate is evaluated with 4-fold cross-validation below.
        for params in ParametersTuning.UCFKNN:
            if self.ucfknn:
                self.p_ucfknn = params
            self.write_report()

            # URM splitted in 4 smaller URMs for cross-validation
            for i in range(0, 4):
                self.urm_validation = vals[i].copy()
                self.urm_train = urms[i].copy()
                self.target_users = self.extractor.get_target_users_of_specific_part(
                    i + 1)
                self.evaluate(i + 1, target_profiles[i])

            self.output_average_MAP()
        self.output_best_params()
    else:
        # Submission mode: use the fixed best parameters and the full URM.
        self.p_cbfknn = ParametersTuning.CBFKNN_BEST
        self.p_icfknn = ParametersTuning.ICFKNN_BEST
        self.p_slimen = ParametersTuning.SLIM_ELASTIC_NET_BEST
        self.p_rp3b = ParametersTuning.RP3B_BEST

        users = self.extractor.get_target_users_of_recs()
        self.urm_train = self.extractor.get_urm_all()
        # NOTE(review): ``self.writer`` is passed as the first positional
        # argument to its own method — this looks like an unbound-call
        # workaround (cf. the ``Extractor`` usage elsewhere in this file);
        # also ``submission_counter`` is not defined in this method, so it
        # presumably comes from module/global scope — verify both.
        self.writer.write_header(self.writer, sub_counter=submission_counter)
        self.write_submission(users)