def main():
    # Data loading
    root_data_path = os.path.join(get_project_root_path(), "data/")
    data_reader = RecSys2019Reader(root_data_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=K_OUT,
                                               use_validation_set=False,
                                               allow_cold_users=ALLOW_COLD_USERS,
                                               force_new_split=True,
                                               seed=get_split_seed())
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()
    ICM_all, _ = get_ICM_train_new(data_reader)
    UCM_all, _ = get_UCM_train_new(data_reader)

    # Ignoring users
    ignore_users = get_ignore_users(URM_train,
                                    data_reader.get_original_user_id_to_index_mapper(),
                                    lower_threshold=LOWER_THRESHOLD,
                                    upper_threshold=UPPER_THRESHOLD,
                                    ignore_non_target_users=IGNORE_NON_TARGET_USERS)
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=[CUTOFF], ignore_users=ignore_users)

    # Model evaluation
    model = get_model(URM_train, ICM_all, UCM_all)
    print(evaluator.evaluateRecommender(model))

def main():
    args = get_arguments()

    # Data loading
    root_data_path = args.reader_path
    data_reader = RecSys2019Reader(root_data_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=K_OUT,
                                               allow_cold_users=ALLOW_COLD_USERS,
                                               use_validation_set=False,
                                               force_new_split=True, seed=args.seed)
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    # Remove the interactions of users with profile length == 1 from URM_train
    len_1_users_mask = np.ediff1d(URM_train.tocsr().indptr) == 1
    len_1_users = np.arange(URM_train.shape[0])[len_1_users_mask]
    URM_train = URM_train.tolil()
    URM_train[len_1_users, :] = 0
    URM_train = URM_train.tocsr()

    # Remove the interactions of users with profile length == 1 from URM_test
    len_1_users_mask = np.ediff1d(URM_test.tocsr().indptr) == 1
    len_1_users = np.arange(URM_test.shape[0])[len_1_users_mask]
    URM_test = URM_test.tolil()
    URM_test[len_1_users, :] = 0
    URM_test = URM_test.tocsr()

    UCM_all = get_UCM_train_cold(data_reader)

    ignore_users = get_ignore_users(URM_train, data_reader.get_original_user_id_to_index_mapper(),
                                    lower_threshold=args.lower_threshold,
                                    upper_threshold=args.upper_threshold,
                                    ignore_non_target_users=args.exclude_non_target)
    ignore_users = np.concatenate([ignore_users, len_1_users])

    # Setting evaluator
    cutoff_list = [10]
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list, ignore_users=ignore_users)

    # HP tuning
    print("Start tuning...")
    version_path = "../../report/hp_tuning/{}/".format(args.recommender_name)
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_{}/".format(K_OUT)
    version_path = version_path + "/" + now

    run_parameter_search_user_demographic(URM_train=URM_train, UCM_object=UCM_all, UCM_name="UCM_all",
                                          recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
                                          evaluator_validation=evaluator,
                                          metric_to_optimize="MAP",
                                          output_folder_path=version_path,
                                          parallelizeKNN=True,
                                          n_cases=int(args.n_cases),
                                          n_random_starts=int(args.n_random_starts))
    print("...tuning ended")

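# A possible vectorized alternative to the LIL round-trip above for emptying the rows of
# length-1 users. This is only a sketch using plain numpy/scipy; the helper name
# zero_out_rows is illustrative and not part of the repo.
import numpy as np
import scipy.sparse as sps

def zero_out_rows(X, rows):
    """Return a copy of CSR matrix X with the given rows emptied."""
    X = X.tocsr(copy=True)
    # Row index of each stored value, obtained from the CSR indptr
    row_of_entry = np.repeat(np.arange(X.shape[0]), np.ediff1d(X.indptr))
    X.data[np.in1d(row_of_entry, rows)] = 0
    X.eliminate_zeros()  # physically drop the zeroed entries
    return X

# Example usage (equivalent to the tolil()/slicing pattern above):
# URM_train = zero_out_rows(URM_train, len_1_users)
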
def main():
    args = get_arguments()

    # Data loading
    data_reader = read_split_load_data(3, args.allow_cold_users, args.seed)
    URM_train, URM_test = data_reader.get_holdout_split()
    ICM_categorical = data_reader.get_ICM_from_name("ICM_sub_class")
    ICM_numerical, _ = get_ICM_numerical(data_reader.dataReader_object)
    ICM_all, _ = get_ICM_train_new(data_reader)

    similarity_type_list = None
    if args.recommender_name == "item_cbf_numerical":
        ICM = ICM_numerical
        ICM_name = "ICM_numerical"
    elif args.recommender_name == "item_cbf_categorical":
        ICM = ICM_categorical
        ICM_name = "ICM_categorical"
    else:
        ICM = ICM_all
        ICM_name = "ICM_all"

    # Setting evaluator
    ignore_users = get_ignore_users(URM_train, data_reader.get_original_user_id_to_index_mapper(),
                                    lower_threshold=args.lower_threshold,
                                    upper_threshold=args.upper_threshold,
                                    ignore_non_target_users=args.exclude_non_target)
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=[10], ignore_users=ignore_users)

    # HP tuning
    print("Start tuning...")
    version_path = "../../report/hp_tuning/{}/".format(args.recommender_name)
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_3/"
    version_path = version_path + "/" + now

    run_parameter_search_item_content(URM_train=URM_train, ICM_object=ICM, ICM_name=ICM_name,
                                      recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
                                      evaluator_validation=evaluator,
                                      metric_to_optimize="MAP",
                                      output_folder_path=version_path,
                                      similarity_type_list=similarity_type_list,
                                      parallelizeKNN=True,
                                      n_cases=args.n_cases,
                                      n_random_starts=args.n_random_starts)
    print("...tuning ended")

    UCM_all = get_UCM_train(data_reader)

    model = HybridWeightedAverageRecommender(URM_train, normalize=NORMALIZE)
    all_models = _get_all_models(URM_train=URM_train, UCM_all=UCM_all, ICM_all=ICM_all)
    for model_name, model_object in all_models.items():
        model.add_fitted_model(model_name, model_object)
    print("The models added in the hybrid are: {}".format(list(all_models.keys())))

    # Setting evaluator
    ignore_users = get_ignore_users(URM_train, data_reader.get_original_user_id_to_index_mapper(),
                                    lower_threshold=LOWER_THRESHOLD,
                                    upper_threshold=UPPER_THRESHOLD,
                                    ignore_non_target_users=IGNORE_NON_TARGET_USERS)
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=[CUTOFF], ignore_users=ignore_users)

    version_path = "../../report/hp_tuning/hybrid_weighted_avg"
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_{}/".format(K_OUT)
    version_path = version_path + "/" + now

    run_parameter_search_hybrid(model, metric_to_optimize="MAP",
                                evaluator_validation=evaluator,
    train_df = preprocess_dataframe_after_reading(train_df)
    y_train = train_df['label'].values + 1
    train_df = train_df.drop(columns=["label"], inplace=False)

    valid_df = preprocess_dataframe_after_reading(valid_df)
    valid_df = valid_df.drop(columns=[], inplace=False)
    _, non_zero_count, total = get_label_array(data_frame=train_df, URM_train=URM_train)
    y_valid, _, _ = get_label_array(data_frame=valid_df, URM_train=URM_test)

    # Setting evaluator
    mapper = data_reader.get_original_user_id_to_index_mapper()
    ignore_users = get_ignore_users(URM_train, mapper, lower_threshold=20,
                                    upper_threshold=2 ** 16 - 1,
                                    ignore_non_target_users=True)
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=[10], ignore_users=ignore_users)

    total_users = np.arange(URM_train.shape[0])
    mask = np.in1d(total_users, ignore_users, invert=True)
    users_to_validate = total_users[mask]

    # HP tuning
    print("Start tuning...")
    version_path = "../../report/hp_tuning/light_gbm/"
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_3_eval/"
    version_path = version_path + now

def main():
    set_env_variables()
    args = get_arguments()
    seeds = get_seed_lists(args.n_folds, get_split_seed())

    # --------- DATA LOADING SECTION --------- #
    URM_train_list = []
    ICM_train_list = []
    UCM_train_list = []
    evaluator_list = []
    for fold_idx in range(args.n_folds):
        # Read and split data
        data_reader = read_split_load_data(K_OUT, args.allow_cold_users, seeds[fold_idx])
        URM_train, URM_test = data_reader.get_holdout_split()
        ICM_train, item_feature2range = get_ICM_train_new(data_reader)
        UCM_train, user_feature2range = get_UCM_train_new(data_reader)

        # Ignore users and setting evaluator
        ignore_users = get_ignore_users(URM_train, data_reader.get_original_user_id_to_index_mapper(),
                                        args.lower_threshold, args.upper_threshold,
                                        ignore_non_target_users=args.exclude_non_target)

        # Ignore users by age
        # UCM_age = data_reader.get_UCM_from_name("UCM_age")
        # age_feature_to_id_mapper = data_reader.dataReader_object.get_UCM_feature_to_index_mapper_from_name("UCM_age")
        # age_demographic = get_user_demographic(UCM_age, age_feature_to_id_mapper, binned=True)
        # ignore_users = np.unique(np.concatenate((ignore_users, get_ignore_users_age(age_demographic, AGE_TO_KEEP))))

        URM_train_list.append(URM_train)
        ICM_train_list.append(ICM_train)
        UCM_train_list.append(UCM_train)

        evaluator = EvaluatorHoldout(URM_test, cutoff_list=[CUTOFF], ignore_users=np.unique(ignore_users))
        evaluator_list.append(evaluator)

    # --------- HYPER PARAMETERS TUNING SECTION --------- #
    print("Start tuning...")

    hp_tuning_path = "../../../report/hp_tuning/" + args.recommender_name + "/"
    date_string = datetime.now().strftime('%b%d_%H-%M-%S_k1_lt_{}/'.format(args.lower_threshold))
    output_folder_path = hp_tuning_path + date_string

    if args.recommender_name in COLLABORATIVE_RECOMMENDER_CLASS_DICT.keys():
        run_cv_parameter_search(URM_train_list=URM_train_list,
                                recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
                                evaluator_validation_list=evaluator_list,
                                metric_to_optimize="MAP",
                                output_folder_path=output_folder_path,
                                parallelize_search=args.parallelize, n_jobs=args.n_jobs,
                                n_cases=args.n_cases, n_random_starts=args.n_random_starts)
    elif args.recommender_name in CONTENT_RECOMMENDER_CLASS_DICT.keys():
        run_cv_parameter_search(URM_train_list=URM_train_list,
                                ICM_train_list=ICM_train_list, ICM_name="ICM_all",
                                recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
                                evaluator_validation_list=evaluator_list,
                                metric_to_optimize="MAP",
                                output_folder_path=output_folder_path,
                                parallelize_search=args.parallelize, n_jobs=args.n_jobs,
                                n_cases=args.n_cases, n_random_starts=args.n_random_starts)
    elif args.recommender_name in DEMOGRAPHIC_RECOMMENDER_CLASS_DICT.keys():
        run_cv_parameter_search(URM_train_list=URM_train_list,
                                UCM_train_list=UCM_train_list, UCM_name="UCM_all",
                                recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
                                evaluator_validation_list=evaluator_list,
                                metric_to_optimize="MAP",
                                output_folder_path=output_folder_path,
                                parallelize_search=args.parallelize, n_jobs=args.n_jobs,
                                n_cases=args.n_cases, n_random_starts=args.n_random_starts)
    elif args.recommender_name in SIDE_INFO_CLASS_DICT:
        temp_list = []
        for i, URM in enumerate(URM_train_list):
            temp = sps.vstack([URM, ICM_train_list[i].T], format="csr")
            # temp = TF_IDF(temp).tocsr()
            temp_list.append(temp)
        run_cv_parameter_search(URM_train_list=temp_list,
                                recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
                                evaluator_validation_list=evaluator_list,
                                metric_to_optimize="MAP",
                                output_folder_path=output_folder_path,
                                parallelize_search=args.parallelize, n_jobs=args.n_jobs,
                                n_cases=args.n_cases, n_random_starts=args.n_random_starts)

    print("...tuning ended")

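# A minimal sketch of the side-information stacking used in the SIDE_INFO branch above
# (plain numpy/scipy only; the helper name stack_side_info is illustrative, not part of
# the repo). The ICM is transposed and stacked below the URM, so each item column carries
# both user interactions and item features, and a similarity computed on the stacked
# matrix blends collaborative and content signals.
import scipy.sparse as sps

def stack_side_info(URM, ICM):
    # URM: (n_users x n_items), ICM: (n_items x n_features)
    assert URM.shape[1] == ICM.shape[0]
    return sps.vstack([URM, ICM.T], format="csr")  # ((n_users + n_features) x n_items)
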
    data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=1, allow_cold_users=False,
                                               use_validation_set=False, force_new_split=True,
                                               seed=get_split_seed())
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()
    ICM_all, _ = get_ICM_train_new(data_reader)
    UCM_all = get_UCM_train(data_reader)

    ignore_users = get_ignore_users(URM_train, data_reader.get_original_user_id_to_index_mapper(),
                                    lower_threshold=-1, upper_threshold=22,
                                    ignore_non_target_users=True)

    # Setting evaluator
    cutoff_list = [10]
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list, ignore_users=ignore_users)

    version_path = "../../report/hp_tuning/bagging/"
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_3/"
    version_path = version_path + "/" + now

    hyper_parameters_range = {}
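# Purely illustrative: one way hyper_parameters_range might be populated, assuming the
# bagging search accepts scikit-optimize space objects as elsewhere in the framework.
# The parameter names and bounds below are examples, not taken from the repo.
from skopt.space import Categorical, Integer

example_hyper_parameters_range = {
    "topK": Integer(5, 1000),
    "shrink": Integer(0, 200),
    "similarity": Categorical(["cosine", "jaccard"]),
}
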
    sub_6 = best_models.ItemCF.get_model(URM_train=URM_train, load_model=True, save_model=True)
    sub_6.RECOMMENDER_NAME = "ItemCF"
    sub_7 = best_models.SLIM_BPR.get_model(URM_train=URM_train, load_model=True, save_model=True)
    sub_list = [sub_0, sub_1, sub_2, sub_3, sub_4, sub_5, sub_6, sub_7]

    pure_svd_param = {'num_factors': 50, 'n_oversamples': 3, 'n_iter': 20, 'feature_weighting': 'TF-IDF'}
    pure_svd = NewPureSVDRecommender(URM_train)
    pure_svd.fit(**pure_svd_param)
    user_factors = np.array(pure_svd.USER_factors)
    item_factors = np.array(pure_svd.ITEM_factors)

    mapper = data_reader.get_original_user_id_to_index_mapper()
    ignore_users = get_ignore_users(URM_train, mapper,
                                    lower_threshold=LOWER_THRESHOLD,
                                    upper_threshold=2 ** 16 - 1,
                                    ignore_non_target_users=IGNORE_NON_TARGET_USERS)
    main_recommender = main_rec

    total_users = np.arange(URM_train.shape[0])
    mask = np.in1d(total_users, ignore_users, invert=True)
    user_to_validate = total_users[mask]

    data_path = "../../data/"

    # Retrieve data for boosting
    train_df = get_train_dataframe_proportion(user_id_array=user_to_validate, cutoff=TRAIN_CUTOFF,
                                              main_recommender=main_recommender,
                                              recommender_list=sub_list,
                                              mapper=mapper, URM_train=URM_train,
                                              user_factors=user_factors,