def main():
    """Run the user-demographic hyper-parameter search on a holdout split.

    Steps, in order:

    1. Parse the command-line arguments and build a leave-k-out split
       (``K_OUT`` interactions held out, seeded with ``args.seed``).
    2. In both the train and the test matrix, blank out every row that
       holds exactly one stored interaction.
    3. Build the list of users the evaluator must skip.
    4. Launch ``run_parameter_search_user_demographic`` optimising MAP at
       cutoff 10 and writing into a time-stamped report folder.
    """

    def _blank_single_interaction_rows(urm):
        # Rows whose CSR index-pointer difference is exactly 1 hold a single
        # stored entry; those rows are overwritten with zeros.
        stored_per_row = np.ediff1d(urm.tocsr().indptr)
        single_rows = np.arange(urm.shape[0])[stored_per_row == 1]
        editable = urm.tolil()
        editable[single_rows, :] = 0
        return editable.tocsr(), single_rows

    args = get_arguments()

    # Data loading
    raw_reader = RecSys2019Reader(args.reader_path)
    data_reader = New_DataSplitter_leave_k_out(
        raw_reader,
        k_out_value=K_OUT,
        allow_cold_users=ALLOW_COLD_USERS,
        use_validation_set=False,
        force_new_split=True,
        seed=args.seed,
    )
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    # Drop the profiles made of a single interaction, first in the train
    # matrix and then in the test matrix.
    URM_train, _ = _blank_single_interaction_rows(URM_train)
    URM_test, single_test_users = _blank_single_interaction_rows(URM_test)

    UCM_all = get_UCM_train_cold(data_reader)

    threshold_ignored = get_ignore_users(
        URM_train,
        data_reader.get_original_user_id_to_index_mapper(),
        lower_threshold=args.lower_threshold,
        upper_threshold=args.upper_threshold,
        ignore_non_target_users=args.exclude_non_target,
    )
    # NOTE(review): only the single-interaction users found in the TEST
    # matrix are appended here; the ones found in the train matrix are not.
    # Confirm this is the intended behaviour.
    ignore_users = np.concatenate([threshold_ignored, single_test_users])

    # Setting evaluator
    evaluator = EvaluatorHoldout(
        URM_test, cutoff_list=[10], ignore_users=ignore_users
    )

    # HP tuning
    print("Start tuning...")
    report_root = "../../report/hp_tuning/{}/".format(args.recommender_name)
    run_tag = datetime.now().strftime('%b%d_%H-%M-%S')
    run_tag = run_tag + "_k_out_value_{}/".format(K_OUT)
    output_folder = report_root + "/" + run_tag

    run_parameter_search_user_demographic(
        URM_train=URM_train,
        UCM_object=UCM_all,
        UCM_name="UCM_all",
        recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
        evaluator_validation=evaluator,
        metric_to_optimize="MAP",
        output_folder_path=output_folder,
        parallelizeKNN=True,
        n_cases=int(args.n_cases),
        n_random_starts=int(args.n_random_starts),
    )
    print("...tuning ended")
def main():
    """Run the collaborative hyper-parameter search on a leave-3-out split.

    Steps, in order:

    1. Parse the command-line arguments and build the holdout split.
    2. Depending on ``args.recommender_name``, stack the transposed ICM
       under the train matrix and/or re-weight the train matrix with
       ``TF_IDF``.
    3. Select the users the evaluator must skip from ``focus_on_high``,
       ``focus_on_low`` and ``exclude_users``.
    4. Launch ``runParameterSearch_Collaborative`` optimising MAP at
       cutoff 10 and writing into a time-stamped report folder.
    """
    args = get_arguments()

    # Data loading
    raw_reader = RecSys2019Reader(args.reader_path)
    data_reader = New_DataSplitter_leave_k_out(
        raw_reader,
        k_out_value=3,
        use_validation_set=False,
        force_new_split=True,
        seed=args.seed,
    )
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    # Model-specific preparation of the train matrix. When both steps apply
    # (rp3beta_side) the stacking happens before the TF-IDF weighting.
    recommender_name = args.recommender_name
    if recommender_name in ("sslim_bpr", "rp3beta_side", "pure_svd_side"):
        ICM_all = get_ICM_train(data_reader)
        URM_train = sps.vstack([URM_train, ICM_all.T], format="csr")
    if recommender_name in ("rp3beta_side", "pure_svd"):
        URM_train = TF_IDF(URM_train).tocsr()

    # Setting evaluator
    def _stored_per_row():
        # Number of stored entries in each row of the (possibly stacked)
        # train matrix.
        return np.ediff1d(URM_train.tocsr().indptr)

    def _rows_where(mask):
        # Row indices of the train matrix selected by a boolean mask.
        return np.arange(URM_train.shape[0])[mask]

    # NOTE(review): the row counts are taken on URM_train after the optional
    # stacking, so the stacked ICM rows are part of the index space too.
    exclude_cold_users = args.exclude_users
    min_interactions = int(args.focus_on_high)
    max_interactions = int(args.focus_on_low)

    if min_interactions != 0:
        print("Excluding users with less than {} interactions".format(min_interactions))
        ignore_users = _rows_where(_stored_per_row() < min_interactions)
    elif max_interactions != 0:
        print("Excluding users with more than {} interactions".format(max_interactions))
        ignore_users = _rows_where(_stored_per_row() > max_interactions)
        if exclude_cold_users:
            cold_users = _rows_where(_stored_per_row() == 0)
            ignore_users = np.unique(np.concatenate((cold_users, ignore_users)))
    elif exclude_cold_users:
        print("Excluding cold users...")
        ignore_users = _rows_where(_stored_per_row() == 0)
    else:
        ignore_users = None

    evaluator = EvaluatorHoldout(
        URM_test, cutoff_list=[10], ignore_users=ignore_users
    )

    # HP tuning
    print("Start tuning...")
    report_root = "../../report/hp_tuning/{}/".format(args.recommender_name)
    run_tag = datetime.now().strftime('%b%d_%H-%M-%S')
    run_tag = run_tag + "_k_out_value_3/"
    output_folder = report_root + "/" + run_tag

    runParameterSearch_Collaborative(
        URM_train=URM_train,
        recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
        evaluator_validation=evaluator,
        metric_to_optimize="MAP",
        output_folder_path=output_folder,
        n_cases=int(args.n_cases),
        n_random_starts=int(args.n_random_starts),
    )
    print("...tuning ended")
from src.data_management.New_DataSplitter_leave_k_out import * from src.data_management.RecSys2019Reader import RecSys2019Reader from src.data_management.RecSys2019Reader_utils import merge_UCM, get_ICM_numerical from src.data_management.data_getter import get_warmer_UCM from src.model import best_models from src.utils.general_utility_functions import get_split_seed if __name__ == '__main__': os.environ["MKL_NUM_THREADS"] = "1" os.environ["OPENBLAS_NUM_THREADS"] = "1" # Data loading data_reader = RecSys2019Reader("../data/") data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=3, use_validation_set=False, force_new_split=True, seed=get_split_seed()) data_reader.load_data() URM_train, URM_test = data_reader.get_holdout_split() # Build ICMs ICM_numerical, _ = get_ICM_numerical(data_reader.dataReader_object) ICM = data_reader.get_ICM_from_name("ICM_sub_class") ICM_all, _ = merge_ICM(ICM, URM_train.transpose(), {}, {}) # Build UCMs URM_all = data_reader.dataReader_object.get_URM_all() UCM_age = data_reader.dataReader_object.get_UCM_from_name("UCM_age") UCM_region = data_reader.dataReader_object.get_UCM_from_name("UCM_region") UCM_age_region, _ = merge_UCM(UCM_age, UCM_region, {}, {})