Exemple #1
0
def main():
    """Load and split the dataset, then evaluate the configured model on the holdout."""
    # Data loading
    dataset_root = os.path.join(get_project_root_path(), "data/")
    reader = RecSys2019Reader(dataset_root)
    reader = New_DataSplitter_leave_k_out(reader,
                                          k_out_value=K_OUT,
                                          use_validation_set=False,
                                          allow_cold_users=ALLOW_COLD_USERS,
                                          force_new_split=True,
                                          seed=get_split_seed())
    reader.load_data()
    urm_train, urm_test = reader.get_holdout_split()
    icm_all, _ = get_ICM_train_new(reader)
    ucm_all, _ = get_UCM_train_new(reader)

    # Users outside the configured interaction-count window (and, optionally,
    # non-target users) are excluded from the evaluation
    excluded_users = get_ignore_users(urm_train,
                                      reader.get_original_user_id_to_index_mapper(),
                                      lower_threshold=LOWER_THRESHOLD,
                                      upper_threshold=UPPER_THRESHOLD,
                                      ignore_non_target_users=IGNORE_NON_TARGET_USERS)
    holdout_evaluator = EvaluatorHoldout(urm_test,
                                         cutoff_list=[CUTOFF],
                                         ignore_users=excluded_users)

    # Model evaluation
    recommender = get_model(urm_train, icm_all, ucm_all)
    print(holdout_evaluator.evaluateRecommender(recommender))
Exemple #2
0
def main():
    args = get_arguments()

    # Data loading
    root_data_path = args.reader_path
    data_reader = RecSys2019Reader(root_data_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=K_OUT, allow_cold_users=ALLOW_COLD_USERS,
                                               use_validation_set=False, force_new_split=True, seed=args.seed)
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    # Remove interactions to users that has len == 1 to URM_train
    len_1_users_mask = np.ediff1d(URM_train.tocsr().indptr) == 1
    len_1_users = np.arange(URM_train.shape[0])[len_1_users_mask]

    URM_train = URM_train.tolil()
    URM_train[len_1_users, :] = 0
    URM_train = URM_train.tocsr()

    # Remove interactions to users that has len == 1 to URM_test
    len_1_users_mask = np.ediff1d(URM_test.tocsr().indptr) == 1
    len_1_users = np.arange(URM_test.shape[0])[len_1_users_mask]

    URM_test = URM_test.tolil()
    URM_test[len_1_users, :] = 0
    URM_test = URM_test.tocsr()

    UCM_all = get_UCM_train_cold(data_reader)

    ignore_users = get_ignore_users(URM_train, data_reader.get_original_user_id_to_index_mapper(),
                                    lower_threshold=args.lower_threshold, upper_threshold=args.upper_threshold,
                                    ignore_non_target_users=args.exclude_non_target)
    ignore_users = np.concatenate([ignore_users, len_1_users])

    # Setting evaluator
    cutoff_list = [10]
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list, ignore_users=ignore_users)

    # HP tuning
    print("Start tuning...")
    version_path = "../../report/hp_tuning/{}/".format(args.recommender_name)
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_{}/".format(K_OUT)
    version_path = version_path + "/" + now

    run_parameter_search_user_demographic(URM_train=URM_train, UCM_object=UCM_all, UCM_name="UCM_all",
                                          recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
                                          evaluator_validation=evaluator,
                                          metric_to_optimize="MAP",
                                          output_folder_path=version_path,
                                          parallelizeKNN=True,
                                          n_cases=int(args.n_cases),
                                          n_random_starts=int(args.n_random_starts))

    print("...tuning ended")
Exemple #3
0
def main():
    """Tune hyper-parameters of a content-based item recommender on a 3-out split."""
    args = get_arguments()

    # Data loading
    data_reader = read_split_load_data(3, args.allow_cold_users, args.seed)
    URM_train, URM_test = data_reader.get_holdout_split()

    ICM_categorical = data_reader.get_ICM_from_name("ICM_sub_class")
    ICM_numerical, _ = get_ICM_numerical(data_reader.dataReader_object)
    ICM_all, _ = get_ICM_train_new(data_reader)

    similarity_type_list = None
    # Pick the content matrix matching the requested recommender flavour;
    # anything unrecognised falls back to the full ICM
    icm_by_name = {
        "item_cbf_numerical": (ICM_numerical, "ICM_numerical"),
        "item_cbf_categorical": (ICM_categorical, "ICM_categorical"),
    }
    ICM, ICM_name = icm_by_name.get(args.recommender_name, (ICM_all, "ICM_all"))

    # Setting evaluator
    excluded_users = get_ignore_users(URM_train,
                                      data_reader.get_original_user_id_to_index_mapper(),
                                      lower_threshold=args.lower_threshold,
                                      upper_threshold=args.upper_threshold,
                                      ignore_non_target_users=args.exclude_non_target)
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=[10], ignore_users=excluded_users)

    # HP tuning
    print("Start tuning...")
    report_root = "../../report/hp_tuning/{}/".format(args.recommender_name)
    timestamp = datetime.now().strftime('%b%d_%H-%M-%S')
    timestamp = timestamp + "_k_out_value_3/"
    version_path = report_root + "/" + timestamp

    run_parameter_search_item_content(
        URM_train=URM_train,
        ICM_object=ICM,
        ICM_name=ICM_name,
        recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
        evaluator_validation=evaluator,
        metric_to_optimize="MAP",
        output_folder_path=version_path,
        similarity_type_list=similarity_type_list,
        parallelizeKNN=True,
        n_cases=args.n_cases,
        n_random_starts=args.n_random_starts)
    print("...tuning ended")
Exemple #4
0
    UCM_all = get_UCM_train(data_reader)

    model = HybridWeightedAverageRecommender(URM_train, normalize=NORMALIZE)

    all_models = _get_all_models(URM_train=URM_train,
                                 UCM_all=UCM_all,
                                 ICM_all=ICM_all)
    for model_name, model_object in all_models.items():
        model.add_fitted_model(model_name, model_object)
    print("The models added in the hybrid are: {}".format(
        list(all_models.keys())))

    # Setting evaluator
    ignore_users = get_ignore_users(
        URM_train,
        data_reader.get_original_user_id_to_index_mapper(),
        lower_threshold=LOWER_THRESHOLD,
        upper_threshold=UPPER_THRESHOLD,
        ignore_non_target_users=IGNORE_NON_TARGET_USERS)

    evaluator = EvaluatorHoldout(URM_test,
                                 cutoff_list=[CUTOFF],
                                 ignore_users=ignore_users)

    version_path = "../../report/hp_tuning/hybrid_weighted_avg"
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_{}/".format(K_OUT)
    version_path = version_path + "/" + now

    run_parameter_search_hybrid(model,
                                metric_to_optimize="MAP",
                                evaluator_validation=evaluator,
Exemple #5
0
    train_df = preprocess_dataframe_after_reading(train_df)
    y_train = train_df['label'].values + 1

    train_df = train_df.drop(columns=["label"], inplace=False)
    valid_df = preprocess_dataframe_after_reading(valid_df)
    valid_df = valid_df.drop(columns=[], inplace=False)

    _, non_zero_count, total = get_label_array(data_frame=train_df,
                                               URM_train=URM_train)
    y_valid, _, _ = get_label_array(data_frame=valid_df, URM_train=URM_test)

    # Setting evaluator
    mapper = data_reader.get_original_user_id_to_index_mapper()
    ignore_users = get_ignore_users(URM_train,
                                    mapper,
                                    lower_threshold=20,
                                    upper_threshold=2**16 - 1,
                                    ignore_non_target_users=True)
    evaluator = EvaluatorHoldout(URM_test,
                                 cutoff_list=[10],
                                 ignore_users=ignore_users)
    total_users = np.arange(URM_train.shape[0])
    mask = np.in1d(total_users, ignore_users, invert=True)
    users_to_validate = total_users[mask]

    # HP tuning
    print("Start tuning...")
    version_path = "../../report/hp_tuning/light_gbm/"
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_3_eval/"
    version_path = version_path + now
Exemple #6
0
def main():
    """Cross-validated hyper-parameter tuning for the recommender selected on the CLI."""
    set_env_variables()
    args = get_arguments()
    seeds = get_seed_lists(args.n_folds, get_split_seed())

    # --------- DATA LOADING SECTION --------- #
    URM_train_list = []
    ICM_train_list = []
    UCM_train_list = []
    evaluator_list = []
    for fold in range(args.n_folds):
        # Read and split data for this fold
        data_reader = read_split_load_data(K_OUT, args.allow_cold_users, seeds[fold])
        URM_train, URM_test = data_reader.get_holdout_split()
        ICM_train, item_feature2range = get_ICM_train_new(data_reader)
        UCM_train, user_feature2range = get_UCM_train_new(data_reader)

        # Users outside the threshold window are excluded from evaluation
        ignore_users = get_ignore_users(URM_train, data_reader.get_original_user_id_to_index_mapper(),
                                        args.lower_threshold, args.upper_threshold,
                                        ignore_non_target_users=args.exclude_non_target)

        # Optional age-based filtering (disabled):
        # UCM_age = data_reader.get_UCM_from_name("UCM_age")
        # age_feature_to_id_mapper = data_reader.dataReader_object.get_UCM_feature_to_index_mapper_from_name("UCM_age")
        # age_demographic = get_user_demographic(UCM_age, age_feature_to_id_mapper, binned=True)
        # ignore_users = np.unique(np.concatenate((ignore_users, get_ignore_users_age(age_demographic, AGE_TO_KEEP))))

        URM_train_list.append(URM_train)
        ICM_train_list.append(ICM_train)
        UCM_train_list.append(UCM_train)

        evaluator_list.append(EvaluatorHoldout(URM_test, cutoff_list=[CUTOFF],
                                               ignore_users=np.unique(ignore_users)))

    # --------- HYPER PARAMETERS TUNING SECTION --------- #
    print("Start tuning...")

    hp_tuning_path = "../../../report/hp_tuning/" + args.recommender_name + "/"
    date_string = datetime.now().strftime('%b%d_%H-%M-%S_k1_lt_{}/'.format(args.lower_threshold))
    output_folder_path = hp_tuning_path + date_string

    # Arguments common to every run_cv_parameter_search variant below
    shared_kwargs = dict(recommender_class=RECOMMENDER_CLASS_DICT[args.recommender_name],
                         evaluator_validation_list=evaluator_list,
                         metric_to_optimize="MAP", output_folder_path=output_folder_path,
                         parallelize_search=args.parallelize, n_jobs=args.n_jobs,
                         n_cases=args.n_cases, n_random_starts=args.n_random_starts)

    name = args.recommender_name
    if name in COLLABORATIVE_RECOMMENDER_CLASS_DICT:
        run_cv_parameter_search(URM_train_list=URM_train_list, **shared_kwargs)
    elif name in CONTENT_RECOMMENDER_CLASS_DICT:
        run_cv_parameter_search(URM_train_list=URM_train_list, ICM_train_list=ICM_train_list,
                                ICM_name="ICM_all", **shared_kwargs)
    elif name in DEMOGRAPHIC_RECOMMENDER_CLASS_DICT:
        run_cv_parameter_search(URM_train_list=URM_train_list, UCM_train_list=UCM_train_list,
                                UCM_name="UCM_all", **shared_kwargs)
    elif name in SIDE_INFO_CLASS_DICT:
        # Stack the transposed ICM below each URM so item features act as extra "user" rows
        stacked_list = [sps.vstack([urm, icm.T], format="csr")
                        for urm, icm in zip(URM_train_list, ICM_train_list)]
        run_cv_parameter_search(URM_train_list=stacked_list, **shared_kwargs)

    print("...tuning ended")
Exemple #7
0
    data_reader = New_DataSplitter_leave_k_out(data_reader,
                                               k_out_value=1,
                                               allow_cold_users=False,
                                               use_validation_set=False,
                                               force_new_split=True,
                                               seed=get_split_seed())
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()

    ICM_all, _ = get_ICM_train_new(data_reader)

    UCM_all = get_UCM_train(data_reader)

    ignore_users = get_ignore_users(
        URM_train,
        data_reader.get_original_user_id_to_index_mapper(),
        lower_threshold=-1,
        upper_threshold=22,
        ignore_non_target_users=True)

    # Setting evaluator
    cutoff_list = [10]
    evaluator = EvaluatorHoldout(URM_test,
                                 cutoff_list=cutoff_list,
                                 ignore_users=ignore_users)

    version_path = "../../report/hp_tuning/bagging/"
    now = datetime.now().strftime('%b%d_%H-%M-%S')
    now = now + "_k_out_value_3/"
    version_path = version_path + "/" + now

    hyper_parameters_range = {}
    sub_6 = best_models.ItemCF.get_model(URM_train=URM_train, load_model=True, save_model=True)
    sub_6.RECOMMENDER_NAME = "ItemCF"

    sub_7 = best_models.SLIM_BPR.get_model(URM_train=URM_train, load_model=True, save_model=True)

    sub_list = [sub_0, sub_1, sub_2, sub_3, sub_4, sub_5, sub_6, sub_7]

    pure_svd_param = {'num_factors': 50, 'n_oversamples': 3, 'n_iter': 20, 'feature_weighting': 'TF-IDF'}
    pure_svd = NewPureSVDRecommender(URM_train)
    pure_svd.fit(**pure_svd_param)
    user_factors = np.array(pure_svd.USER_factors)
    item_factors = np.array(pure_svd.ITEM_factors)

    mapper = data_reader.get_original_user_id_to_index_mapper()
    ignore_users = get_ignore_users(URM_train, mapper, lower_threshold=LOWER_THRESHOLD, upper_threshold=2 ** 16 - 1,
                                    ignore_non_target_users=IGNORE_NON_TARGET_USERS)
    main_recommender = main_rec
    total_users = np.arange(URM_train.shape[0])
    mask = np.in1d(total_users, ignore_users, invert=True)
    user_to_validate = total_users[mask]
    data_path = "../../data/"

    # Retrieve data for boosting

    train_df = get_train_dataframe_proportion(user_id_array=user_to_validate,
                                              cutoff=TRAIN_CUTOFF,
                                              main_recommender=main_recommender,
                                              recommender_list=sub_list,
                                              mapper=mapper,
                                              URM_train=URM_train,
                                              user_factors=user_factors,