Ejemplo n.º 1
0
def main():
    """Evaluate the configured model on a fresh leave-k-out holdout split.

    Reads the RecSys 2019 data from the project's ``data/`` folder, splits it
    with ``New_DataSplitter_leave_k_out``, builds an ``EvaluatorHoldout`` on
    the test matrix (skipping the users returned by ``get_ignore_users``) and
    prints the evaluation result of the model built by ``get_model``.
    """
    # --- Data loading ---
    dataset_dir = os.path.join(get_project_root_path(), "data/")
    raw_reader = RecSys2019Reader(dataset_dir)
    splitter_options = dict(
        k_out_value=K_OUT,
        use_validation_set=False,
        allow_cold_users=ALLOW_COLD_USERS,
        force_new_split=True,
        seed=get_split_seed(),
    )
    splitter = New_DataSplitter_leave_k_out(raw_reader, **splitter_options)
    splitter.load_data()

    URM_train, URM_test = splitter.get_holdout_split()
    # Only the first element of each returned pair is needed here.
    item_features, _ = get_ICM_train_new(splitter)
    user_features, _ = get_UCM_train_new(splitter)

    # --- Users excluded from the evaluation ---
    user_id_mapper = splitter.get_original_user_id_to_index_mapper()
    users_to_skip = get_ignore_users(
        URM_train,
        user_id_mapper,
        lower_threshold=LOWER_THRESHOLD,
        upper_threshold=UPPER_THRESHOLD,
        ignore_non_target_users=IGNORE_NON_TARGET_USERS,
    )
    holdout_evaluator = EvaluatorHoldout(
        URM_test,
        cutoff_list=[CUTOFF],
        ignore_users=users_to_skip,
    )

    # --- Model evaluation ---
    recommender = get_model(URM_train, item_features, user_features)
    evaluation_result = holdout_evaluator.evaluateRecommender(recommender)
    print(evaluation_result)
Ejemplo n.º 2
0
def read_split_load_data(k_out, allow_cold_users, seed):
    """Build a leave-k-out splitter over the RecSys 2019 data and load it.

    The data is read from the ``data/`` folder under the project root. No
    validation set is requested and ``force_new_split=True`` is always passed.

    :param k_out: forwarded to the splitter as ``k_out_value``
    :param allow_cold_users: forwarded to the splitter as ``allow_cold_users``
    :param seed: forwarded to the splitter as ``seed``
    :return: the splitter, after ``load_data()`` has been called on it
    """
    dataset_dir = os.path.join(get_project_root_path(), "data/")
    splitter = New_DataSplitter_leave_k_out(
        RecSys2019Reader(dataset_dir),
        k_out_value=k_out,
        use_validation_set=False,
        allow_cold_users=allow_cold_users,
        force_new_split=True,
        seed=seed,
    )
    splitter.load_data()
    return splitter
Ejemplo n.º 3
0
from course_lib.Base.Evaluation.Evaluator import EvaluatorHoldout
from src.data_management.New_DataSplitter_leave_k_out import New_DataSplitter_leave_k_out
from src.data_management.RecSys2019Reader import RecSys2019Reader
from src.data_management.dataframe_preprocesser import get_preprocessed_dataframe
from src.tuning.holdout_validation.run_parameter_search_advanced_top_pop import run_parameter_search_advanced_top_pop
from src.utils.general_utility_functions import get_split_seed

if __name__ == '__main__':
    # Script entry point: builds a leave-k-out holdout split, creates the
    # evaluator and prepares the names used by the hyper-parameter tuning step.

    # Data loading
    # NOTE(review): "../../data/" is a relative path, so this presumably has to
    # be run from the script's own directory - confirm.
    data_reader = RecSys2019Reader("../../data/")
    # Wrap the raw reader in the leave-k-out splitter (k=3, no validation set),
    # seeded with the project-wide split seed.
    data_reader = New_DataSplitter_leave_k_out(data_reader,
                                               k_out_value=3,
                                               use_validation_set=False,
                                               force_new_split=True,
                                               seed=get_split_seed())
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()
    # `mapper` and `df` are not used in the visible part of this script;
    # presumably they are consumed by the tuning call further down - verify.
    mapper = data_reader.get_original_user_id_to_index_mapper()
    df = get_preprocessed_dataframe("../../data/", keep_warm_only=True)

    # Setting evaluator
    # Disabled alternative kept for reference: it selects the users having at
    # least one interaction in URM_train and would pass them as `ignore_users`.
    # The evaluator below is currently built without any ignored users.
    # warm_users_mask = np.ediff1d(URM_train.tocsr().indptr) > 0
    # warm_users = np.arange(URM_train.shape[0])[warm_users_mask]
    # ignore_users = warm_users
    cutoff_list = [10]
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list)

    # HP tuning
    print("Start tuning...")
    version_path = "../../report/hp_tuning/advanced_top_pop/"
    # Timestamp such as "Jan05_13-45-10"; presumably used to name this run's
    # output under `version_path` - verify against the code that follows.
    # NOTE(review): `datetime` is not among the imports visible above this
    # block - confirm that `from datetime import datetime` exists in the file.
    now = datetime.now().strftime('%b%d_%H-%M-%S')
Ejemplo n.º 4
0
import numpy as np
import xlearn as xl

from src.data_management.New_DataSplitter_leave_k_out import New_DataSplitter_leave_k_out
from src.data_management.RecSys2019Reader import RecSys2019Reader
from src.data_management.data_preprocessing_fm import format_URM_positive_non_compressed, \
    uniform_sampling_strategy, format_URM_positive_user_compressed, \
    format_URM_negative_sampling_user_compressed, mix_URM, add_ICM_info
from src.utils.general_utility_functions import get_split_seed, get_project_root_path

if __name__ == '__main__':
    # Script entry point: builds the training matrix for a factorization
    # machine (the file imports xlearn) from a leave-k-out split of the data.

    # Load the data and split it (k=3, no validation set, project split seed).
    # NOTE(review): "../../data/" is a relative path, so this presumably has to
    # be run from the script's own directory - confirm.
    dataset = RecSys2019Reader("../../data/")
    dataset = New_DataSplitter_leave_k_out(dataset, k_out_value=3, use_validation_set=False, force_new_split=True,
                                           seed=get_split_seed())
    dataset.load_data()
    URM_train, URM_test = dataset.get_holdout_split()
    ICM_all = dataset.get_ICM_from_name("ICM_all")

    # Build the positive samples and an equal-rate (negative_rate=1) set of
    # uniformly sampled negatives from the training interactions.
    URM_positive_FM_matrix = format_URM_positive_user_compressed(URM_train)
    URM_negative_FM_matrix = format_URM_negative_sampling_user_compressed(URM_train, negative_rate=1,
                                                                          sampling_function=uniform_sampling_strategy,
                                                                          check_replacement=True)
    # Mix positives and negatives, then drop the last column of the result.
    # NOTE(review): the dropped column is presumably the label column - confirm
    # against mix_URM.
    URM_FM_matrix = mix_URM(URM_positive_FM_matrix, URM_negative_FM_matrix)[:, :-1]
    # The third argument is the number of rows of URM_train (presumably the
    # number of users - verify against add_ICM_info).
    URM_FM_matrix = add_ICM_info(URM_FM_matrix, ICM_all, URM_train.shape[0])

    # Destination folder for the FM data: <project root>/resources/fm_data
    # NOTE(review): `os` is not among the imports visible above this block -
    # confirm that `import os` exists in the file.
    root_path = get_project_root_path()
    fm_data_path = os.path.join(root_path, "resources", "fm_data")

    # Prepare train sparse matrix and labels for dumping to file
    # (a copy is taken, so URM_FM_matrix itself is left untouched by what follows)
    FM_sps_matrix = URM_FM_matrix.copy()