def _load_previously_built_split_and_attributes(self, save_folder_path):
        """
        Loads all URM and ICM
        :return:
        """

        if self.allow_cold_users:
            allow_cold_users_file_name = "allow_cold_users"
        else:
            allow_cold_users_file_name = "only_warm_users"

        data_dict = pickle.load(
            open(save_folder_path + "URM_{}_fold_split_{}".format(self.n_folds, allow_cold_users_file_name), "rb"))

        for attrib_name in data_dict.keys():
            self.__setattr__(attrib_name, data_dict[attrib_name])

        for ICM_name in self.dataReader_object.get_loaded_ICM_names():
            ICM_object = pickle.load(open(save_folder_path + "{}".format(ICM_name), "rb"))
            self.__setattr__(ICM_name, ICM_object)

            pickle.load(open(save_folder_path + "tokenToFeatureMapper_{}".format(ICM_name), "rb"))
            self.__setattr__("tokenToFeatureMapper_{}".format(ICM_name), ICM_object)

        # MAPPER MANAGING
        if self.allow_cold_users:
            allow_cold_users_suffix = "allow_cold_users"
        else:
            allow_cold_users_suffix = "only_warm_users"
        name_suffix = "_{}".format(allow_cold_users_suffix)
        dataIO = DataIO(folder_path=save_folder_path)
        self.SPLIT_GLOBAL_MAPPER_DICT = dataIO.load_data(file_name="split_mappers" + name_suffix)
예제 #2
0
    def _set_search_attributes(self, recommender_input_args,
                               metric_to_optimize,
                               output_folder_path,
                               output_file_name_root,
                               resume_from_saved,
                               save_metadata,
                               n_cases):

        self.output_folder_path = output_folder_path
        self.output_file_name_root = output_file_name_root

        # If directory does not exist, create
        if not os.path.exists(self.output_folder_path):
            os.makedirs(self.output_folder_path)

        self.log_file = open(
            self.output_folder_path + self.output_file_name_root + "_{}.txt".format(self.ALGORITHM_NAME), "a")

        self.recommender_input_args_list = recommender_input_args
        self.metric_to_optimize = metric_to_optimize
        self.resume_from_saved = resume_from_saved
        self.save_metadata = save_metadata

        self.model_counter = 0
        self._init_metadata_dict(n_cases=n_cases)

        if self.save_metadata:
            self.dataIO = DataIO(folder_path=self.output_folder_path)
예제 #3
0
    def _save_dataset(self, save_folder_path):
        """
        Saves all URM, ICM and UCM
        :param save_folder_path:
        :return:
        """
        dataIO = DataIO(folder_path=save_folder_path)

        dataIO.save_data(data_dict_to_save=self._LOADED_GLOBAL_MAPPER_DICT,
                         file_name="dataset_global_mappers")

        dataIO.save_data(data_dict_to_save=self._LOADED_URM_DICT,
                         file_name="dataset_URM")

        if len(self.get_loaded_ICM_names()) > 0:
            dataIO.save_data(data_dict_to_save=self._LOADED_ICM_DICT,
                             file_name="dataset_ICM")

            dataIO.save_data(data_dict_to_save=self._LOADED_ICM_MAPPER_DICT,
                             file_name="dataset_ICM_mappers")

        if len(self.get_loaded_UCM_names()) > 0:
            dataIO.save_data(data_dict_to_save=self._LOADED_UCM_DICT,
                             file_name="dataset_UCM")
            dataIO.save_data(data_dict_to_save=self._LOADED_UCM_MAPPER_DICT,
                             file_name="dataset_UCM_mappers")
    def _load_previously_built_split_and_attributes(self, save_folder_path):
        """
        Loads all URM and ICM
        :return:
        """

        if self.use_validation_set:
            validation_set_suffix = "use_validation_set"
        else:
            validation_set_suffix = "no_validation_set"

        if self.allow_cold_users:
            allow_cold_users_suffix = "allow_cold_users"
        else:
            allow_cold_users_suffix = "only_warm_users"

        name_suffix = "_{}_{}".format(allow_cold_users_suffix, validation_set_suffix)

        dataIO = DataIO(folder_path=save_folder_path)

        split_parameters_dict = dataIO.load_data(file_name="split_parameters" + name_suffix)

        for attrib_name in split_parameters_dict.keys():
            self.__setattr__(attrib_name, split_parameters_dict[attrib_name])

        self.SPLIT_GLOBAL_MAPPER_DICT = dataIO.load_data(file_name="split_mappers" + name_suffix)

        self.SPLIT_URM_DICT = dataIO.load_data(file_name="split_URM" + name_suffix)

        if len(self.dataReader_object.get_loaded_ICM_names()) > 0:
            self.SPLIT_ICM_DICT = dataIO.load_data(file_name="split_ICM" + name_suffix)

            self.SPLIT_ICM_MAPPER_DICT = dataIO.load_data(file_name="split_ICM_mappers" + name_suffix)
예제 #5
0
    def save_model(self, folder_path, file_name=None):

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Saving model in file '{}'".format(folder_path +
                                                       file_name))

        data_dict_to_save = {"item_pop": self.item_pop}

        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name,
                         data_dict_to_save=data_dict_to_save)

        self._print("Saving complete")
예제 #6
0
    def load_model(self, folder_path, file_name=None):

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Loading model from file '{}'".format(folder_path +
                                                          file_name))

        dataIO = DataIO(folder_path=folder_path)
        data_dict = dataIO.load_data(file_name=file_name)

        for attrib_name in data_dict.keys():
            self.__setattr__(attrib_name, data_dict[attrib_name])

        self._print("Loading complete")
예제 #7
0
    def _save_dataset(self, save_folder_path):

        dataIO = DataIO(folder_path=save_folder_path)

        dataIO.save_data(data_dict_to_save=self._LOADED_GLOBAL_MAPPER_DICT,
                         file_name="dataset_global_mappers")

        dataIO.save_data(data_dict_to_save=self._LOADED_URM_DICT,
                         file_name="dataset_URM")

        if len(self.get_loaded_ICM_names()) > 0:
            dataIO.save_data(data_dict_to_save=self._LOADED_ICM_DICT,
                             file_name="dataset_ICM")

            dataIO.save_data(data_dict_to_save=self._LOADED_ICM_MAPPER_DICT,
                             file_name="dataset_ICM_mappers")
예제 #8
0
    def _load_from_saved_sparse_matrix(self, save_folder_path):
        """
        Loads all URM and ICM
        :return:
        """

        dataIO = DataIO(folder_path=save_folder_path)

        self._LOADED_GLOBAL_MAPPER_DICT = dataIO.load_data(
            file_name="dataset_global_mappers")

        self._LOADED_URM_DICT = dataIO.load_data(file_name="dataset_URM")

        if len(self.get_loaded_ICM_names()) > 0:
            self._LOADED_ICM_DICT = dataIO.load_data(file_name="dataset_ICM")

            self._LOADED_ICM_MAPPER_DICT = dataIO.load_data(
                file_name="dataset_ICM_mappers")
    def _set_search_attributes(self, recommender_input_args,
                               recommender_input_args_last_test,
                               metric_to_optimize, output_folder_path,
                               output_file_name_root, resume_from_saved,
                               save_metadata, save_model,
                               evaluate_on_test_each_best_solution, n_cases):

        if save_model not in self._SAVE_MODEL_VALUES:
            raise ValueError(
                "{}: parameter save_model must be in '{}', provided was '{}'.".
                format(self.ALGORITHM_NAME, self._SAVE_MODEL_VALUES,
                       save_model))

        self.output_folder_path = output_folder_path
        self.output_file_name_root = output_file_name_root

        # If directory does not exist, create
        if not os.path.exists(self.output_folder_path):
            os.makedirs(self.output_folder_path)

        self.log_file = open(
            self.output_folder_path + self.output_file_name_root +
            "_{}.txt".format(self.ALGORITHM_NAME), "a")

        if save_model == "last" and recommender_input_args_last_test is None:
            self._write_log(
                "{}: parameter save_model is 'last' but no recommender_input_args_last_test provided, saving best model on train data alone."
                .format(self.ALGORITHM_NAME))
            save_model = "best"

        self.recommender_input_args = recommender_input_args
        self.recommender_input_args_last_test = recommender_input_args_last_test
        self.metric_to_optimize = metric_to_optimize
        self.save_model = save_model
        self.resume_from_saved = resume_from_saved
        self.save_metadata = save_metadata
        self.evaluate_on_test_each_best_solution = evaluate_on_test_each_best_solution

        self.model_counter = 0
        self._init_metadata_dict(n_cases=n_cases)

        if self.save_metadata:
            self.dataIO = DataIO(folder_path=self.output_folder_path)
예제 #10
0
    def _save_split(self, save_folder_path):

        if save_folder_path:

            if self.allow_cold_users:
                allow_cold_users_suffix = "allow_cold_users"

            else:
                allow_cold_users_suffix = "only_warm_users"

            if self.use_validation_set:
                validation_set_suffix = "use_validation_set"
            else:
                validation_set_suffix = "no_validation_set"

            name_suffix = "_{}_{}".format(allow_cold_users_suffix,
                                          validation_set_suffix)

            split_parameters_dict = {
                "k_out_value": self.k_out_value,
                "allow_cold_users": self.allow_cold_users
            }

            dataIO = DataIO(folder_path=save_folder_path)

            dataIO.save_data(data_dict_to_save=split_parameters_dict,
                             file_name="split_parameters" + name_suffix)

            dataIO.save_data(data_dict_to_save=self.SPLIT_GLOBAL_MAPPER_DICT,
                             file_name="split_mappers" + name_suffix)

            dataIO.save_data(data_dict_to_save=self.SPLIT_URM_DICT,
                             file_name="split_URM" + name_suffix)

            if len(self.dataReader_object.get_loaded_ICM_names()) > 0:
                dataIO.save_data(data_dict_to_save=self.SPLIT_ICM_DICT,
                                 file_name="split_ICM" + name_suffix)

                dataIO.save_data(data_dict_to_save=self.SPLIT_ICM_MAPPER_DICT,
                                 file_name="split_ICM_mappers" + name_suffix)
    def save_model(self, folder_path, file_name = None):

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Saving model in file '{}'".format(folder_path + file_name))

        data_dict_to_save = {"USER_factors": self.USER_factors,
                              "ITEM_factors": self.ITEM_factors,
                              "use_bias": self.use_bias,
                             }

        if self.use_bias:
            data_dict_to_save["ITEM_bias"] = self.ITEM_bias
            data_dict_to_save["USER_bias"] = self.USER_bias
            data_dict_to_save["GLOBAL_bias"] = self.GLOBAL_bias


        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save = data_dict_to_save)

        self._print("Saving complete")
예제 #12
0
    def _load_from_saved_sparse_matrix(self, save_folder_path):
        """
        Loads all URM, ICM and UCM
        :return:
        """
        dataIO = DataIO(folder_path=save_folder_path)
        self._LOADED_GLOBAL_MAPPER_DICT = dataIO.load_data(
            file_name="dataset_global_mappers")

        self._LOADED_URM_DICT = dataIO.load_data(file_name="dataset_URM")

        if len(self.get_loaded_ICM_names()) > 0:
            self._LOADED_ICM_DICT = dataIO.load_data(file_name="dataset_ICM")

            self._LOADED_ICM_MAPPER_DICT = dataIO.load_data(
                file_name="dataset_ICM_mappers")

        if len(self.get_loaded_UCM_names()) > 0:
            self._LOADED_UCM_DICT = dataIO.load_data(file_name="dataset_UCM")
            self._LOADED_UCM_MAPPER_DICT = dataIO.load_data(
                file_name="dataset_UCM_mappers")
            print(
                "RecSys2019Reader: WARNING --> There is no verification in the consistency of UCMs"
            )
    def _split_data_from_original_dataset(self, save_folder_path):

        self.dataReader_object.load_data()
        URM = self.dataReader_object.get_URM_all()

        # Managing data reader
        self.SPLIT_GLOBAL_MAPPER_DICT = {}
        for mapper_name, mapper_object in self.dataReader_object.get_loaded_Global_mappers().items():
            self.SPLIT_GLOBAL_MAPPER_DICT[mapper_name] = mapper_object.copy()

        URM = sps.csr_matrix(URM)

        if not self.allow_cold_users:
            user_interactions = np.ediff1d(URM.indptr)
            user_to_preserve = user_interactions >= self.n_folds
            user_to_remove = np.logical_not(user_to_preserve)

            print(
                "DataSplitter_Warm: Removing {} of {} users because they have less interactions than the number of folds".format(
                    URM.shape[0] - user_to_preserve.sum(), URM.shape[0]))

            URM = URM[user_to_preserve, :]

            self.SPLIT_GLOBAL_MAPPER_DICT["user_original_ID_to_index"] = reconcile_mapper_with_removed_tokens(
                self.SPLIT_GLOBAL_MAPPER_DICT["user_original_ID_to_index"],
                np.arange(0, len(user_to_remove), dtype=np.int)[user_to_remove])

        self.n_users, self.n_items = URM.shape

        URM = sps.csr_matrix(URM)

        # Create empty URM for each fold
        self.fold_split = {}

        for fold_index in range(self.n_folds):
            self.fold_split[fold_index] = {}
            self.fold_split[fold_index]["URM"] = sps.coo_matrix(URM.shape)

            URM_fold_object = self.fold_split[fold_index]["URM"]
            # List.extend is waaaay faster than numpy.concatenate
            URM_fold_object.row = []
            URM_fold_object.col = []
            URM_fold_object.data = []

        for user_id in range(self.n_users):

            start_user_position = URM.indptr[user_id]
            end_user_position = URM.indptr[user_id + 1]

            user_profile = URM.indices[start_user_position:end_user_position]

            indices_to_suffle = np.arange(len(user_profile), dtype=np.int)

            np.random.shuffle(indices_to_suffle)

            user_profile = user_profile[indices_to_suffle]
            user_interactions = URM.data[start_user_position:end_user_position][indices_to_suffle]

            # interactions_per_fold is a float number, to auto-adjust fold size
            interactions_per_fold = len(user_profile) / self.n_folds

            for fold_index in range(self.n_folds):

                start_pos = int(interactions_per_fold * fold_index)
                end_pos = int(interactions_per_fold * (fold_index + 1))

                if fold_index == self.n_folds - 1:
                    end_pos = len(user_profile)

                current_fold_user_profile = user_profile[start_pos:end_pos]
                current_fold_user_interactions = user_interactions[start_pos:end_pos]

                URM_fold_object = self.fold_split[fold_index]["URM"]

                URM_fold_object.row.extend([user_id] * len(current_fold_user_profile))
                URM_fold_object.col.extend(current_fold_user_profile)
                URM_fold_object.data.extend(current_fold_user_interactions)

        for fold_index in range(self.n_folds):
            URM_fold_object = self.fold_split[fold_index]["URM"]
            URM_fold_object.row = np.array(URM_fold_object.row, dtype=np.int)
            URM_fold_object.col = np.array(URM_fold_object.col, dtype=np.int)
            URM_fold_object.data = np.array(URM_fold_object.data, dtype=np.float)

            self.fold_split[fold_index]["URM"] = sps.csr_matrix(URM_fold_object)
            self.fold_split[fold_index]["items_in_fold"] = np.arange(0, self.n_items, dtype=np.int)

        fold_dict_to_save = {"fold_split": self.fold_split,
                             "n_folds": self.n_folds,
                             "n_items": self.n_items,
                             "n_users": self.n_users,
                             "allow_cold_users": self.allow_cold_users,
                             }

        if self.allow_cold_users:
            allow_user = "******"
        else:
            allow_user = "******"

        pickle.dump(fold_dict_to_save,
                    open(save_folder_path + "URM_{}_fold_split_{}".format(self.n_folds, allow_user), "wb"),
                    protocol=pickle.HIGHEST_PROTOCOL)

        for ICM_name in self.dataReader_object.get_loaded_ICM_names():
            pickle.dump(self.dataReader_object.get_ICM_from_name(ICM_name),
                        open(save_folder_path + "{}".format(ICM_name), "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)
            pickle.dump(self.dataReader_object.get_ICM_feature_to_index_mapper_from_name(ICM_name),
                        open(save_folder_path + "tokenToFeatureMapper_{}".format(ICM_name), "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            # MAPPER MANAGING
            if self.allow_cold_users:
                allow_cold_users_suffix = "allow_cold_users"
            else:
                allow_cold_users_suffix = "only_warm_users"
            name_suffix = "_{}".format(allow_cold_users_suffix)
            dataIO = DataIO(folder_path=save_folder_path)
            dataIO.save_data(data_dict_to_save=self.SPLIT_GLOBAL_MAPPER_DICT,
                             file_name="split_mappers" + name_suffix)


        print("DataSplitter: Split complete")