Code example #1
    def _load_from_original_file(self):

        # Rebuild the complete URM by summing the original train, validation and test splits
        URM_all = self._originalReader.URM_DICT['URM_train'] + \
                  self._originalReader.URM_DICT['URM_validation'] + \
                  self._originalReader.URM_DICT['URM_test']

        n_users, n_items = URM_all.shape

        loaded_URM_dict = {"URM_all": URM_all,
                           "URM_test_negative": self._originalReader.URM_DICT['URM_test_negative']}

        loaded_ICM_dict = {"ICM_all": self._originalReader.ICM_DICT["ICM_all"]}
        loaded_ICM_mapper_dict = {"ICM_all": { i:i for i in range(self._originalReader.ICM_DICT["ICM_all"].shape[1])}}

        loaded_UCM_dict = {"UCM_all": self._originalReader.ICM_DICT["UCM_all"]}
        loaded_UCM_mapper_dict = {"UCM_all": { i:i for i in range(self._originalReader.ICM_DICT["UCM_all"].shape[1])}}

        # The original data already uses consecutive integer indices, so the ID-to-index mappers are identities
        user_original_ID_to_index = { i:i for i in range(n_users) }
        item_original_ID_to_index = { i:i for i in range(n_items) }

        loaded_dataset = Dataset(dataset_name = self._get_dataset_name(),
                                 URM_dictionary = loaded_URM_dict,
                                 ICM_dictionary = loaded_ICM_dict,
                                 ICM_feature_mapper_dictionary = loaded_ICM_mapper_dict,
                                 UCM_dictionary = loaded_UCM_dict,
                                 UCM_feature_mapper_dictionary = loaded_UCM_mapper_dict,
                                 user_original_ID_to_index= user_original_ID_to_index,
                                 item_original_ID_to_index= item_original_ID_to_index,
                                 is_implicit = self.IS_IMPLICIT,
                                 )

        return loaded_dataset
Code example #2
    def _load_from_original_file(self):
        # Load data from original

        self._print("Loading original data")

        folder_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        try:

            compressed_file = gzip.open(folder_path + self.ZIP_NAME)

        except FileNotFoundError:

            self._print("Unable to find data zip file. Downloading...")
            download_from_URL(self.DATASET_URL, folder_path, self.ZIP_NAME)

            compressed_file = gzip.open(folder_path + self.ZIP_NAME)

        URM_path = folder_path + self.FILE_RATINGS_PATH

        decompressed_file = open(URM_path, "w")

        self._save_GZ_in_text_file(compressed_file, decompressed_file)

        decompressed_file.close()

        self._print("loading URM")
        URM_all, item_original_ID_to_index, user_original_ID_to_index = load_CSV_into_SparseBuilder(
            URM_path,
            header=False,
            separator="\t",
            remove_duplicates=True,
            custom_user_item_rating_columns=[0, 4, 2])

        # The value column holds a coordinate rather than a rating, so binarize the data to obtain an implicit URM
        URM_all.data = np.ones_like(URM_all.data)

        loaded_URM_dict = {"URM_all": URM_all}

        loaded_dataset = Dataset(
            dataset_name=self._get_dataset_name(),
            URM_dictionary=loaded_URM_dict,
            ICM_dictionary=None,
            ICM_feature_mapper_dictionary=None,
            UCM_dictionary=None,
            UCM_feature_mapper_dictionary=None,
            user_original_ID_to_index=user_original_ID_to_index,
            item_original_ID_to_index=item_original_ID_to_index,
            is_implicit=self.IS_IMPLICIT,
        )

        self._print("cleaning temporary files")

        os.remove(URM_path)

        self._print("loading complete")

        return loaded_dataset
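
The helper _save_GZ_in_text_file called above is not shown in this listing. Below is a minimal sketch of what such a helper could look like inside the reader class, assuming it simply streams the decompressed gzip content into the text file; this is an assumption for illustration, not the repository's actual implementation.

    # Hypothetical sketch only: decode each line of the binary gzip stream and
    # write it as plain text so that load_CSV_into_SparseBuilder can parse it.
    def _save_GZ_in_text_file(self, compressed_file, decompressed_file):
        for line in compressed_file:
            decompressed_file.write(line.decode("utf-8"))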
Code example #3
    def _load_from_original_file(self):
        # Load data from original

        zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        try:

            dataFile = zipfile.ZipFile(zipFile_path +
                                       "hetrec2011-movielens-2k-v2.zip")

        except (FileNotFoundError, zipfile.BadZipFile):

            self._print("Unable to fild data zip file. Downloading...")

            download_from_URL(self.DATASET_URL, zipFile_path,
                              "hetrec2011-movielens-2k-v2.zip")

            dataFile = zipfile.ZipFile(zipFile_path +
                                       "hetrec2011-movielens-2k-v2.zip")

        URM_path = dataFile.extract("user_ratedmovies.dat",
                                    path=zipFile_path + "decompressed/")

        URM_all, item_original_ID_to_index, user_original_ID_to_index = load_CSV_into_SparseBuilder(
            URM_path,
            separator="\t",
            header=True,
            custom_user_item_rating_columns=[0, 1, 2])

        loaded_URM_dict = {"URM_all": URM_all}

        loaded_dataset = Dataset(
            dataset_name=self._get_dataset_name(),
            URM_dictionary=loaded_URM_dict,
            ICM_dictionary=None,
            ICM_feature_mapper_dictionary=None,
            UCM_dictionary=None,
            UCM_feature_mapper_dictionary=None,
            user_original_ID_to_index=user_original_ID_to_index,
            item_original_ID_to_index=item_original_ID_to_index,
            is_implicit=self.IS_IMPLICIT,
        )

        self._print("cleaning temporary files")

        shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

        self._print("loading complete")

        return loaded_dataset
Code example #4
    def _load_from_original_file(self):
        # Load data from original

        self.zip_file_folder = self.DATASET_OFFLINE_ROOT_FOLDER + self.DATASET_SUBFOLDER
        self.decompressed_zip_file_folder = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        try:

            self.dataFile = zipfile.ZipFile(self.zip_file_folder +
                                            "netflix-prize-data.zip")

        except (FileNotFoundError, zipfile.BadZipFile):

            self._print("Unable to find data zip file.")
            self._print(
                "Automatic download not available, please ensure the ZIP data file is in folder {}."
                .format(self.zip_file_folder))
            self._print("Data can be downloaded here: {}".format(
                self.DATASET_URL))

            # If directory does not exist, create
            if not os.path.exists(self.zip_file_folder):
                os.makedirs(self.zip_file_folder)

            raise FileNotFoundError("Automatic download not available.")

        URM_all, self.item_original_ID_to_index, self.user_original_ID_to_index = self._loadURM()

        loaded_URM_dict = {"URM_all": URM_all}

        loaded_dataset = Dataset(
            dataset_name=self._get_dataset_name(),
            URM_dictionary=loaded_URM_dict,
            ICM_dictionary=None,
            ICM_feature_mapper_dictionary=None,
            UCM_dictionary=None,
            UCM_feature_mapper_dictionary=None,
            user_original_ID_to_index=self.user_original_ID_to_index,
            item_original_ID_to_index=self.item_original_ID_to_index,
            is_implicit=self.IS_IMPLICIT,
        )

        self._print("loading complete")

        return loaded_dataset
Code example #5
    def _load_from_original_file(self):
        # Load data from original

        self._print("Loading original data")

        zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        try:

            dataFile = zipfile.ZipFile(zipFile_path + "filmtrust.zip")

        except (FileNotFoundError, zipfile.BadZipFile):

            print("FilmTrust: Unable to fild data zip file. Downloading...")

            download_from_URL(self.DATASET_URL, zipFile_path, "filmtrust.zip")

            dataFile = zipfile.ZipFile(zipFile_path + "filmtrust.zip")

        URM_path = dataFile.extract("ratings.txt",
                                    path=zipFile_path + "decompressed/")

        URM_all, item_original_ID_to_index, user_original_ID_to_index = load_CSV_into_SparseBuilder(
            URM_path, separator=" ", header=False, remove_duplicates=True)

        loaded_URM_dict = {"URM_all": URM_all}

        loaded_dataset = Dataset(
            dataset_name=self._get_dataset_name(),
            URM_dictionary=loaded_URM_dict,
            ICM_dictionary=None,
            ICM_feature_mapper_dictionary=None,
            UCM_dictionary=None,
            UCM_feature_mapper_dictionary=None,
            user_original_ID_to_index=user_original_ID_to_index,
            item_original_ID_to_index=item_original_ID_to_index,
            is_implicit=self.IS_IMPLICIT,
        )

        self._print("cleaning temporary files")

        shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

        self._print("loading complete")

        return loaded_dataset
Code example #6
    def _load_from_original_file(self):
        # Load data from original
        zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        try:

            dataFile = zipfile.ZipFile(zipFile_path + "ml-1m.zip")

        except (FileNotFoundError, zipfile.BadZipFile):

            self._print("Unable to fild data zip file. Downloading...")

            download_from_URL(self.DATASET_URL, zipFile_path, "ml-1m.zip")

            dataFile = zipfile.ZipFile(zipFile_path + "ml-1m.zip")

        ICM_genre_path = dataFile.extract("ml-1m/movies.dat",
                                          path=zipFile_path + "decompressed/")
        UCM_path = dataFile.extract("ml-1m/users.dat",
                                    path=zipFile_path + "decompressed/")
        URM_path = dataFile.extract("ml-1m/ratings.dat",
                                    path=zipFile_path + "decompressed/")

        self._print("loading genres")
        ICM_genres, tokenToFeatureMapper_ICM_genres, item_original_ID_to_index = _loadICM_genres(
            ICM_genre_path, header=True, separator='::', genresSeparator="|")

        self._print("loading UCM")
        UCM_all, tokenToFeatureMapper_UCM_all, user_original_ID_to_index = _loadUCM(
            UCM_path, header=True, separator='::')

        self._print("loading URM")
        URM_all, item_original_ID_to_index, user_original_ID_to_index, URM_timestamp = _loadURM_preinitialized_item_id(
            URM_path,
            separator="::",
            header=False,
            if_new_user="******",
            if_new_item="ignore",
            item_original_ID_to_index=item_original_ID_to_index,
            user_original_ID_to_index=user_original_ID_to_index)
        loaded_URM_dict = {"URM_all": URM_all, "URM_timestamp": URM_timestamp}

        loaded_ICM_dict = {"ICM_genres": ICM_genres}
        loaded_ICM_mapper_dict = {
            "ICM_genres": tokenToFeatureMapper_ICM_genres
        }

        loaded_UCM_dict = {"UCM_all": UCM_all}
        loaded_UCM_mapper_dict = {"UCM_all": tokenToFeatureMapper_UCM_all}

        loaded_dataset = Dataset(
            dataset_name=self._get_dataset_name(),
            URM_dictionary=loaded_URM_dict,
            ICM_dictionary=loaded_ICM_dict,
            ICM_feature_mapper_dictionary=loaded_ICM_mapper_dict,
            UCM_dictionary=loaded_UCM_dict,
            UCM_feature_mapper_dictionary=loaded_UCM_mapper_dict,
            user_original_ID_to_index=user_original_ID_to_index,
            item_original_ID_to_index=item_original_ID_to_index,
            is_implicit=self.IS_IMPLICIT,
        )

        self._print("cleaning temporary files")

        shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

        self._print("loading complete")

        return loaded_dataset
Code example #7
    def generate_Dataset(self, dataset_name, is_implicit):

        assert (
            not self.__Dataset_finalized
        ), "Dataset mappers have already been generated, adding new data is forbidden"
        self.__Dataset_finalized = True

        # Generate ID to index mappers
        self._generate_global_mappers()
        self._generate_ICM_UCM_mappers()

        URM_DICT_sparse = {}
        ICM_DICT_sparse = {}
        UCM_DICT_sparse = {}

        on_new_ID = "ignore"

        for URM_name, URM_dataframe in self.URM_DICT.items():
            URM_sparse_builder = IncrementalSparseMatrix_FilterIDs(
                preinitialized_col_mapper=self.item_original_ID_to_index,
                preinitialized_row_mapper=self.user_original_ID_to_index,
                on_new_col=on_new_ID,
                on_new_row=on_new_ID,
            )

            URM_sparse_builder.add_data_lists(
                URM_dataframe["UserID"].values,
                URM_dataframe["ItemID"].values,
                URM_dataframe["Data"].values,
            )
            URM_DICT_sparse[URM_name] = URM_sparse_builder.get_SparseMatrix()

        for ICM_name, ICM_dataframe in self.ICM_DICT.items():
            feature_ID_to_index = self.ICM_mapper_DICT[ICM_name]
            ICM_sparse_builder = IncrementalSparseMatrix_FilterIDs(
                preinitialized_col_mapper=feature_ID_to_index,
                preinitialized_row_mapper=self.item_original_ID_to_index,
                on_new_col=on_new_ID,
                on_new_row=on_new_ID,
            )

            ICM_sparse_builder.add_data_lists(
                ICM_dataframe["ItemID"].values,
                ICM_dataframe["FeatureID"].values,
                ICM_dataframe["Data"].values,
            )
            ICM_DICT_sparse[ICM_name] = ICM_sparse_builder.get_SparseMatrix()

        for UCM_name, UCM_dataframe in self.UCM_DICT.items():
            feature_ID_to_index = self.UCM_mapper_DICT[UCM_name]
            UCM_sparse_builder = IncrementalSparseMatrix_FilterIDs(
                preinitialized_col_mapper=feature_ID_to_index,
                preinitialized_row_mapper=self.user_original_ID_to_index,
                on_new_col=on_new_ID,
                on_new_row=on_new_ID,
            )

            UCM_sparse_builder.add_data_lists(
                UCM_dataframe["UserID"].values,
                UCM_dataframe["FeatureID"].values,
                UCM_dataframe["Data"].values,
            )
            UCM_DICT_sparse[UCM_name] = UCM_sparse_builder.get_SparseMatrix()

        loaded_dataset = Dataset(
            dataset_name=dataset_name,
            URM_dictionary=URM_DICT_sparse,
            ICM_dictionary=ICM_DICT_sparse,
            ICM_feature_mapper_dictionary=self.ICM_mapper_DICT,
            UCM_dictionary=UCM_DICT_sparse,
            UCM_feature_mapper_dictionary=self.UCM_mapper_DICT,
            user_original_ID_to_index=self.user_original_ID_to_index,
            item_original_ID_to_index=self.item_original_ID_to_index,
            is_implicit=is_implicit,
        )

        return loaded_dataset
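
A brief usage sketch of the builder above: each dataframe registered in URM_DICT is expected to expose "UserID", "ItemID" and "Data" columns, as read by add_data_lists. The manager class name and the add_URM registration method below are illustrative assumptions, not confirmed API.

    import pandas as pd

    ratings = pd.DataFrame({"UserID": ["u1", "u2", "u1"],
                            "ItemID": ["i1", "i2", "i3"],
                            "Data":   [1.0, 1.0, 1.0]})

    manager = DatasetMapperManager()       # hypothetical builder exposing URM_DICT and the global mappers
    manager.add_URM(ratings, "URM_all")    # hypothetical registration method
    dataset = manager.generate_Dataset(dataset_name="ExampleDataset", is_implicit=True)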
Code example #8
    def load_data(self, save_folder_path=None):
        """
        :param save_folder_path:    path in which to save the loaded dataset
                                    None    use default "dataset_name/original/"
                                    False   do not save
        :return:
        """

        # Use default e.g., "dataset_name/original/"
        if save_folder_path is None:
            save_folder_path = self.DATASET_SPLIT_ROOT_FOLDER + \
                               self._get_dataset_name_root() + \
                               self._get_dataset_name_data_subfolder()

        # If save_folder_path contains any path try to load a previously built split from it
        if save_folder_path is not False and not self.reload_from_original_data:

            try:
                loaded_dataset = Dataset()
                loaded_dataset.load_data(save_folder_path)

                self._print("Verifying data consistency...")
                loaded_dataset.verify_data_consistency()
                self._print("Verifying data consistency... Passed!")

                loaded_dataset.print_statistics()
                return loaded_dataset

            except FileNotFoundError:

                self._print(
                    "Preloaded data not found, reading from original files...")

            except Exception:

                self._print(
                    "Reading split from {} caused the following exception...".
                    format(save_folder_path))
                traceback.print_exc()
                raise Exception("{}: Exception while reading split".format(
                    self._get_dataset_name()))

        self._print("Loading original data")
        loaded_dataset = self._load_from_original_file()

        self._print("Verifying data consistency...")
        loaded_dataset.verify_data_consistency()
        self._print("Verifying data consistency... Passed!")

        if save_folder_path is not False:

            # If directory does not exist, create
            if not os.path.exists(save_folder_path):
                self._print("Creating folder '{}'".format(save_folder_path))
                os.makedirs(save_folder_path)

            else:
                self._print("Found already existing folder '{}'".format(
                    save_folder_path))

            loaded_dataset.save_data(save_folder_path)

            self._print("Saving complete!")

        loaded_dataset.print_statistics()
        return loaded_dataset
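
Every reader's _load_from_original_file returns a Dataset object, and load_data above is the public entry point that either reloads a previously saved split or rebuilds and saves it. A minimal usage sketch follows; the reader class name is only an example and get_URM_all is assumed to be the Dataset accessor for the "URM_all" matrix.

    data_reader = Movielens1MReader()          # any reader implementing _load_from_original_file
    loaded_dataset = data_reader.load_data()   # loads the saved split or rebuilds it from the original files
    URM_all = loaded_dataset.get_URM_all()     # assumed accessor on the Dataset object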
Code example #9
    def _load_from_original_file_all_amazon_datasets(self,
                                                     URM_path,
                                                     metadata_path=None,
                                                     reviews_path=None):
        # Load data from original

        self._print("loading URM")
        URM_all, URM_timestamp, self.item_original_ID_to_index, self.user_original_ID_to_index = load_CSV_into_SparseBuilder(
            URM_path, separator=",", header=False, timestamp=True)

        loaded_URM_dict = {"URM_all": URM_all, "URM_timestamp": URM_timestamp}

        loaded_ICM_dict = {}
        loaded_ICM_mapper_dict = {}

        if metadata_path is not None:
            self._print("loading metadata")
            ICM_metadata, tokenToFeatureMapper_ICM_metadata, _ = self._loadMetadata(
                metadata_path, if_new_item="ignore")

            # Keep only features occurring in at least 5 items and in at most 30% of the items
            ICM_metadata, _, tokenToFeatureMapper_ICM_metadata = remove_features(
                ICM_metadata,
                min_occurrence=5,
                max_percentage_occurrence=0.30,
                reconcile_mapper=tokenToFeatureMapper_ICM_metadata)

            loaded_ICM_dict["ICM_metadata"] = ICM_metadata
            loaded_ICM_mapper_dict[
                "ICM_metadata"] = tokenToFeatureMapper_ICM_metadata

        if reviews_path is not None:
            self._print("loading reviews")
            ICM_reviews, tokenToFeatureMapper_ICM_reviews, _ = self._loadReviews(
                reviews_path, if_new_item="ignore")

            ICM_reviews, _, tokenToFeatureMapper_ICM_reviews = remove_features(
                ICM_reviews,
                min_occurrence=5,
                max_percentage_occurrence=0.30,
                reconcile_mapper=tokenToFeatureMapper_ICM_reviews)

            loaded_ICM_dict["ICM_reviews"] = ICM_reviews
            loaded_ICM_mapper_dict[
                "ICM_reviews"] = tokenToFeatureMapper_ICM_reviews

        loaded_dataset = Dataset(
            dataset_name=self._get_dataset_name(),
            URM_dictionary=loaded_URM_dict,
            ICM_dictionary=loaded_ICM_dict,
            ICM_feature_mapper_dictionary=loaded_ICM_mapper_dict,
            UCM_dictionary=None,
            UCM_feature_mapper_dictionary=None,
            user_original_ID_to_index=self.user_original_ID_to_index,
            item_original_ID_to_index=self.item_original_ID_to_index,
            is_implicit=self.IS_IMPLICIT,
        )

        # Clean temp files
        self._print("cleaning temporary files")

        if metadata_path is not None:
            os.remove(metadata_path)

        if reviews_path is not None:
            os.remove(reviews_path)

        self._print("loading complete")

        return loaded_dataset
Code example #10
    def _load_from_original_file(self):
        # Load data from original

        self._print("Loading original data")

        zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        try:

            dataFile = zipfile.ZipFile(
                zipFile_path + "neural_factorization_machine-master.zip")

        except (FileNotFoundError, zipfile.BadZipFile):

            self._print("Unable to fild data zip file. Downloading...")

            download_from_URL(self.DATASET_URL, zipFile_path,
                              "neural_factorization_machine-master.zip")

            dataFile = zipfile.ZipFile(
                zipFile_path + "neural_factorization_machine-master.zip")

        inner_path_in_zip = "neural_factorization_machine-master/data/frappe/"

        URM_train_path = dataFile.extract(inner_path_in_zip +
                                          "frappe.train.libfm",
                                          path=zipFile_path + "decompressed/")
        URM_test_path = dataFile.extract(inner_path_in_zip +
                                         "frappe.test.libfm",
                                         path=zipFile_path + "decompressed/")
        URM_validation_path = dataFile.extract(
            inner_path_in_zip + "frappe.validation.libfm",
            path=zipFile_path + "decompressed/")

        tmp_URM_train, item_original_ID_to_index, user_original_ID_to_index = self._loadURM(
            URM_train_path,
            item_original_ID_to_index=None,
            user_original_ID_to_index=None)

        tmp_URM_test, item_original_ID_to_index, user_original_ID_to_index = self._loadURM(
            URM_test_path,
            item_original_ID_to_index=item_original_ID_to_index,
            user_original_ID_to_index=user_original_ID_to_index)

        tmp_URM_validation, item_original_ID_to_index, user_original_ID_to_index = self._loadURM(
            URM_validation_path,
            item_original_ID_to_index=item_original_ID_to_index,
            user_original_ID_to_index=user_original_ID_to_index)

        shape = (len(user_original_ID_to_index),
                 len(item_original_ID_to_index))

        tmp_URM_train = reshapeSparse(tmp_URM_train, shape)
        tmp_URM_test = reshapeSparse(tmp_URM_test, shape)
        tmp_URM_validation = reshapeSparse(tmp_URM_validation, shape)

        # URM_occurrence keeps the raw interaction counts, URM_all is its binarized implicit version
        URM_occurrence = tmp_URM_train + tmp_URM_test + tmp_URM_validation

        URM_all = URM_occurrence.copy()
        URM_all.data = np.ones_like(URM_all.data)

        loaded_URM_dict = {
            "URM_all": URM_all,
            "URM_occurrence": URM_occurrence
        }

        loaded_dataset = Dataset(
            dataset_name=self._get_dataset_name(),
            URM_dictionary=loaded_URM_dict,
            ICM_dictionary=None,
            ICM_feature_mapper_dictionary=None,
            UCM_dictionary=None,
            UCM_feature_mapper_dictionary=None,
            user_original_ID_to_index=user_original_ID_to_index,
            item_original_ID_to_index=item_original_ID_to_index,
            is_implicit=self.IS_IMPLICIT,
        )

        self._print("cleaning temporary files")

        shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

        self._print("loading complete")

        return loaded_dataset
Code example #11
    def _load_from_original_file(self):
        # Load data from original

        zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

        try:

            dataFile = zipfile.ZipFile(zipFile_path + "ml-20m.zip")

        except (FileNotFoundError, zipfile.BadZipFile):

            print("Movielens20MReader: Unable to fild data zip file. Downloading...")

            download_from_URL(self.DATASET_URL, zipFile_path, "ml-20m.zip")

            dataFile = zipfile.ZipFile(zipFile_path + "ml-20m.zip")


        genres_path = dataFile.extract("ml-20m/movies.csv", path=zipFile_path + "decompressed/")
        tags_path = dataFile.extract("ml-20m/tags.csv", path=zipFile_path + "decompressed/")
        URM_path = dataFile.extract("ml-20m/ratings.csv", path=zipFile_path + "decompressed/")


        self._print("loading genres")
        ICM_genres, tokenToFeatureMapper_ICM_genres, item_original_ID_to_index = _loadICM_genres(genres_path, header=True, separator=',', genresSeparator="|")

        self._print("loading tags")
        ICM_tags, tokenToFeatureMapper_ICM_tags, _ = _loadICM_tags(tags_path, header=True, separator=',', if_new_item = "ignore",
                                                                    item_original_ID_to_index = item_original_ID_to_index)

        self._print("loading URM")
        URM_all, item_original_ID_to_index, user_original_ID_to_index, URM_timestamp = _loadURM_preinitialized_item_id(URM_path, separator=",",
                                                                                          header = True, if_new_user = "add", if_new_item = "ignore",
                                                                                          item_original_ID_to_index = item_original_ID_to_index)

        # Concatenate genre and tag features into a single ICM with a merged feature mapper
        ICM_all, tokenToFeatureMapper_ICM_all = merge_ICM(ICM_genres, ICM_tags,
                                                          tokenToFeatureMapper_ICM_genres,
                                                          tokenToFeatureMapper_ICM_tags)


        loaded_URM_dict = {"URM_all": URM_all,
                           "URM_timestamp": URM_timestamp}

        loaded_ICM_dict = {"ICM_genres": ICM_genres,
                           "ICM_tags": ICM_tags,
                           "ICM_all": ICM_all}

        loaded_ICM_mapper_dict = {"ICM_genres": tokenToFeatureMapper_ICM_genres,
                                  "ICM_tags": tokenToFeatureMapper_ICM_tags,
                                  "ICM_all": tokenToFeatureMapper_ICM_all}


        loaded_dataset = Dataset(dataset_name = self._get_dataset_name(),
                                 URM_dictionary = loaded_URM_dict,
                                 ICM_dictionary = loaded_ICM_dict,
                                 ICM_feature_mapper_dictionary = loaded_ICM_mapper_dict,
                                 UCM_dictionary = None,
                                 UCM_feature_mapper_dictionary = None,
                                 user_original_ID_to_index= user_original_ID_to_index,
                                 item_original_ID_to_index= item_original_ID_to_index,
                                 is_implicit = self.IS_IMPLICIT,
                                 )

        self._print("cleaning temporary files")

        shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

        self._print("saving URM and ICM")

        return loaded_dataset