def _load_from_original_file(self):
    # Load data from original zip file
    zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

    try:
        dataFile = zipfile.ZipFile(zipFile_path + "ml-10m.zip")
    except (FileNotFoundError, zipfile.BadZipFile):
        self._print("Unable to find data zip file. Downloading...")
        download_from_URL(self.DATASET_URL, zipFile_path, "ml-10m.zip")
        dataFile = zipfile.ZipFile(zipFile_path + "ml-10m.zip")

    ICM_genre_path = dataFile.extract("ml-10M100K/movies.dat", path=zipFile_path + "decompressed/")
    ICM_tags_path = dataFile.extract("ml-10M100K/tags.dat", path=zipFile_path + "decompressed/")
    URM_path = dataFile.extract("ml-10M100K/ratings.dat", path=zipFile_path + "decompressed/")

    self._print("Loading Item Features Genres")
    ICM_genres_dataframe, ICM_years_dataframe = _loadICM_genres_years(ICM_genre_path, header=None,
                                                                      separator='::', genresSeparator="|")

    self._print("Loading Item Features Tags")
    ICM_tags_dataframe = _loadICM_tags(ICM_tags_path, header=None, separator='::')

    ICM_all_dataframe = pd.concat([ICM_genres_dataframe, ICM_tags_dataframe])

    self._print("Loading Interactions")
    URM_all_dataframe, URM_timestamp_dataframe = _loadURM(URM_path, header=None, separator='::')

    dataset_manager = DatasetMapperManager()
    dataset_manager.add_URM(URM_all_dataframe, "URM_all")
    dataset_manager.add_URM(URM_timestamp_dataframe, "URM_timestamp")
    dataset_manager.add_ICM(ICM_genres_dataframe, "ICM_genres")
    dataset_manager.add_ICM(ICM_years_dataframe, "ICM_year")
    dataset_manager.add_ICM(ICM_tags_dataframe, "ICM_tags")
    dataset_manager.add_ICM(ICM_all_dataframe, "ICM_all")

    loaded_dataset = dataset_manager.generate_Dataset(dataset_name=self._get_dataset_name(),
                                                      is_implicit=self.IS_IMPLICIT)

    self._print("Cleaning Temporary Files")
    shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

    self._print("Loading Complete")
    return loaded_dataset
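# _loadURM is used by several readers in this file but is not defined here. The
# following is a minimal sketch of what it plausibly does for the "::"-separated
# MovieLens ratings files. The column layout (UserID::MovieID::Rating::Timestamp)
# is the documented MovieLens format, but the helper's body and exact return
# convention are assumptions inferred from how its results are passed to
# DatasetMapperManager.add_URM.
import pandas as pd

def _loadURM_sketch(URM_path, header=None, separator='::'):
    # Multi-character separators require the python parsing engine
    URM_all_dataframe = pd.read_csv(filepath_or_buffer=URM_path, sep=separator,
                                    header=header, engine='python',
                                    dtype={0: str, 1: str, 2: float, 3: int})
    URM_all_dataframe.columns = ["UserID", "ItemID", "Data", "Timestamp"]

    # Same (UserID, ItemID) pairs with the timestamp in the "Data" column,
    # so the timestamps can be added as a separate URM
    URM_timestamp_dataframe = URM_all_dataframe[["UserID", "ItemID", "Timestamp"]].copy()
    URM_timestamp_dataframe.columns = ["UserID", "ItemID", "Data"]

    URM_all_dataframe = URM_all_dataframe[["UserID", "ItemID", "Data"]]

    return URM_all_dataframe, URM_timestamp_dataframe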
def _load_from_original_file(self): # Load data from original self._print("Loading original data") folder_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER try: compressed_file = gzip.open(folder_path + self.ZIP_NAME, ) except FileNotFoundError: self._print("Unable to find data zip file. Downloading...") download_from_URL(self.DATASET_URL, folder_path, self.ZIP_NAME) compressed_file = gzip.open(folder_path + self.ZIP_NAME) URM_path = folder_path + self.FILE_RATINGS_PATH decompressed_file = open(URM_path, "w") self._save_GZ_in_text_file(compressed_file, decompressed_file) decompressed_file.close() self._print("loading URM") URM_all, item_original_ID_to_index, user_original_ID_to_index = load_CSV_into_SparseBuilder( URM_path, header=False, separator="\t", remove_duplicates=True, custom_user_item_rating_columns=[0, 4, 2]) # URM_all contains the coordinates in textual format URM_all.data = np.ones_like(URM_all.data) loaded_URM_dict = {"URM_all": URM_all} loaded_dataset = Dataset( dataset_name=self._get_dataset_name(), URM_dictionary=loaded_URM_dict, ICM_dictionary=None, ICM_feature_mapper_dictionary=None, UCM_dictionary=None, UCM_feature_mapper_dictionary=None, user_original_ID_to_index=user_original_ID_to_index, item_original_ID_to_index=item_original_ID_to_index, is_implicit=self.IS_IMPLICIT, ) self._print("cleaning temporary files") os.remove(URM_path) self._print("loading complete") return loaded_dataset
def _load_from_original_file(self):
    # Load data from original zip file
    zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

    try:
        dataFile = zipfile.ZipFile(zipFile_path + "ml-1m.zip")
    except (FileNotFoundError, zipfile.BadZipFile):
        self._print("Unable to find data zip file. Downloading...")
        download_from_URL(self.DATASET_URL, zipFile_path, "ml-1m.zip")
        dataFile = zipfile.ZipFile(zipFile_path + "ml-1m.zip")

    ICM_genre_path = dataFile.extract("ml-1m/movies.dat", path=zipFile_path + "decompressed/")
    UCM_path = dataFile.extract("ml-1m/users.dat", path=zipFile_path + "decompressed/")
    URM_path = dataFile.extract("ml-1m/ratings.dat", path=zipFile_path + "decompressed/")

    self._print("Loading Interactions")
    URM_all_dataframe, URM_timestamp_dataframe = _loadURM(URM_path, header=None, separator='::')

    self._print("Loading Item Features genres")
    ICM_genres_dataframe = _loadICM_genres(ICM_genre_path, header=None, separator='::', genresSeparator="|")

    self._print("Loading User Features")
    UCM_dataframe = pd.read_csv(filepath_or_buffer=UCM_path, sep="::", header=None,
                                dtype={0: str, 1: str, 2: str, 3: str, 4: str})
    UCM_dataframe.columns = ["UserID", "gender", "age_group", "occupation", "zip_code"]

    # For each user, build the list of feature tokens, e.g., "gender_F", "age_group_1"
    UCM_list = [[feature_name + "_" + str(UCM_dataframe[feature_name][index])
                 for feature_name in ["gender", "age_group", "occupation", "zip_code"]]
                for index in range(len(UCM_dataframe))]

    # Reshape into a long-format dataframe with one (FeatureID, UserID) pair per row
    UCM_dataframe = pd.DataFrame(UCM_list, index=UCM_dataframe["UserID"]).stack()
    UCM_dataframe = UCM_dataframe.reset_index()[[0, 'UserID']]
    UCM_dataframe.columns = ['FeatureID', 'UserID']
    UCM_dataframe["Data"] = 1

    dataset_manager = DatasetMapperManager()
    dataset_manager.add_URM(URM_all_dataframe, "URM_all")
    dataset_manager.add_URM(URM_timestamp_dataframe, "URM_timestamp")
    dataset_manager.add_ICM(ICM_genres_dataframe, "ICM_genres")
    dataset_manager.add_UCM(UCM_dataframe, "UCM_all")

    loaded_dataset = dataset_manager.generate_Dataset(dataset_name=self._get_dataset_name(),
                                                      is_implicit=self.IS_IMPLICIT)

    self._print("cleaning temporary files")
    shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

    self._print("Loading Complete")
    return loaded_dataset
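# The wide-to-long reshape above is compact but easy to misread. A standalone
# toy example (hypothetical data, not taken from the dataset) showing what
# pd.DataFrame(...).stack() followed by reset_index() produces:
import pandas as pd

toy_UCM_list = [["gender_F", "age_group_1"],
                ["gender_M", "age_group_56"]]
toy = pd.DataFrame(toy_UCM_list, index=pd.Index(["1", "2"], name="UserID")).stack()
toy = toy.reset_index()[[0, "UserID"]]
toy.columns = ["FeatureID", "UserID"]
toy["Data"] = 1
print(toy)
#       FeatureID UserID  Data
# 0      gender_F      1     1
# 1   age_group_1      1     1
# 2      gender_M      2     1
# 3  age_group_56      2     1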
def _load_from_original_file(self):
    # Load data from original zip file
    zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

    try:
        dataFile = zipfile.ZipFile(zipFile_path + "hetrec2011-movielens-2k-v2.zip")
    except (FileNotFoundError, zipfile.BadZipFile):
        self._print("Unable to find data zip file. Downloading...")
        download_from_URL(self.DATASET_URL, zipFile_path, "hetrec2011-movielens-2k-v2.zip")
        dataFile = zipfile.ZipFile(zipFile_path + "hetrec2011-movielens-2k-v2.zip")

    URM_path = dataFile.extract("user_ratedmovies.dat", path=zipFile_path + "decompressed/")

    URM_all, item_original_ID_to_index, user_original_ID_to_index = load_CSV_into_SparseBuilder(URM_path,
                                                                                                separator="\t",
                                                                                                header=True,
                                                                                                custom_user_item_rating_columns=[0, 1, 2])

    loaded_URM_dict = {"URM_all": URM_all}

    loaded_dataset = Dataset(dataset_name=self._get_dataset_name(),
                             URM_dictionary=loaded_URM_dict,
                             ICM_dictionary=None,
                             ICM_feature_mapper_dictionary=None,
                             UCM_dictionary=None,
                             UCM_feature_mapper_dictionary=None,
                             user_original_ID_to_index=user_original_ID_to_index,
                             item_original_ID_to_index=item_original_ID_to_index,
                             is_implicit=self.IS_IMPLICIT,
                             )

    self._print("cleaning temporary files")
    shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

    self._print("loading complete")
    return loaded_dataset
def _get_ICM_metadata_path(self, data_folder, compressed_file_name, decompressed_file_name, file_url):
    """
    Metadata files are .csv, compressed as .gz
    :param data_folder:
    :param compressed_file_name:
    :param decompressed_file_name:
    :param file_url:
    :return: path of the decompressed metadata file
    """

    try:
        open(data_folder + decompressed_file_name, "r")

    except FileNotFoundError:
        self._print("Decompressing metadata file...")

        try:
            decompressed_file = open(data_folder + decompressed_file_name, "wb")
            compressed_file = gzip.open(data_folder + compressed_file_name, "rb")
            decompressed_file.write(compressed_file.read())
            compressed_file.close()
            decompressed_file.close()

        except Exception:
            # The compressed file is missing or corrupted: download it again
            self._print("Unable to find or decompress compressed file. Downloading...")
            download_from_URL(file_url, data_folder, compressed_file_name)

            decompressed_file = open(data_folder + decompressed_file_name, "wb")
            compressed_file = gzip.open(data_folder + compressed_file_name, "rb")
            decompressed_file.write(compressed_file.read())
            compressed_file.close()
            decompressed_file.close()

    return data_folder + decompressed_file_name
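# The write(compressed_file.read()) pattern above loads the whole decompressed
# payload into memory and appears twice. For reference, the standard-library
# idiom for the same operation streams it in chunks; this standalone sketch
# uses illustrative parameter names, not names from this module:
import gzip
import shutil

def decompress_gz_sketch(compressed_path, decompressed_path):
    with gzip.open(compressed_path, "rb") as compressed_file, \
            open(decompressed_path, "wb") as decompressed_file:
        # copyfileobj streams in fixed-size chunks instead of one big read()
        shutil.copyfileobj(compressed_file, decompressed_file)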
def _load_from_original_file(self):
    # Load data from original zip file
    zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

    try:
        dataFile = zipfile.ZipFile(zipFile_path + "hetrec2011-movielens-2k-v2.zip")
    except (FileNotFoundError, zipfile.BadZipFile):
        self._print("Unable to find data zip file. Downloading...")
        download_from_URL(self.DATASET_URL, zipFile_path, "hetrec2011-movielens-2k-v2.zip")
        dataFile = zipfile.ZipFile(zipFile_path + "hetrec2011-movielens-2k-v2.zip")

    URM_path = dataFile.extract("user_ratedmovies.dat", path=zipFile_path + "decompressed/")

    self._print("Loading Interactions")
    URM_all_dataframe = pd.read_csv(filepath_or_buffer=URM_path, sep="\t", header=0,
                                    dtype={0: str, 1: str, 2: float}, usecols=[0, 1, 2])
    URM_all_dataframe.columns = ["UserID", "ItemID", "Data"]

    dataset_manager = DatasetMapperManager()
    dataset_manager.add_URM(URM_all_dataframe, "URM_all")

    loaded_dataset = dataset_manager.generate_Dataset(dataset_name=self._get_dataset_name(),
                                                      is_implicit=self.IS_IMPLICIT)

    self._print("cleaning temporary files")
    shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

    self._print("Loading Complete")
    return loaded_dataset
def _load_from_original_file(self): # Load data from original self._print("Loading original data") zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER try: dataFile = zipfile.ZipFile(zipFile_path + "filmtrust.zip") except (FileNotFoundError, zipfile.BadZipFile): print("FilmTrust: Unable to fild data zip file. Downloading...") download_from_URL(self.DATASET_URL, zipFile_path, "filmtrust.zip") dataFile = zipfile.ZipFile(zipFile_path + "filmtrust.zip") URM_path = dataFile.extract("ratings.txt", path=zipFile_path + "decompressed/") URM_all, item_original_ID_to_index, user_original_ID_to_index = load_CSV_into_SparseBuilder( URM_path, separator=" ", header=False, remove_duplicates=True) loaded_URM_dict = {"URM_all": URM_all} loaded_dataset = Dataset( dataset_name=self._get_dataset_name(), URM_dictionary=loaded_URM_dict, ICM_dictionary=None, ICM_feature_mapper_dictionary=None, UCM_dictionary=None, UCM_feature_mapper_dictionary=None, user_original_ID_to_index=user_original_ID_to_index, item_original_ID_to_index=item_original_ID_to_index, is_implicit=self.IS_IMPLICIT, ) self._print("cleaning temporary files") shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True) self._print("loading complete") return loaded_dataset
def _get_URM_review_path(self, data_folder, file_name, file_url):
    """
    Review files are .csv
    :param data_folder:
    :param file_name:
    :param file_url:
    :return: path of the review file
    """

    try:
        open(data_folder + file_name, "r")

    except FileNotFoundError:
        self._print("Unable to find or open review file. Downloading...")
        download_from_URL(file_url, data_folder, file_name)

    return data_folder + file_name
def _load_from_original_file(self):
    # Load data from original zip file
    zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

    try:
        dataFile = zipfile.ZipFile(zipFile_path + "ml-100k.zip")
    except (FileNotFoundError, zipfile.BadZipFile):
        self._print("Unable to find data zip file. Downloading...")
        download_from_URL(self.DATASET_URL, zipFile_path, "ml-100k.zip")
        dataFile = zipfile.ZipFile(zipFile_path + "ml-100k.zip")

    URM_path = dataFile.extract("ml-100k/u.data", path=zipFile_path + "decompressed/")

    self._print("Loading Interactions")
    URM_all_dataframe, URM_timestamp_dataframe = _loadURM(URM_path, header=None, separator='\t')

    dataset_manager = DatasetMapperManager()
    dataset_manager.add_URM(URM_all_dataframe, "URM_all")
    dataset_manager.add_URM(URM_timestamp_dataframe, "URM_timestamp")

    loaded_dataset = dataset_manager.generate_Dataset(dataset_name=self._get_dataset_name(),
                                                      is_implicit=self.IS_IMPLICIT)

    self._print("Cleaning Temporary Files")
    shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

    self._print("Loading Complete")
    return loaded_dataset
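# A hypothetical usage sketch of one of these readers, kept as comments because
# it depends on the surrounding framework. The concrete class name, the
# load_data() entry point and the get_URM_all() accessor are assumptions
# inferred from the method names used above:
# reader = Movielens100KReader()
# loaded_dataset = reader.load_data()
# URM_all = loaded_dataset.get_URM_all()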
def _load_from_original_file(self):
    # Load data from original zip file
    zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

    try:
        dataFile = zipfile.ZipFile(zipFile_path + "ml-1m.zip")
    except (FileNotFoundError, zipfile.BadZipFile):
        self._print("Unable to find data zip file. Downloading...")
        download_from_URL(self.DATASET_URL, zipFile_path, "ml-1m.zip")
        dataFile = zipfile.ZipFile(zipFile_path + "ml-1m.zip")

    ICM_genre_path = dataFile.extract("ml-1m/movies.dat", path=zipFile_path + "decompressed/")
    UCM_path = dataFile.extract("ml-1m/users.dat", path=zipFile_path + "decompressed/")
    URM_path = dataFile.extract("ml-1m/ratings.dat", path=zipFile_path + "decompressed/")

    self._print("loading genres")
    ICM_genres, tokenToFeatureMapper_ICM_genres, item_original_ID_to_index = _loadICM_genres(ICM_genre_path,
                                                                                             header=True,
                                                                                             separator='::',
                                                                                             genresSeparator="|")

    self._print("loading UCM")
    UCM_all, tokenToFeatureMapper_UCM_all, user_original_ID_to_index = _loadUCM(UCM_path, header=True, separator='::')

    self._print("loading URM")
    URM_all, item_original_ID_to_index, user_original_ID_to_index, URM_timestamp = _loadURM_preinitialized_item_id(URM_path,
                                                                                                                   separator="::",
                                                                                                                   header=False,
                                                                                                                   if_new_user="add",
                                                                                                                   if_new_item="ignore",
                                                                                                                   item_original_ID_to_index=item_original_ID_to_index,
                                                                                                                   user_original_ID_to_index=user_original_ID_to_index)

    loaded_URM_dict = {"URM_all": URM_all, "URM_timestamp": URM_timestamp}
    loaded_ICM_dict = {"ICM_genres": ICM_genres}
    loaded_ICM_mapper_dict = {"ICM_genres": tokenToFeatureMapper_ICM_genres}
    loaded_UCM_dict = {"UCM_all": UCM_all}
    loaded_UCM_mapper_dict = {"UCM_all": tokenToFeatureMapper_UCM_all}

    loaded_dataset = Dataset(dataset_name=self._get_dataset_name(),
                             URM_dictionary=loaded_URM_dict,
                             ICM_dictionary=loaded_ICM_dict,
                             ICM_feature_mapper_dictionary=loaded_ICM_mapper_dict,
                             UCM_dictionary=loaded_UCM_dict,
                             UCM_feature_mapper_dictionary=loaded_UCM_mapper_dict,
                             user_original_ID_to_index=user_original_ID_to_index,
                             item_original_ID_to_index=item_original_ID_to_index,
                             is_implicit=self.IS_IMPLICIT,
                             )

    self._print("cleaning temporary files")
    shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

    self._print("loading complete")
    return loaded_dataset
def __init__(self, pre_splitted_path):

    test_percentage = 0.2
    validation_percentage = 0.2

    pre_splitted_path += "data_split/"
    pre_splitted_filename = "splitted_data_"
    ratings_file_name = "ratings_Amazon_Instant_Video.csv"

    # If directory does not exist, create it
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Dataset_AmazonInstantVideo: Attempting to load pre-splitted data")

        for attrib_name, attrib_object in load_data_dict_zip(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Dataset_AmazonInstantVideo: Pre-splitted data not found, building new one")

        folder_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER
        download_from_URL(self.DATASET_URL, folder_path, ratings_file_name)

        # Read the Amazon Instant Video ratings, dropping the timestamp column
        df = pd.read_csv(folder_path + ratings_file_name, sep=',', header=None,
                         names=['user', 'item', 'rating', 'timestamp'])[['user', 'item', 'rating']]

        URM_train_builder = IncrementalSparseMatrix(auto_create_col_mapper=True, auto_create_row_mapper=True)
        URM_train_builder.add_data_lists(df['user'].values, df['item'].values, df['rating'].values)

        URM_all = URM_train_builder.get_SparseMatrix()

        # Keep only ratings equal to 5: the data becomes True/False and
        # eliminate_zeros() drops the False entries
        URM_all.data = URM_all.data == 5
        URM_all.eliminate_zeros()

        # Keep only users with at least 5 ratings
        URM_all = ut.filter_urm(URM_all, user_min_number_ratings=5, item_min_number_ratings=1)

        # Create train - test - validation splits
        URM_train_original, URM_test = split_train_validation_percentage_user_wise(URM_all,
                                                                                   train_percentage=1 - test_percentage,
                                                                                   verbose=False)

        URM_train, URM_validation = split_train_validation_percentage_user_wise(URM_train_original,
                                                                                train_percentage=1 - validation_percentage,
                                                                                verbose=False)

        self.URM_DICT = {
            "URM_train": URM_train,
            "URM_test": URM_test,
            "URM_validation": URM_validation,
        }

        # No item features for this dataset; an empty dictionary is assumed
        # here so that the save call below has a valid ICM argument
        self.ICM_DICT = {}

        save_data_dict_zip(self.URM_DICT, self.ICM_DICT, pre_splitted_path, pre_splitted_filename)

    print("Dataset_AmazonInstantVideo: Dataset loaded")

    ut.print_stat_datareader(self)
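# The rating == 5 binarization above relies on a scipy.sparse detail that is
# easy to miss: assigning a boolean array to .data and calling eliminate_zeros()
# drops the False entries. A self-contained toy example of the same idiom:
import numpy as np
import scipy.sparse as sps

toy_URM = sps.csr_matrix(np.array([[5.0, 3.0, 0.0],
                                   [0.0, 5.0, 1.0]]))
toy_URM.data = toy_URM.data == 5      # True where the rating is 5
toy_URM.eliminate_zeros()             # False counts as zero and is removed
print(toy_URM.toarray())
# [[ True False False]
#  [False  True False]]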
def _load_from_original_file(self): # Load data from original self._print("Loading original data") zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER try: dataFile = zipfile.ZipFile( zipFile_path + "neural_factorization_machine-master.zip") except (FileNotFoundError, zipfile.BadZipFile): self._print("Unable to fild data zip file. Downloading...") download_from_URL(self.DATASET_URL, zipFile_path, "neural_factorization_machine-master.zip") dataFile = zipfile.ZipFile( zipFile_path + "neural_factorization_machine-master.zip") inner_path_in_zip = "neural_factorization_machine-master/data/frappe/" URM_train_path = dataFile.extract(inner_path_in_zip + "frappe.train.libfm", path=zipFile_path + "decompressed/") URM_test_path = dataFile.extract(inner_path_in_zip + "frappe.test.libfm", path=zipFile_path + "decompressed/") URM_validation_path = dataFile.extract( inner_path_in_zip + "frappe.validation.libfm", path=zipFile_path + "decompressed/") tmp_URM_train, item_original_ID_to_index, user_original_ID_to_index = self._loadURM( URM_train_path, item_original_ID_to_index=None, user_original_ID_to_index=None) tmp_URM_test, item_original_ID_to_index, user_original_ID_to_index = self._loadURM( URM_test_path, item_original_ID_to_index=item_original_ID_to_index, user_original_ID_to_index=user_original_ID_to_index) tmp_URM_validation, item_original_ID_to_index, user_original_ID_to_index = self._loadURM( URM_validation_path, item_original_ID_to_index=item_original_ID_to_index, user_original_ID_to_index=user_original_ID_to_index) shape = (len(user_original_ID_to_index), len(item_original_ID_to_index)) tmp_URM_train = reshapeSparse(tmp_URM_train, shape) tmp_URM_test = reshapeSparse(tmp_URM_test, shape) tmp_URM_validation = reshapeSparse(tmp_URM_validation, shape) URM_occurrence = tmp_URM_train + tmp_URM_test + tmp_URM_validation URM_all = URM_occurrence.copy() URM_all.data = np.ones_like(URM_all.data) loaded_URM_dict = { "URM_all": URM_all, "URM_occurrence": URM_occurrence } loaded_dataset = Dataset( dataset_name=self._get_dataset_name(), URM_dictionary=loaded_URM_dict, ICM_dictionary=None, ICM_feature_mapper_dictionary=None, UCM_dictionary=None, UCM_feature_mapper_dictionary=None, user_original_ID_to_index=user_original_ID_to_index, item_original_ID_to_index=item_original_ID_to_index, is_implicit=self.IS_IMPLICIT, ) self._print("cleaning temporary files") shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True) self._print("loading complete") return loaded_dataset
def _load_from_original_file(self):
    # Load data from original zip file
    zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

    try:
        dataFile = zipfile.ZipFile(zipFile_path + "ml-20m.zip")
    except (FileNotFoundError, zipfile.BadZipFile):
        self._print("Unable to find data zip file. Downloading...")
        download_from_URL(self.DATASET_URL, zipFile_path, "ml-20m.zip")
        dataFile = zipfile.ZipFile(zipFile_path + "ml-20m.zip")

    genres_path = dataFile.extract("ml-20m/movies.csv", path=zipFile_path + "decompressed/")
    tags_path = dataFile.extract("ml-20m/tags.csv", path=zipFile_path + "decompressed/")
    URM_path = dataFile.extract("ml-20m/ratings.csv", path=zipFile_path + "decompressed/")

    self._print("loading genres")
    ICM_genres, tokenToFeatureMapper_ICM_genres, item_original_ID_to_index = _loadICM_genres(genres_path,
                                                                                             header=True,
                                                                                             separator=',',
                                                                                             genresSeparator="|")

    self._print("loading tags")
    ICM_tags, tokenToFeatureMapper_ICM_tags, _ = _loadICM_tags(tags_path, header=True, separator=',',
                                                               if_new_item="ignore",
                                                               item_original_ID_to_index=item_original_ID_to_index)

    self._print("loading URM")
    URM_all, item_original_ID_to_index, user_original_ID_to_index, URM_timestamp = _loadURM_preinitialized_item_id(URM_path,
                                                                                                                   separator=",",
                                                                                                                   header=True,
                                                                                                                   if_new_user="add",
                                                                                                                   if_new_item="ignore",
                                                                                                                   item_original_ID_to_index=item_original_ID_to_index)

    ICM_all, tokenToFeatureMapper_ICM_all = merge_ICM(ICM_genres, ICM_tags,
                                                      tokenToFeatureMapper_ICM_genres,
                                                      tokenToFeatureMapper_ICM_tags)

    loaded_URM_dict = {"URM_all": URM_all, "URM_timestamp": URM_timestamp}
    loaded_ICM_dict = {"ICM_genres": ICM_genres, "ICM_tags": ICM_tags, "ICM_all": ICM_all}
    loaded_ICM_mapper_dict = {"ICM_genres": tokenToFeatureMapper_ICM_genres,
                              "ICM_tags": tokenToFeatureMapper_ICM_tags,
                              "ICM_all": tokenToFeatureMapper_ICM_all}

    loaded_dataset = Dataset(dataset_name=self._get_dataset_name(),
                             URM_dictionary=loaded_URM_dict,
                             ICM_dictionary=loaded_ICM_dict,
                             ICM_feature_mapper_dictionary=loaded_ICM_mapper_dict,
                             UCM_dictionary=None,
                             UCM_feature_mapper_dictionary=None,
                             user_original_ID_to_index=user_original_ID_to_index,
                             item_original_ID_to_index=item_original_ID_to_index,
                             is_implicit=self.IS_IMPLICIT,
                             )

    self._print("cleaning temporary files")
    shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

    self._print("loading complete")
    return loaded_dataset
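# merge_ICM is called above but defined elsewhere. A plausible minimal sketch of
# what merging two ICMs involves: stack them column-wise and shift the second
# feature mapper's indices past the first ICM's columns. The signature matches
# the call above; the body is an assumption:
import scipy.sparse as sps

def merge_ICM_sketch(ICM1, ICM2, mapper_ICM1, mapper_ICM2):
    ICM_all = sps.hstack([ICM1, ICM2], format='csr')

    mapper_ICM_all = dict(mapper_ICM1)
    n_features_ICM1 = ICM1.shape[1]

    for feature_token, feature_index in mapper_ICM2.items():
        # Features of the second ICM now live in columns offset by the first
        mapper_ICM_all[feature_token] = feature_index + n_features_ICM1

    return ICM_all, mapper_ICM_all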