def LoadDatasetFiles(self, docsFolder: str = "/docsutf8/*"):
    """Load the dataset's document files and print a short summary.

    The path pattern handed to ``LoadFiles`` is the dataset path stored on
    the instance with ``docsFolder`` appended by plain string concatenation,
    so ``docsFolder`` is expected to start with ``/`` like the default does.

    Args:
        docsFolder: Pattern, relative to the dataset path, selecting the
            files to load. Defaults to ``"/docsutf8/*"`` (everything under
            the dataset's ``docsutf8`` folder), which was previously the
            hard-coded value, so existing zero-argument calls are unchanged.

    Returns:
        Whatever ``LoadFiles`` returns for that pattern; it must support
        ``len`` because its size is reported in the summary line.
    """
    # NOTE(review): the trailing "*" suggests LoadFiles expands a glob
    # pattern -- confirm against its definition.
    listFile = LoadFiles(self.__pathToDatasetName + docsFolder)

    # Summary goes to stdout, exactly as before.
    print(
        f"\ndatasetID = {self.__dataset_name}; Number of Files = {len(listFile)}; Language of the Dataset = {self.__lan}"
    )
    return listFile
def LoadDatasetFiles(self, docsFolder="/docsutf8/*"):
    """Return the files of this dataset found under ``docsFolder``.

    With no argument the whole document set is loaded, i.e. everything under
    the ``docsutf8`` folder. A different folder pattern can be supplied
    instead; this is typically the case during evaluation, where the
    documents should be read from the training folder of a given fold.

    Args:
        docsFolder: Pattern appended verbatim to the dataset path before it
            is passed to ``LoadFiles``.

    Returns:
        The result of ``LoadFiles`` for the combined path pattern.
    """
    # Plain concatenation on purpose: docsFolder carries its own leading "/".
    search_pattern = self.__pathToDatasetName + docsFolder
    dataset_files = LoadFiles(search_pattern)

    # Report which dataset was read, how many files it has, and its language.
    summary = f"\ndatasetID = {self.__dataset_name}; Number of Files = {len(dataset_files)}; Language of the Dataset = {self.__lan}"
    print(summary)

    return dataset_files