def test_specified_filenames_yielding_with_dirs(self):
    # Request the second fixture file (.hgignore) by name, surrounded by
    # two names that the result is expected not to contain.
    wanted_name, wanted_content = self.file_names_and_contents[1]
    expected_pairs = [(self.dir + wanted_name, wanted_content)]
    requested_names = [
        'IGNORED-NON-EXISTENT',
        wanted_name,
        'another ignored...',
    ]
    unpacked = iter_unzip_from_bytes(
        self.zipped,
        filenames=requested_names,
        yielding_with_dirs=True)
    # With `yielding_with_dirs=True` the yielded name is expected to be
    # prefixed with the fixture directory (`self.dir`).
    actual_pairs = list(unpacked)
    self.assertCountEqual(expected_pairs, actual_pairs)
def iter_filenames_and_contents(self, multifile_unpacking=False):
    """
    Generate `(filename, content)` pairs for the payload(s) of this message.

    For a multipart message, the generator recurses into each
    sub-message returned by `get_payload()` and re-yields its pairs.

    For a non-multipart message, the decoded payload is first:

    * un-GZipped -- if the filename extension is in
      `GZIP_FILENAME_EXTENSIONS` or the content type is in
      `GZIP_CONTENT_TYPES`; otherwise
    * un-ZIPped -- if the filename extension is in
      `ZIP_FILENAME_EXTENSIONS` or the content type is in
      `ZIP_CONTENT_TYPES`.

    Args:
        `multifile_unpacking` (default: False):
            Matters only for ZIP archives.  If true, every file from
            the archive is yielded under the name
            `<archive filename>/<name in the archive>`.  If false,
            only the first file from the archive is yielded, under
            the archive's own filename (a warning is logged if the
            archive contains more files).

    Yields:
        `(filename, content)` tuples, where `filename` is the result
        of `self.get_filename(None)` (or the archive-prefixed name in
        the `multifile_unpacking` mode).

    If GZip decompression or ZIP unpacking fails with one of the
    handled exceptions, a warning is logged and the payload is
    yielded as it was (still packed).  An empty ZIP archive causes
    a warning and nothing is yielded.
    """
    if self.is_multipart():
        for msg in self.get_payload():
            # recursive calls on sub-messages
            # (NOTE that all of them are instances of this class
            # because all have been created within a call of this
            # class' from_string()/from_file())
            for name, content in msg.iter_filenames_and_contents(
                    multifile_unpacking=multifile_unpacking):
                yield name, content
        return

    content_type = self.get_content_type()
    payload = self.get_decoded_payload()
    filename = self.get_filename(None)
    # `filename` may be falsy, hence the `or ''` fallback
    ext = os.path.splitext(filename or '')[1].lower()

    # un-Gzip if necessary
    if (ext in self.GZIP_FILENAME_EXTENSIONS or
            content_type in self.GZIP_CONTENT_TYPES):
        try:
            payload = gzip_decompress(payload)
        except (IOError, EOFError) as exc:
            # best effort: fall through and yield the payload as is
            LOGGER.warning('Could not decompress file %r using GZip '
                           'decoder (%s)', filename, exc)

    # un-ZIP if necessary
    elif (ext in self.ZIP_FILENAME_EXTENSIONS or
            content_type in self.ZIP_CONTENT_TYPES):
        try:
            names_and_contents = list(iter_unzip_from_bytes(payload))
        except (zipfile.BadZipfile, RuntimeError) as exc:
            # best effort: fall through and yield the payload as is
            LOGGER.warning('Could not unpack file %r using ZIP '
                           'decoder (%s)', filename, exc)
        else:
            if not names_and_contents:
                LOGGER.warning('No files in archive %r', filename)
                # yielding nothing
                return
            if multifile_unpacking:
                # all files from the archive will be yielded (with
                # their names prefixed with archive file name + '/')
                #
                # The archive file name is arbitrary text and it
                # becomes part of a format string, so any braces in
                # it must be escaped -- otherwise a name such as
                # 'a{b}.zip' would make `format()` raise an error
                # (or substitute a wrong value).
                escaped_filename = (
                    (filename or '')
                    .replace('{', '{{')
                    .replace('}', '}}'))
                name_pattern = escaped_filename + '/{0}'
                for name, content in names_and_contents:
                    yielded_name = name_pattern.format(name)
                    LOGGER.debug('Yielding file %r...', yielded_name)
                    yield yielded_name, content
                return
            # only one file from the archive will be yielded
            name, payload = names_and_contents[0]
            if len(names_and_contents) > 1:
                LOGGER.warning('Archive %r contains more than '
                               'one file but only one (named '
                               '%r in the archive) will be '
                               'yielded as the payload of %r',
                               filename, name, filename)

    LOGGER.debug('Yielding file %r...', filename)
    yield filename, payload
def test_all_filenames(self):
    # Without any `filenames` restriction, every fixture file is
    # expected to be yielded (in any order).
    unpacked_pairs = list(iter_unzip_from_bytes(self.zipped))
    self.assertCountEqual(self.file_names_and_contents, unpacked_pairs)
def obtain_orig_data(self):
    """
    Download the archive from `self.config['url']` and return the
    content of its `self.CSV_FILENAME` member.

    Exactly one matching member is required: the unpacking below
    raises ValueError if the archive yields zero or more than one
    item for that file name.
    """
    archive_bytes = self.download(self.config['url'])
    matching_members = iter_unzip_from_bytes(
        archive_bytes,
        filenames=[self.CSV_FILENAME])
    # single-element unpacking enforces "exactly one member"
    (_member_name, csv_content), = matching_members
    return csv_content